From 3a969af1ff03d0cfc97d33ac26557c86fa7bb569 Mon Sep 17 00:00:00 2001 From: Musa AbdulKareem Date: Wed, 31 Dec 2025 21:02:14 +0100 Subject: [PATCH 01/11] feat(ere-sp1): add SP1 cluster proving support Add support for distributed proof generation using SP1 Cluster infrastructure. Changes: - Add ClusterProverConfig and Cluster variant to ProverResourceType - Implement SP1ClusterClient with gRPC API and Redis artifact storage - Support compressed artifacts with zstd and chunked downloads - Add exponential backoff polling for proof completion - Integrate cluster proving flow into EreSP1 Configuration: - SP1_CLUSTER_ENDPOINT: gRPC endpoint (default: http://172.17.0.1:50051/) - SP1_CLUSTER_REDIS_URL: Redis URL for artifacts --- Cargo.lock | 119 +++++++- Cargo.toml | 4 + crates/zkvm-interface/Cargo.toml | 2 +- crates/zkvm-interface/src/zkvm.rs | 2 +- crates/zkvm-interface/src/zkvm/resource.rs | 54 ++++ crates/zkvm/sp1/Cargo.toml | 11 + crates/zkvm/sp1/build.rs | 7 + crates/zkvm/sp1/proto/cluster.proto | 122 ++++++++ crates/zkvm/sp1/src/zkvm.rs | 96 +++++- crates/zkvm/sp1/src/zkvm/cluster.rs | 340 +++++++++++++++++++++ crates/zkvm/sp1/src/zkvm/error.rs | 30 ++ crates/zkvm/sp1/src/zkvm/sdk.rs | 85 +++++- 12 files changed, 851 insertions(+), 21 deletions(-) create mode 100644 crates/zkvm/sp1/proto/cluster.proto create mode 100644 crates/zkvm/sp1/src/zkvm/cluster.rs diff --git a/Cargo.lock b/Cargo.lock index 98f98c9d..1c2252d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -671,6 +671,15 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arc-swap" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +dependencies = [ + "rustversion", +] + [[package]] name = "ark-bn254" version = "0.5.0" @@ -2557,6 +2566,20 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "tokio-util", +] + [[package]] name = "common" version = "0.2.0" @@ -2786,6 +2809,12 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc16" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff" + [[package]] name = "crc32fast" version = "1.4.2" @@ -4123,16 +4152,24 @@ name = "ere-sp1" version = "0.0.16" dependencies = [ "anyhow", + "bincode 1.3.3", "bincode 2.0.1", "ere-build-utils", "ere-compile-utils", "ere-test-utils", "ere-zkvm-interface", + "prost 0.13.5", + "redis", "serde", "sp1-sdk", "tempfile", "thiserror 2.0.12", + "tokio", + "tonic 0.12.3", + "tonic-build 0.12.3", "tracing", + "ulid", + "zstd 0.13.3", ] [[package]] @@ -11348,6 +11385,34 @@ dependencies = [ "rayon", ] +[[package]] +name = "redis" +version = "0.27.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d8f99a4090c89cc489a94833c901ead69bfbf3877b4867d5482e321ee875bc" +dependencies = [ + "arc-swap", + "async-trait", + "bytes", + "combine", + "crc16", + "futures", + "futures-util", + "itertools 0.13.0", + "itoa", + "log", + "num-bigint 0.4.6", + "percent-encoding", + "pin-project-lite", + "rand 0.8.5", + "ryu", + "sha1_smol", + "socket2 0.5.9", + "tokio", + "tokio-util", + "url", +] + [[package]] name = "redox_syscall" version = "0.5.12" @@ -12914,6 +12979,12 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "sha1_smol" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" + [[package]] name = "sha2" version = "0.10.9" @@ -14589,6 +14660,20 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "tonic-build" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +dependencies = [ + "prettyplease 0.2.32", + "proc-macro2", + "prost-build 0.13.5", + "prost-types 0.13.5", + "quote", + "syn 2.0.101", +] + [[package]] name = "tower" version = "0.4.13" @@ -15074,6 +15159,16 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "ulid" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "470dbf6591da1b39d43c14523b2b469c86879a53e8b758c8e090a470fe7b1fbe" +dependencies = [ + "rand 0.9.2", + "web-time", +] + [[package]] name = "unarray" version = "0.1.4" @@ -16270,7 +16365,7 @@ dependencies = [ "pbkdf2 0.11.0", "sha1", "time", - "zstd", + "zstd 0.11.2+zstd.1.5.2", ] [[package]] @@ -16753,7 +16848,7 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tonic 0.8.3", - "tonic-build", + "tonic-build 0.8.4", "tracing", "twirp-rs", "uuid 1.17.0", @@ -16848,7 +16943,16 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe 7.2.4", ] [[package]] @@ -16861,6 +16965,15 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.15+zstd.1.5.7" diff --git a/Cargo.toml b/Cargo.toml index 7e83cdaf..1e971c2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ postcard = { version = "1.0.8", default-features = false } prost = "0.13" prost-build = "0.13" rand = "0.9.2" +redis = "0.27" rkyv = { version = "0.8.12", default-features = false } serde = { version = "1.0.219", default-features = false } serde_bytes = { version = "0.11.19", default-features = false } @@ -74,13 +75,16 @@ strum = "0.27.2" tempfile = "3.20.0" thiserror = "2.0.12" tokio = "1.0" +tonic = "0.12" toml = "0.8.23" tower-http = "0.6.6" tracing = "0.1.41" tracing-subscriber = "0.3.19" twirp = "0.9.1" twirp-build = "0.9.0" +ulid = "1.1" wait-timeout = "0.2.1" +zstd = "0.13" # Airbender dependencies airbender_execution_utils = { git = "https://github.com/matter-labs/zksync-airbender", package = "execution_utils", tag = "v0.5.2" } diff --git a/crates/zkvm-interface/Cargo.toml b/crates/zkvm-interface/Cargo.toml index 884d6b4d..c623ba50 100644 --- a/crates/zkvm-interface/Cargo.toml +++ b/crates/zkvm-interface/Cargo.toml @@ -15,7 +15,7 @@ strum = { workspace = true, features = ["derive"] } thiserror.workspace = true # Optional dependencies -clap = { workspace = true, features = ["derive"], optional = true } +clap = { workspace = true, features = ["derive", "env"], optional = true } [dev-dependencies] bincode = { workspace = true, features = ["alloc", "serde"] } diff --git a/crates/zkvm-interface/src/zkvm.rs b/crates/zkvm-interface/src/zkvm.rs index 0b48e33b..11a43817 100644 --- a/crates/zkvm-interface/src/zkvm.rs +++ b/crates/zkvm-interface/src/zkvm.rs @@ -11,7 +11,7 @@ mod resource; pub use error::CommonError; pub use proof::{Proof, ProofKind}; pub use report::{ProgramExecutionReport, ProgramProvingReport}; -pub use resource::{NetworkProverConfig, ProverResourceType}; +pub use resource::{ClusterProverConfig, NetworkProverConfig, ProverResourceType}; /// Input for the prover to execute/prove a guest program. #[derive(Clone, Debug, Default)] diff --git a/crates/zkvm-interface/src/zkvm/resource.rs b/crates/zkvm-interface/src/zkvm/resource.rs index 672eaff5..72258fc2 100644 --- a/crates/zkvm-interface/src/zkvm/resource.rs +++ b/crates/zkvm-interface/src/zkvm/resource.rs @@ -24,6 +24,55 @@ impl NetworkProverConfig { } } +/// Configuration for cluster-based proving (e.g., SP1 Cluster) +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +#[cfg_attr(feature = "clap", derive(clap::Args))] +pub struct ClusterProverConfig { + #[cfg_attr( + feature = "clap", + arg( + long, + env = "SP1_CLUSTER_ENDPOINT", + default_value = "http://172.17.0.1:50051/" + ) + )] + /// The gRPC endpoint URL of the cluster API service (e.g., http://172.17.0.1:50051) + pub endpoint: String, + + #[cfg_attr( + feature = "clap", + arg( + long, + env = "SP1_CLUSTER_REDIS_URL", + default_value = "redis://:redispassword@172.17.0.1:6379/0" + ) + )] + /// Redis URL for artifact storage (e.g., redis://:password@172.17.0.1:6379/0) + pub redis_url: String, +} + +impl Default for ClusterProverConfig { + fn default() -> Self { + Self { + endpoint: std::env::var("SP1_CLUSTER_ENDPOINT") + .unwrap_or("http://172.17.0.1:50051/".to_string()), + redis_url: std::env::var("SP1_CLUSTER_REDIS_URL") + .unwrap_or("redis://:redispassword@172.17.0.1:6379/0".to_string()), + } + } +} + +#[cfg(feature = "clap")] +impl ClusterProverConfig { + pub fn to_args(&self) -> Vec<&str> { + core::iter::once(["--endpoint", self.endpoint.as_str()]) + .chain(core::iter::once(["--redis-url", self.redis_url.as_str()])) + .flatten() + .collect() + } +} + /// ResourceType specifies what resource will be used to create the proofs. #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] @@ -34,6 +83,8 @@ pub enum ProverResourceType { Gpu, /// Use a remote prover network Network(NetworkProverConfig), + /// Use a multi-GPU cluster (e.g., SP1 Cluster) + Cluster(ClusterProverConfig), } #[cfg(feature = "clap")] @@ -45,6 +96,9 @@ impl ProverResourceType { Self::Network(config) => core::iter::once("network") .chain(config.to_args()) .collect(), + Self::Cluster(config) => core::iter::once("cluster") + .chain(config.to_args()) + .collect(), } } } diff --git a/crates/zkvm/sp1/Cargo.toml b/crates/zkvm/sp1/Cargo.toml index a54760e7..ea1c567d 100644 --- a/crates/zkvm/sp1/Cargo.toml +++ b/crates/zkvm/sp1/Cargo.toml @@ -20,11 +20,22 @@ sp1-sdk = { workspace = true, optional = true } ere-compile-utils = { workspace = true, optional = true } ere-zkvm-interface.workspace = true +# Cluster dependencies +prost.workspace = true +redis = { workspace = true, features = ["tokio-comp", "cluster-async"] } +tokio = { workspace = true, features = ["rt-multi-thread", "macros", "time"] } +tonic.workspace = true +ulid.workspace = true +zstd.workspace = true +# bincode 1.x for deserializing proofs from SP1 Cluster (cluster uses bincode 1.x) +bincode1 = { package = "bincode", version = "1.3" } + [dev-dependencies] ere-test-utils = { workspace = true, features = ["host"] } [build-dependencies] ere-build-utils.workspace = true +tonic-build = "0.12" [features] default = ["compiler", "zkvm"] diff --git a/crates/zkvm/sp1/build.rs b/crates/zkvm/sp1/build.rs index f3b5e662..bc88045c 100644 --- a/crates/zkvm/sp1/build.rs +++ b/crates/zkvm/sp1/build.rs @@ -2,4 +2,11 @@ use ere_build_utils::detect_and_generate_name_and_sdk_version; fn main() { detect_and_generate_name_and_sdk_version("sp1", "sp1-sdk"); + + // Compile cluster proto + let proto_dir = std::path::Path::new("proto"); + tonic_build::configure() + .build_server(false) + .compile_protos(&[proto_dir.join("cluster.proto")], &[proto_dir]) + .expect("Failed to compile cluster proto"); } diff --git a/crates/zkvm/sp1/proto/cluster.proto b/crates/zkvm/sp1/proto/cluster.proto new file mode 100644 index 00000000..32278892 --- /dev/null +++ b/crates/zkvm/sp1/proto/cluster.proto @@ -0,0 +1,122 @@ +// The RPC for interacting with the Prover Cluster. +syntax = "proto3"; + +package cluster; + +service ClusterService { + rpc ProofRequestCreate(ProofRequestCreateRequest) returns (Empty) {} + rpc ProofRequestCancel(ProofRequestCancelRequest) returns (Empty) {} + rpc ProofRequestUpdate(ProofRequestUpdateRequest) returns (Empty) {} + rpc ProofRequestGet(ProofRequestGetRequest) returns (ProofRequestGetResponse) {} + rpc ProofRequestList(ProofRequestListRequest) returns (ProofRequestListResponse) {} + + rpc Healthcheck(Empty) returns (Empty); +} + +message Empty {} + +message ProofRequestCreateRequest { + string proof_id = 1; + string program_artifact_id = 2; + string stdin_artifact_id = 3; + optional string options_artifact_id = 4; + optional string proof_artifact_id = 5; + bytes requester = 6; + uint64 deadline = 7; + uint64 cycle_limit = 8; + uint64 gas_limit = 9; +} + +message ProofRequestCancelRequest { + string proof_id = 1; +} + +message ProofRequest { + string id = 1; + ProofRequestStatus proof_status = 2; + bytes requester = 3; + ExecutionResult execution_result = 4; + string stdin_artifact_id = 5; + string program_artifact_id = 6; + optional string proof_artifact_id = 7; + optional string options_artifact_id = 8; + optional uint64 cycle_limit = 9; + optional uint64 gas_limit = 10; + uint64 deadline = 11; + bool handled = 12; + string metadata = 13; + uint64 created_at = 14; + uint64 updated_at = 15; +} + +message ProofRequestListRequest { + repeated ProofRequestStatus proof_status = 1; + repeated ExecutionStatus execution_status = 2; + optional uint64 minimum_deadline = 3; + optional bool handled = 4; + optional uint32 limit = 5; + optional uint32 offset = 6; +} + +message ProofRequestListResponse { + repeated ProofRequest proof_requests = 1; +} + +enum ProofRequestStatus { + PROOF_REQUEST_STATUS_UNSPECIFIED = 0; + PROOF_REQUEST_STATUS_PENDING = 1; + PROOF_REQUEST_STATUS_COMPLETED = 2; + PROOF_REQUEST_STATUS_FAILED = 3; + PROOF_REQUEST_STATUS_CANCELLED = 4; +} + +// The possible statuses for the execution of the program. +enum ExecutionStatus { + EXECUTION_STATUS_UNSPECIFIED = 0; + EXECUTION_STATUS_UNEXECUTED = 1; + EXECUTION_STATUS_EXECUTED = 2; + EXECUTION_STATUS_FAILED = 3; + EXECUTION_STATUS_CANCELLED = 4; +} + +// The specific reasons why an execution might fail. +enum ExecutionFailureCause { + EXECUTION_FAILURE_CAUSE_UNSPECIFIED = 0; + EXECUTION_FAILURE_CAUSE_HALT_WITH_NON_ZERO_EXIT_CODE = 1; + EXECUTION_FAILURE_CAUSE_INVALID_MEMORY_ACCESS = 2; + EXECUTION_FAILURE_CAUSE_UNSUPPORTED_SYSCALL = 3; + EXECUTION_FAILURE_CAUSE_BREAKPOINT = 4; + EXECUTION_FAILURE_CAUSE_EXCEEDED_CYCLE_LIMIT = 5; + EXECUTION_FAILURE_CAUSE_INVALID_SYSCALL_USAGE = 6; + EXECUTION_FAILURE_CAUSE_UNIMPLEMENTED = 7; + EXECUTION_FAILURE_CAUSE_END_IN_UNCONSTRAINED = 8; +} + +message ExecutionResult { + ExecutionStatus status = 1; + ExecutionFailureCause failure_cause = 2; + uint64 cycles = 3; + uint64 gas = 4; + bytes public_values_hash = 5; +} + +// The request to get a specific proof request. +message ProofRequestGetRequest { + string proof_id = 1; +} + +// The response containing the details of a single proof request. +message ProofRequestGetResponse { + ProofRequest proof_request = 1; +} + +// The request to update a proof request. +message ProofRequestUpdateRequest { + string proof_id = 1; + optional ProofRequestStatus proof_status = 2; + optional ExecutionResult execution_result = 3; + optional uint64 deadline = 4; + optional bool handled = 5; + optional string metadata = 6; +} + diff --git a/crates/zkvm/sp1/src/zkvm.rs b/crates/zkvm/sp1/src/zkvm.rs index 2a3c855e..a9f14c3b 100644 --- a/crates/zkvm/sp1/src/zkvm.rs +++ b/crates/zkvm/sp1/src/zkvm.rs @@ -13,11 +13,22 @@ use std::{ }; use tracing::info; +pub mod cluster; mod error; mod sdk; +pub use cluster::SP1ClusterClient; pub use error::Error; +/// SP1 Network/Cluster proof mode constants +/// These match sp1_sdk::network::proto::types::ProofMode enum values +pub mod proof_mode { + pub const CORE: i32 = 1; + pub const COMPRESSED: i32 = 2; + pub const PLONK: i32 = 3; + pub const GROTH16: i32 = 4; +} + include!(concat!(env!("OUT_DIR"), "/name_and_sdk_version.rs")); pub struct EreSP1 { @@ -40,7 +51,7 @@ pub struct EreSP1 { impl EreSP1 { pub fn new(program: SP1Program, resource: ProverResourceType) -> Result { - let prover = Prover::new(&resource); + let prover = Prover::new(&resource)?; let (pk, vk) = prover.setup(&program.elf); Ok(Self { program, @@ -58,6 +69,61 @@ impl EreSP1 { fn prover_mut(&'_ self) -> Result, Error> { self.prover.write().map_err(|_| Error::RwLockPosioned) } + + /// Prove via the cluster + fn prove_via_cluster( + &self, + input: &Input, + proof_kind: ProofKind, + ) -> anyhow::Result<(PublicValues, Proof, ProgramProvingReport)> { + use sp1_sdk::proof::ProofFromNetwork; + + info!("Generating {:?} proof via SP1 Cluster...", proof_kind,); + + let mode = match proof_kind { + ProofKind::Compressed => proof_mode::COMPRESSED, + ProofKind::Groth16 => proof_mode::GROTH16, + }; + + // Serialize stdin in SP1 format using bincode 1.x (must match sp1-cluster's bincode version) + let mut stdin = SP1Stdin::new(); + stdin.write_slice(input.stdin()); + let stdin_bytes = bincode1::serialize(&stdin) + .map_err(|e| CommonError::serialize("stdin", "bincode1", e))?; + + // Use the prover's cluster proving method + let prover = self.prover()?; + let result = prover.prove_cluster(self.program.elf(), &stdin_bytes, mode)?; + + // The proof from cluster is serialized ProofFromNetwork using bincode 1.x + let proof_from_network: ProofFromNetwork = bincode1::deserialize(&result.proof) + .map_err(|err| CommonError::deserialize("proof", "bincode1", err))?; + + info!( + "Received proof from cluster: sp1_version={}, proof_type={:?}", + proof_from_network.sp1_version, + SP1ProofMode::from(&proof_from_network.proof) + ); + + let public_values = proof_from_network.public_values.as_slice().to_vec(); + + // Re-serialize as SP1ProofWithPublicValues for storage (using bincode 2.x for ere compatibility) + let sp1_proof = SP1ProofWithPublicValues { + proof: proof_from_network.proof, + public_values: proof_from_network.public_values, + sp1_version: proof_from_network.sp1_version, + tee_proof: None, + }; + let proof_bytes = bincode::serde::encode_to_vec(&sp1_proof, bincode::config::legacy()) + .map_err(|e| CommonError::serialize("proof", "bincode", e))?; + let proof = Proof::new(proof_kind, proof_bytes); + + Ok(( + public_values, + proof, + ProgramProvingReport::new(result.proving_time), + )) + } } impl zkVM for EreSP1 { @@ -87,6 +153,11 @@ impl zkVM for EreSP1 { ) -> anyhow::Result<(PublicValues, Proof, ProgramProvingReport)> { info!("Generating proof…"); + // Handle cluster proving separately + if self.prover()?.is_cluster() { + return self.prove_via_cluster(input, proof_kind); + } + let stdin = input_to_stdin(input)?; let mode = match proof_kind { @@ -104,7 +175,8 @@ impl zkVM for EreSP1 { // Note that `take` has to be done explicitly first so the // Moongate container could be removed properly. take(&mut *prover); - *prover = Prover::new(&self.resource); + *prover = Prover::new(&self.resource) + .expect("Failed to recreate prover after panic recovery"); } Error::Panic(panic_msg(err)) @@ -274,4 +346,24 @@ mod tests { let test_case = BasicProgram::::valid_test_case(); run_zkvm_prove(&zkvm, &test_case); } + + #[test] + #[ignore = "Requires SP1_CLUSTER_ENDPOINT environment variable to be set"] + fn test_prove_sp1_cluster() { + use ere_zkvm_interface::zkvm::ClusterProverConfig; + + // Check if we have the required environment variable + if std::env::var("SP1_CLUSTER_ENDPOINT").is_err() { + eprintln!("Skipping cluster test: SP1_CLUSTER_ENDPOINT not set"); + return; + } + + // Create a cluster prover configuration + let cluster_config = ClusterProverConfig::default(); + let program = basic_program(); + let zkvm = EreSP1::new(program, ProverResourceType::Cluster(cluster_config)).unwrap(); + + let test_case = BasicProgram::::valid_test_case(); + run_zkvm_prove(&zkvm, &test_case); + } } diff --git a/crates/zkvm/sp1/src/zkvm/cluster.rs b/crates/zkvm/sp1/src/zkvm/cluster.rs new file mode 100644 index 00000000..597422fc --- /dev/null +++ b/crates/zkvm/sp1/src/zkvm/cluster.rs @@ -0,0 +1,340 @@ +//! gRPC client for SP1 Cluster API with Redis artifact storage. + +use crate::zkvm::Error; +use redis::AsyncCommands; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use tokio::time::sleep; +use tonic::transport::Channel; +use tracing::{debug, info}; +use ulid::Ulid; + +// Include the generated protobuf code +pub mod cluster_proto { + tonic::include_proto!("cluster"); +} + +use cluster_proto::{ + ExecutionFailureCause, ExecutionStatus, ProofRequestCreateRequest, ProofRequestGetRequest, + ProofRequestStatus, cluster_service_client::ClusterServiceClient, +}; + +/// Default timeout for proof generation (4 hours) +const DEFAULT_TIMEOUT_SECS: u64 = 4 * 60 * 60; + +/// Polling interval configuration for exponential backoff +const POLL_INITIAL_INTERVAL_MS: u64 = 500; +const POLL_MAX_INTERVAL_MS: u64 = 10_000; +const POLL_BACKOFF_MULTIPLIER: f64 = 1.5; + +/// SP1 Cluster client that uses gRPC API and Redis for artifact storage +pub struct SP1ClusterClient { + grpc_endpoint: String, + redis_url: String, +} + +impl SP1ClusterClient { + /// Creates a new SP1 Cluster client + pub fn new(grpc_endpoint: &str, redis_url: &str) -> Result { + if grpc_endpoint.is_empty() { + return Err(Error::EndpointNotConfigured); + } + if redis_url.is_empty() { + return Err(Error::RedisNotConfigured); + } + + info!( + "Created SP1 Cluster client: grpc={}, redis={}", + grpc_endpoint, redis_url + ); + + Ok(Self { + grpc_endpoint: grpc_endpoint.to_string(), + redis_url: redis_url.to_string(), + }) + } + + /// Synchronous wrapper for prove that creates a runtime on-demand + pub fn prove_sync(&self, elf: &[u8], stdin: &[u8], mode: i32) -> Result { + let runtime = tokio::runtime::Runtime::new() + .map_err(|e| Error::ClusterProve(format!("Failed to create tokio runtime: {}", e)))?; + let result = runtime.block_on(self.prove(elf, stdin, mode)); + + // Drop the runtime in a separate thread to avoid panic when + // called from within an async context (e.g., inside another block_on) + std::thread::spawn(move || drop(runtime)); + + result + } + + /// Connect to the gRPC service + async fn connect_grpc(&self) -> Result, Error> { + ClusterServiceClient::connect(self.grpc_endpoint.clone()) + .await + .map_err(|e| Error::GrpcConnect(e.to_string())) + } + + /// Connect to Redis + async fn connect_redis(&self) -> Result { + let client = redis::Client::open(self.redis_url.as_str()) + .map_err(|e| Error::Redis(e.to_string()))?; + client + .get_multiplexed_async_connection() + .await + .map_err(|e| Error::Redis(e.to_string())) + } + + /// Generate a unique artifact ID + fn create_artifact_id(&self) -> String { + format!("artifact_{}", Ulid::new()) + } + + /// Upload an artifact to Redis with zstd compression + async fn upload_artifact( + &self, + conn: &mut redis::aio::MultiplexedConnection, + artifact_id: &str, + data: &[u8], + ) -> Result<(), Error> { + // Compress with zstd (level 0 for fast compression) + let compressed = zstd::encode_all(data, 0) + .map_err(|e| Error::Redis(format!("Failed to compress artifact: {}", e)))?; + + // Store with just the artifact_id as key (as SP1 Cluster expects) + conn.set::<_, _, ()>(artifact_id, &compressed) + .await + .map_err(|e| Error::Redis(e.to_string()))?; + + debug!( + "Uploaded artifact {} ({} bytes -> {} bytes compressed)", + artifact_id, + data.len(), + compressed.len() + ); + Ok(()) + } + + /// Download an artifact from Redis with zstd decompression + /// Handles both simple keys and chunked storage (for large artifacts) + async fn download_artifact( + &self, + conn: &mut redis::aio::MultiplexedConnection, + artifact_id: &str, + ) -> Result, Error> { + // Check if artifact is stored in chunks + let chunks_key = format!("{}:chunks", artifact_id); + let total_chunks: usize = conn + .hlen(&chunks_key) + .await + .map_err(|e| Error::Redis(e.to_string()))?; + + let compressed: Vec = if total_chunks == 0 { + // Simple key storage + conn.get(artifact_id) + .await + .map_err(|e| Error::Redis(e.to_string()))? + } else { + // Chunked storage - download all chunks and combine + let mut chunks: Vec> = Vec::with_capacity(total_chunks); + for i in 0..total_chunks { + let chunk: Vec = conn + .hget(&chunks_key, i) + .await + .map_err(|e| Error::Redis(format!("Failed to get chunk {}: {}", i, e)))?; + chunks.push(chunk); + } + chunks.into_iter().flatten().collect() + }; + + // Decompress with zstd + let data = zstd::decode_all(compressed.as_slice()) + .map_err(|e| Error::Redis(format!("Failed to decompress artifact: {}", e)))?; + + debug!( + "Downloaded artifact {} ({} bytes compressed -> {} bytes, chunks: {})", + artifact_id, + compressed.len(), + data.len(), + total_chunks + ); + Ok(data) + } + + /// Delete artifacts from Redis to clean up after proving + async fn cleanup_artifacts( + &self, + conn: &mut redis::aio::MultiplexedConnection, + artifact_ids: &[&str], + ) { + for artifact_id in artifact_ids { + // Try to delete both the simple key and chunks key + let chunks_key = format!("{}:chunks", artifact_id); + let _: Result<(), _> = conn.del::<_, ()>(*artifact_id).await; + let _: Result<(), _> = conn.del::<_, ()>(&chunks_key).await; + } + debug!("Cleaned up {} artifacts from Redis", artifact_ids.len()); + } + + /// Submit a proof request and wait for completion + pub async fn prove(&self, elf: &[u8], stdin: &[u8], mode: i32) -> Result { + let mut grpc = self.connect_grpc().await?; + let mut redis = self.connect_redis().await?; + + // Upload artifacts + let program_id = self.create_artifact_id(); + let stdin_id = self.create_artifact_id(); + let proof_id = self.create_artifact_id(); + + // Program needs to be bincode serialized (wrapping the ELF bytes) + // Use bincode 1.x to match sp1-cluster's bincode version + let program_serialized = bincode1::serialize(&elf.to_vec()) + .map_err(|e| Error::Redis(format!("Failed to serialize program: {}", e)))?; + + self.upload_artifact(&mut redis, &program_id, &program_serialized) + .await?; + + // Stdin is uploaded as-is (already serialized by caller) + self.upload_artifact(&mut redis, &stdin_id, stdin).await?; + + // Create proof request with validated ID + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_millis(); + let request_id = format!("ere_{}", timestamp); + + let deadline = SystemTime::now() + Duration::from_secs(DEFAULT_TIMEOUT_SECS); + + info!("Submitting proof request: {}", request_id); + + grpc.proof_request_create(ProofRequestCreateRequest { + proof_id: request_id.clone(), + program_artifact_id: program_id.clone(), + stdin_artifact_id: stdin_id.clone(), + options_artifact_id: Some(mode.to_string()), + proof_artifact_id: Some(proof_id.clone()), + requester: vec![], + deadline: deadline.duration_since(UNIX_EPOCH).unwrap().as_secs(), + cycle_limit: 0, + gas_limit: 0, + }) + .await + .map_err(|e| Error::GrpcRequest(e.to_string()))?; + + // Poll for completion with exponential backoff + let start = std::time::Instant::now(); + let mut poll_interval_ms = POLL_INITIAL_INTERVAL_MS; + loop { + if SystemTime::now() > deadline { + // Cleanup uploaded artifacts before returning error + self.cleanup_artifacts(&mut redis, &[&program_id, &stdin_id]) + .await; + return Err(Error::ProveTimeout(DEFAULT_TIMEOUT_SECS)); + } + + let resp = grpc + .proof_request_get(ProofRequestGetRequest { + proof_id: request_id.clone(), + }) + .await + .map_err(|e| Error::GrpcRequest(e.to_string()))?; + + if let Some(proof_request) = resp.into_inner().proof_request { + let status = + ProofRequestStatus::try_from(proof_request.proof_status).unwrap_or_default(); + + match status { + ProofRequestStatus::Completed => { + info!("Proof completed in {:?}", start.elapsed()); + + // Get the actual proof artifact ID from the response + let actual_proof_id = + proof_request.proof_artifact_id.as_ref().ok_or_else(|| { + Error::ClusterProve("No proof artifact ID in response".to_string()) + })?; + + // Download proof + let proof_data = + self.download_artifact(&mut redis, actual_proof_id).await?; + + // Cleanup uploaded artifacts (program, stdin) + // Note: We don't clean up the proof artifact as it might be needed by the cluster + self.cleanup_artifacts(&mut redis, &[&program_id, &stdin_id]) + .await; + + // Get execution result + let (cycles, public_values_hash) = + if let Some(exec) = proof_request.execution_result { + (exec.cycles, exec.public_values_hash) + } else { + (0, vec![]) + }; + + return Ok(ProveResult { + proof: proof_data, + cycles, + public_values_hash, + proving_time: start.elapsed(), + }); + } + ProofRequestStatus::Failed | ProofRequestStatus::Cancelled => { + // Use proto enum for status display + let status_str = status.as_str_name(); + + let elapsed = start.elapsed(); + + // Build error message + let mut error_msg = format!( + "Proof request {} (status={}) after {:?}", + request_id, status_str, elapsed + ); + + // Add execution details + if let Some(exec) = &proof_request.execution_result { + let exec_status = ExecutionStatus::try_from(exec.status) + .unwrap_or(ExecutionStatus::Unspecified); + let failure_cause = ExecutionFailureCause::try_from(exec.failure_cause) + .unwrap_or(ExecutionFailureCause::Unspecified); + error_msg.push_str(&format!( + " - Execution: status={}, failure_cause={}, cycles={}, gas={}", + exec_status.as_str_name(), + failure_cause.as_str_name(), + exec.cycles, + exec.gas + )); + } else { + error_msg.push_str( + " - Execution: no execution_result (execution may not have started)", + ); + } + + // Add metadata if available + if !proof_request.metadata.is_empty() { + error_msg.push_str(&format!(" - metadata: {}", proof_request.metadata)); + } + + // Cleanup uploaded artifacts before returning error + self.cleanup_artifacts(&mut redis, &[&program_id, &stdin_id]) + .await; + + return Err(Error::ClusterProve(error_msg)); + } + _ => {} + } + } + + // Exponential backoff: increase interval up to max + sleep(Duration::from_millis(poll_interval_ms)).await; + poll_interval_ms = ((poll_interval_ms as f64 * POLL_BACKOFF_MULTIPLIER) as u64) + .min(POLL_MAX_INTERVAL_MS); + } + } +} + +/// Result from a prove operation +#[derive(Debug)] +pub struct ProveResult { + pub proof: Vec, + pub cycles: u64, + pub public_values_hash: Vec, + pub proving_time: Duration, +} diff --git a/crates/zkvm/sp1/src/zkvm/error.rs b/crates/zkvm/sp1/src/zkvm/error.rs index 18017eba..48103a44 100644 --- a/crates/zkvm/sp1/src/zkvm/error.rs +++ b/crates/zkvm/sp1/src/zkvm/error.rs @@ -30,4 +30,34 @@ pub enum Error { #[error("SP1 SDK verification failed: {0}")] Verify(#[source] SP1VerificationError), + + // Cluster-specific errors + #[error( + "SP1 Cluster endpoint not configured. Set SP1_CLUSTER_ENDPOINT environment variable or provide endpoint in ClusterProverConfig" + )] + EndpointNotConfigured, + + #[error("Redis URL not configured. Set SP1_CLUSTER_REDIS_URL environment variable")] + RedisNotConfigured, + + #[error("Failed to connect to gRPC service: {0}")] + GrpcConnect(String), + + #[error("gRPC request failed: {0}")] + GrpcRequest(String), + + #[error("Redis error: {0}")] + Redis(String), + + #[error("SP1 Cluster proving failed: {0}")] + ClusterProve(String), + + #[error("SP1 Cluster proving timed out after {0} seconds")] + ProveTimeout(u64), + + // Network-specific errors + #[error( + "Network proving requires a private key. Set NETWORK_PRIVATE_KEY environment variable or provide api_key in NetworkProverConfig" + )] + NetworkPrivateKeyNotConfigured, } diff --git a/crates/zkvm/sp1/src/zkvm/sdk.rs b/crates/zkvm/sp1/src/zkvm/sdk.rs index 3b926668..8b36740e 100644 --- a/crates/zkvm/sp1/src/zkvm/sdk.rs +++ b/crates/zkvm/sp1/src/zkvm/sdk.rs @@ -1,4 +1,5 @@ use crate::zkvm::Error; +use crate::zkvm::cluster::{ProveResult as ClusterProveResult, SP1ClusterClient}; use ere_zkvm_interface::zkvm::{NetworkProverConfig, ProverResourceType}; use sp1_sdk::{ CpuProver, CudaProver, NetworkProver, Prover as _, ProverClient, SP1ProofMode, @@ -10,21 +11,33 @@ pub enum Prover { Cpu(CpuProver), Gpu(CudaProver), Network(NetworkProver), + /// Cluster prover uses a separate SP1ClusterClient with a local CpuProver for verification + Cluster { + client: SP1ClusterClient, + local_prover: CpuProver, + }, } impl Default for Prover { fn default() -> Self { - Self::new(&ProverResourceType::Cpu) + Self::Cpu(ProverClient::builder().cpu().build()) } } impl Prover { - pub fn new(resource: &ProverResourceType) -> Self { - match resource { + pub fn new(resource: &ProverResourceType) -> Result { + Ok(match resource { ProverResourceType::Cpu => Self::Cpu(ProverClient::builder().cpu().build()), ProverResourceType::Gpu => Self::Gpu(ProverClient::builder().cuda().build()), - ProverResourceType::Network(config) => Self::Network(build_network_prover(config)), - } + ProverResourceType::Network(config) => Self::Network(build_network_prover(config)?), + ProverResourceType::Cluster(config) => { + let client = SP1ClusterClient::new(&config.endpoint, &config.redis_url)?; + Self::Cluster { + client, + local_prover: ProverClient::builder().cpu().build(), + } + } + }) } pub fn setup(&self, elf: &[u8]) -> (SP1ProvingKey, SP1VerifyingKey) { @@ -32,6 +45,7 @@ impl Prover { Self::Cpu(cpu_prover) => cpu_prover.setup(elf), Self::Gpu(cuda_prover) => cuda_prover.setup(elf), Self::Network(network_prover) => network_prover.setup(elf), + Self::Cluster { local_prover, .. } => local_prover.setup(elf), } } @@ -44,6 +58,10 @@ impl Prover { Self::Cpu(cpu_prover) => cpu_prover.execute(elf, input).run(), Self::Gpu(cuda_prover) => cuda_prover.execute(elf, input).run(), Self::Network(network_prover) => network_prover.execute(elf, input).run(), + Self::Cluster { local_prover, .. } => { + // Execute locally - cluster is for proving only + local_prover.execute(elf, input).run() + } } .map_err(Error::Execute) } @@ -55,11 +73,48 @@ impl Prover { mode: SP1ProofMode, ) -> Result { match self { - Self::Cpu(cpu_prover) => cpu_prover.prove(pk, input).mode(mode).run(), - Self::Gpu(cuda_prover) => cuda_prover.prove(pk, input).mode(mode).run(), - Self::Network(network_prover) => network_prover.prove(pk, input).mode(mode).run(), + Self::Cpu(cpu_prover) => cpu_prover + .prove(pk, input) + .mode(mode) + .run() + .map_err(Error::Prove), + Self::Gpu(cuda_prover) => cuda_prover + .prove(pk, input) + .mode(mode) + .run() + .map_err(Error::Prove), + Self::Network(network_prover) => network_prover + .prove(pk, input) + .mode(mode) + .run() + .map_err(Error::Prove), + Self::Cluster { .. } => { + // This method shouldn't be called for cluster - use prove_cluster instead + Err(Error::ClusterProve( + "Use prove_cluster() for cluster proving".to_string(), + )) + } } - .map_err(Error::Prove) + } + + /// Prove using the cluster + pub fn prove_cluster( + &self, + elf: &[u8], + stdin_bytes: &[u8], + mode: i32, + ) -> Result { + match self { + Self::Cluster { client, .. } => client.prove_sync(elf, stdin_bytes, mode), + _ => Err(Error::ClusterProve( + "prove_cluster is only available for Cluster prover".to_string(), + )), + } + } + + /// Check if this is a cluster prover + pub fn is_cluster(&self) -> bool { + matches!(self, Self::Cluster { .. }) } pub fn verify( @@ -71,12 +126,16 @@ impl Prover { Self::Cpu(cpu_prover) => cpu_prover.verify(proof, vk), Self::Gpu(cuda_prover) => cuda_prover.verify(proof, vk), Self::Network(network_prover) => network_prover.verify(proof, vk), + Self::Cluster { local_prover, .. } => { + // Verify locally + local_prover.verify(proof, vk) + } } .map_err(Error::Verify) } } -fn build_network_prover(config: &NetworkProverConfig) -> NetworkProver { +fn build_network_prover(config: &NetworkProverConfig) -> Result { let mut builder = ProverClient::builder().network(); // Check if we have a private key in the config or environment if let Some(api_key) = &config.api_key { @@ -84,9 +143,7 @@ fn build_network_prover(config: &NetworkProverConfig) -> NetworkProver { } else if let Ok(private_key) = std::env::var("NETWORK_PRIVATE_KEY") { builder = builder.private_key(&private_key); } else { - panic!( - "Network proving requires a private key. Set NETWORK_PRIVATE_KEY environment variable or provide api_key in NetworkProverConfig" - ); + return Err(Error::NetworkPrivateKeyNotConfigured); } // Set the RPC URL if provided if !config.endpoint.is_empty() { @@ -95,5 +152,5 @@ fn build_network_prover(config: &NetworkProverConfig) -> NetworkProver { builder = builder.rpc_url(&rpc_url); } // Otherwise SP1 SDK will use its default RPC URL - builder.build() + Ok(builder.build()) } From 29b4ae71306c48196d6119d5ae34324fe1522d26 Mon Sep 17 00:00:00 2001 From: Musa AbdulKareem Date: Sat, 3 Jan 2026 07:30:04 +0100 Subject: [PATCH 02/11] fix: update image registry variable to use lowercase repository name in CI workflow --- .github/workflows/test-zkvm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-zkvm.yml b/.github/workflows/test-zkvm.yml index 80981d2d..55a01637 100644 --- a/.github/workflows/test-zkvm.yml +++ b/.github/workflows/test-zkvm.yml @@ -78,7 +78,7 @@ jobs: IMAGE_TAG="$ZKVM_CRATE_VERSION-${GIT_REV:0:7}" CACHED_IMAGE_TAG="$ZKVM_CRATE_VERSION-${CACHED_GIT_REV:0:7}" - IMAGE_REGISTRY="ghcr.io/${{ github.repository }}" + IMAGE_REGISTRY="ghcr.io/${GITHUB_REPOSITORY,,}" BASE_IMAGE="$IMAGE_REGISTRY/ere-base:$IMAGE_TAG" BASE_ZKVM_IMAGE="$IMAGE_REGISTRY/ere-base-${{ inputs.zkvm }}:$IMAGE_TAG" COMPILER_ZKVM_IMAGE="$IMAGE_REGISTRY/ere-compiler-${{ inputs.zkvm }}:$IMAGE_TAG" From 4bac87eb8dc14ab51188b49feaba971bd90859bb Mon Sep 17 00:00:00 2001 From: Musa AbdulKareem Date: Sat, 3 Jan 2026 07:43:47 +0100 Subject: [PATCH 03/11] fix: update panic messages for unimplemented resource types in zkVM implementations Changes: - Updated panic messages to include Cluster resource type for EreOpenVM, EreRisc0, EreZiren, and EreZisk. - Adjusted match patterns to handle both Network and Cluster resource types consistently across implementations. --- crates/zkvm/openvm/src/zkvm.rs | 8 ++++---- crates/zkvm/risc0/src/zkvm.rs | 11 ++++++---- crates/zkvm/sp1/src/zkvm/cluster.rs | 32 ++++++++++++++--------------- crates/zkvm/ziren/src/zkvm.rs | 8 ++++++-- crates/zkvm/zisk/src/zkvm.rs | 9 ++++++-- 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/crates/zkvm/openvm/src/zkvm.rs b/crates/zkvm/openvm/src/zkvm.rs index 24173025..41ccdef5 100644 --- a/crates/zkvm/openvm/src/zkvm.rs +++ b/crates/zkvm/openvm/src/zkvm.rs @@ -41,9 +41,9 @@ impl EreOpenVM { ProverResourceType::Gpu => { panic!("Feature `cuda` is disabled. Enable `cuda` to use GPU resource type") } - ProverResourceType::Network(_) => { + ProverResourceType::Network(_) | ProverResourceType::Cluster(_) => { panic!( - "Network proving not yet implemented for OpenVM. Use CPU or GPU resource type." + "Network/Cluster proving not yet implemented for OpenVM. Use CPU or GPU resource type." ); } _ => {} @@ -172,9 +172,9 @@ impl zkVM for EreOpenVM { ProverResourceType::Gpu => { panic!("Feature `cuda` is disabled. Enable `cuda` to use GPU resource type") } - ProverResourceType::Network(_) => { + ProverResourceType::Network(_) | ProverResourceType::Cluster(_) => { panic!( - "Network proving not yet implemented for OpenVM. Use CPU or GPU resource type." + "Network/Cluster proving not yet implemented for OpenVM. Use CPU or GPU resource type." ); } } diff --git a/crates/zkvm/risc0/src/zkvm.rs b/crates/zkvm/risc0/src/zkvm.rs index 35f2a2af..80f782b7 100644 --- a/crates/zkvm/risc0/src/zkvm.rs +++ b/crates/zkvm/risc0/src/zkvm.rs @@ -51,9 +51,12 @@ pub struct EreRisc0 { impl EreRisc0 { pub fn new(program: Risc0Program, resource: ProverResourceType) -> Result { - if matches!(resource, ProverResourceType::Network(_)) { + if matches!( + resource, + ProverResourceType::Network(_) | ProverResourceType::Cluster(_) + ) { panic!( - "Network proving not yet implemented for RISC Zero. Use CPU or GPU resource type." + "Network/Cluster proving not yet implemented for RISC Zero. Use CPU or GPU resource type." ); } @@ -127,9 +130,9 @@ impl zkVM for EreRisc0 { Rc::new(DefaultProver::new("r0vm-cuda").map_err(Error::InitializeCudaProver)?) } } - ProverResourceType::Network(_) => { + ProverResourceType::Network(_) | ProverResourceType::Cluster(_) => { panic!( - "Network proving not yet implemented for RISC Zero. Use CPU or GPU resource type." + "Network/Cluster proving not yet implemented for RISC Zero. Use CPU or GPU resource type." ); } }; diff --git a/crates/zkvm/sp1/src/zkvm/cluster.rs b/crates/zkvm/sp1/src/zkvm/cluster.rs index 597422fc..3f9b9628 100644 --- a/crates/zkvm/sp1/src/zkvm/cluster.rs +++ b/crates/zkvm/sp1/src/zkvm/cluster.rs @@ -56,7 +56,7 @@ impl SP1ClusterClient { /// Synchronous wrapper for prove that creates a runtime on-demand pub fn prove_sync(&self, elf: &[u8], stdin: &[u8], mode: i32) -> Result { let runtime = tokio::runtime::Runtime::new() - .map_err(|e| Error::ClusterProve(format!("Failed to create tokio runtime: {}", e)))?; + .map_err(|e| Error::ClusterProve(format!("Failed to create tokio runtime: {e}")))?; let result = runtime.block_on(self.prove(elf, stdin, mode)); // Drop the runtime in a separate thread to avoid panic when @@ -97,7 +97,7 @@ impl SP1ClusterClient { ) -> Result<(), Error> { // Compress with zstd (level 0 for fast compression) let compressed = zstd::encode_all(data, 0) - .map_err(|e| Error::Redis(format!("Failed to compress artifact: {}", e)))?; + .map_err(|e| Error::Redis(format!("Failed to compress artifact: {e}")))?; // Store with just the artifact_id as key (as SP1 Cluster expects) conn.set::<_, _, ()>(artifact_id, &compressed) @@ -121,7 +121,7 @@ impl SP1ClusterClient { artifact_id: &str, ) -> Result, Error> { // Check if artifact is stored in chunks - let chunks_key = format!("{}:chunks", artifact_id); + let chunks_key = format!("{artifact_id}:chunks"); let total_chunks: usize = conn .hlen(&chunks_key) .await @@ -139,7 +139,7 @@ impl SP1ClusterClient { let chunk: Vec = conn .hget(&chunks_key, i) .await - .map_err(|e| Error::Redis(format!("Failed to get chunk {}: {}", i, e)))?; + .map_err(|e| Error::Redis(format!("Failed to get chunk {i}: {e}")))?; chunks.push(chunk); } chunks.into_iter().flatten().collect() @@ -147,7 +147,7 @@ impl SP1ClusterClient { // Decompress with zstd let data = zstd::decode_all(compressed.as_slice()) - .map_err(|e| Error::Redis(format!("Failed to decompress artifact: {}", e)))?; + .map_err(|e| Error::Redis(format!("Failed to decompress artifact: {e}")))?; debug!( "Downloaded artifact {} ({} bytes compressed -> {} bytes, chunks: {})", @@ -167,7 +167,7 @@ impl SP1ClusterClient { ) { for artifact_id in artifact_ids { // Try to delete both the simple key and chunks key - let chunks_key = format!("{}:chunks", artifact_id); + let chunks_key = format!("{artifact_id}:chunks"); let _: Result<(), _> = conn.del::<_, ()>(*artifact_id).await; let _: Result<(), _> = conn.del::<_, ()>(&chunks_key).await; } @@ -187,7 +187,7 @@ impl SP1ClusterClient { // Program needs to be bincode serialized (wrapping the ELF bytes) // Use bincode 1.x to match sp1-cluster's bincode version let program_serialized = bincode1::serialize(&elf.to_vec()) - .map_err(|e| Error::Redis(format!("Failed to serialize program: {}", e)))?; + .map_err(|e| Error::Redis(format!("Failed to serialize program: {e}")))?; self.upload_artifact(&mut redis, &program_id, &program_serialized) .await?; @@ -200,7 +200,7 @@ impl SP1ClusterClient { .duration_since(UNIX_EPOCH) .unwrap() .as_millis(); - let request_id = format!("ere_{}", timestamp); + let request_id = format!("ere_{timestamp}"); let deadline = SystemTime::now() + Duration::from_secs(DEFAULT_TIMEOUT_SECS); @@ -284,8 +284,7 @@ impl SP1ClusterClient { // Build error message let mut error_msg = format!( - "Proof request {} (status={}) after {:?}", - request_id, status_str, elapsed + "Proof request {request_id} (status={status_str}) after {elapsed:?}" ); // Add execution details @@ -294,12 +293,12 @@ impl SP1ClusterClient { .unwrap_or(ExecutionStatus::Unspecified); let failure_cause = ExecutionFailureCause::try_from(exec.failure_cause) .unwrap_or(ExecutionFailureCause::Unspecified); + let exec_status_name = exec_status.as_str_name(); + let failure_cause_name = failure_cause.as_str_name(); + let cycles = exec.cycles; + let gas = exec.gas; error_msg.push_str(&format!( - " - Execution: status={}, failure_cause={}, cycles={}, gas={}", - exec_status.as_str_name(), - failure_cause.as_str_name(), - exec.cycles, - exec.gas + " - Execution: status={exec_status_name}, failure_cause={failure_cause_name}, cycles={cycles}, gas={gas}" )); } else { error_msg.push_str( @@ -309,7 +308,8 @@ impl SP1ClusterClient { // Add metadata if available if !proof_request.metadata.is_empty() { - error_msg.push_str(&format!(" - metadata: {}", proof_request.metadata)); + let metadata = &proof_request.metadata; + error_msg.push_str(&format!(" - metadata: {metadata}")); } // Cleanup uploaded artifacts before returning error diff --git a/crates/zkvm/ziren/src/zkvm.rs b/crates/zkvm/ziren/src/zkvm.rs index dd4ea3af..6cfa7cc1 100644 --- a/crates/zkvm/ziren/src/zkvm.rs +++ b/crates/zkvm/ziren/src/zkvm.rs @@ -27,9 +27,13 @@ impl EreZiren { pub fn new(program: ZirenProgram, resource: ProverResourceType) -> Result { if matches!( resource, - ProverResourceType::Gpu | ProverResourceType::Network(_) + ProverResourceType::Gpu + | ProverResourceType::Network(_) + | ProverResourceType::Cluster(_) ) { - panic!("Network or Gpu proving not yet implemented for ZKM. Use CPU resource type."); + panic!( + "Network/Cluster/Gpu proving not yet implemented for ZKM. Use CPU resource type." + ); } let (pk, vk) = CpuProver::new().setup(program.elf()); Ok(Self { program, pk, vk }) diff --git a/crates/zkvm/zisk/src/zkvm.rs b/crates/zkvm/zisk/src/zkvm.rs index d7029c24..44319230 100644 --- a/crates/zkvm/zisk/src/zkvm.rs +++ b/crates/zkvm/zisk/src/zkvm.rs @@ -31,8 +31,13 @@ pub struct EreZisk { impl EreZisk { pub fn new(program: ZiskProgram, resource: ProverResourceType) -> Result { - if matches!(resource, ProverResourceType::Network(_)) { - panic!("Network proving not yet implemented for ZisK. Use CPU or GPU resource type."); + if matches!( + resource, + ProverResourceType::Network(_) | ProverResourceType::Cluster(_) + ) { + panic!( + "Network/Cluster proving not yet implemented for ZisK. Use CPU or GPU resource type." + ); } let sdk = ZiskSdk::new(program.elf, resource, ZiskOptions::from_env())?; Ok(Self { From e2bbae86cf1da600113c902dd9657b1a3267bcba Mon Sep 17 00:00:00 2001 From: Musa AbdulKareem Date: Wed, 7 Jan 2026 23:13:44 +0100 Subject: [PATCH 04/11] chore: update clap dependency to include derive and env features --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1e971c2a..c76e0388 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,7 @@ bytemuck = "1.23.1" cargo_metadata = "0.19.0" ciborium = { version = "0.2.2", default-features = false } ciborium-io = { version = "0.2.2", default-features = false } -clap = "4.5.42" +clap = { version = "4.5.42", features = ["derive", "env"] } dashmap = "6.1.0" digest = { version = "0.10.7", default-features = false } eyre = "0.6.12" From 979abb3b42fad21503c560a595f526818401d395 Mon Sep 17 00:00:00 2001 From: Musa AbdulKareem Date: Wed, 7 Jan 2026 23:38:14 +0100 Subject: [PATCH 05/11] fix(ziren): improve ZKM toolchain linking in install script - Source environment file after zkmup installation to ensure PATH is set - Add explicit error handling for zkmup list-available command - Add fallback logic to find toolchain directory - Add debug output when toolchain path resolution fails This fixes the CI failure where rustup toolchain link was called with missing PATH argument due to zkmup not being in PATH after installation. --- scripts/sdk_installers/install_ziren_sdk.sh | 36 ++++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/scripts/sdk_installers/install_ziren_sdk.sh b/scripts/sdk_installers/install_ziren_sdk.sh index 3d2b5d4a..638b4f55 100755 --- a/scripts/sdk_installers/install_ziren_sdk.sh +++ b/scripts/sdk_installers/install_ziren_sdk.sh @@ -33,12 +33,40 @@ ZIREM_VERSION="1.2.3" # Step 1: Download and run the script that installs the zkmup binary itself. curl --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/ProjectZKM/toolchain/refs/heads/main/setup.sh | sh -# Step 2: Ensure the installed zkmup script is in PATH +# Step 2: Source the environment file to ensure zkmup is available +if [ -f "${HOME}/.zkm-toolchain/env" ]; then + . "${HOME}/.zkm-toolchain/env" +fi + +# Step 3: Ensure the installed zkmup script is in PATH (fallback if sourcing didn't work) export PATH="${PATH}:${HOME}/.zkm-toolchain/bin" -# Step 3: Link the latest toolchain as toolchain `zkm` -rustup toolchain link zkm $(ls -d $HOME/.zkm-toolchain/* | grep "$(zkmup list-available | cut -d' ' -f1)$") -# Step 4: Install cargo-ziren by building from source +# Step 4: Link the latest toolchain as toolchain `zkm` +# Get the latest available version from zkmup +LATEST_VERSION=$(zkmup list-available 2>/dev/null | head -1 | cut -d' ' -f1) +if [ -z "${LATEST_VERSION}" ]; then + echo "Error: Failed to get available ZKM toolchain versions from zkmup" >&2 + exit 1 +fi + +# Find the toolchain directory +TOOLCHAIN_PATH="${HOME}/.zkm-toolchain/${LATEST_VERSION}" +if [ ! -d "${TOOLCHAIN_PATH}" ]; then + # Try to find any installed toolchain + TOOLCHAIN_PATH=$(ls -d ${HOME}/.zkm-toolchain/*/ 2>/dev/null | grep -v bin | grep -v env | head -1) +fi + +if [ -z "${TOOLCHAIN_PATH}" ] || [ ! -d "${TOOLCHAIN_PATH}" ]; then + echo "Error: Could not find ZKM toolchain directory" >&2 + echo "Available directories in ~/.zkm-toolchain:" >&2 + ls -la "${HOME}/.zkm-toolchain/" >&2 || true + exit 1 +fi + +echo "Linking ZKM toolchain from: ${TOOLCHAIN_PATH}" +rustup toolchain link zkm "${TOOLCHAIN_PATH}" + +# Step 5: Install cargo-ziren by building from source cargo +nightly install --locked --git https://github.com/ProjectZKM/Ziren.git --tag "v${ZIREM_VERSION}" zkm-cli # Verify ZKM installation From 3edcc4ceb828ab8bb0840ffd9e09b4523c8c093f Mon Sep 17 00:00:00 2001 From: Musa AbdulKareem Date: Wed, 7 Jan 2026 23:44:34 +0100 Subject: [PATCH 06/11] fix(ziren): remove +nightly from cargo install command The Dockerfile.base for ziren already sets nightly as the default toolchain via 'rustup default nightly', so using 'cargo +nightly' is unnecessary and can fail with 'no such command' error in some Docker environments where the +toolchain syntax isn't recognized. --- scripts/sdk_installers/install_ziren_sdk.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/sdk_installers/install_ziren_sdk.sh b/scripts/sdk_installers/install_ziren_sdk.sh index 638b4f55..185a1dd1 100755 --- a/scripts/sdk_installers/install_ziren_sdk.sh +++ b/scripts/sdk_installers/install_ziren_sdk.sh @@ -67,7 +67,9 @@ echo "Linking ZKM toolchain from: ${TOOLCHAIN_PATH}" rustup toolchain link zkm "${TOOLCHAIN_PATH}" # Step 5: Install cargo-ziren by building from source -cargo +nightly install --locked --git https://github.com/ProjectZKM/Ziren.git --tag "v${ZIREM_VERSION}" zkm-cli +# Note: The Dockerfile sets nightly as default, so we don't need +nightly here. +# Using +nightly can fail if cargo doesn't recognize the toolchain selector syntax. +cargo install --locked --git https://github.com/ProjectZKM/Ziren.git --tag "v${ZIREM_VERSION}" zkm-cli # Verify ZKM installation echo "Verifying ZKM installation..." From 6445887d0ba2671253d46c4565e2e0f9c6cae09e Mon Sep 17 00:00:00 2001 From: Matteo Lisotto Date: Mon, 9 Feb 2026 10:42:07 +0100 Subject: [PATCH 07/11] refactor: use `sp1-cluster-common` package --- Cargo.lock | 247 +++++++++++++++++++++++++++- Cargo.toml | 1 + crates/zkvm/sp1/Cargo.toml | 3 +- crates/zkvm/sp1/build.rs | 7 - crates/zkvm/sp1/proto/cluster.proto | 122 -------------- crates/zkvm/sp1/src/zkvm/cluster.rs | 7 +- 6 files changed, 248 insertions(+), 139 deletions(-) delete mode 100644 crates/zkvm/sp1/proto/cluster.proto diff --git a/Cargo.lock b/Cargo.lock index 1c2252d9..2e011a74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1093,6 +1093,17 @@ dependencies = [ "term 1.2.0", ] +[[package]] +name = "async-scoped" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4042078ea593edffc452eef14e99fdb2b120caa4ad9618bcdeabc4a023b98740" +dependencies = [ + "futures", + "pin-project", + "tokio", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -1137,6 +1148,15 @@ dependencies = [ "rustc_version 0.4.1", ] +[[package]] +name = "atomic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" +dependencies = [ + "bytemuck", +] + [[package]] name = "atomic-polyfill" version = "1.0.3" @@ -2389,8 +2409,10 @@ checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-link 0.1.3", ] @@ -4158,15 +4180,14 @@ dependencies = [ "ere-compile-utils", "ere-test-utils", "ere-zkvm-interface", - "prost 0.13.5", "redis", "serde", + "sp1-cluster-common", "sp1-sdk", "tempfile", "thiserror 2.0.12", "tokio", "tonic 0.12.3", - "tonic-build 0.12.3", "tracing", "ulid", "zstd 0.13.3", @@ -5930,6 +5951,16 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "if-addrs" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b2eeee38fef3aa9b4cc5f1beea8a2444fc00e7377cafae396de3f5c2065e24" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "impl-codec" version = "0.6.0" @@ -7242,6 +7273,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "mti" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9563a7d5556636e74bbd8773241fbcbc5c89b9f6bfdc97b29b56e740c2c74b9" +dependencies = [ + "typeid_prefix", + "typeid_suffix", +] + [[package]] name = "multimap" version = "0.8.3" @@ -7959,6 +8000,86 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b69a91d4893e713e06f724597ad630f1fa76057a5e1026c0ca67054a9032a76" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "once_cell", + "pin-project-lite", + "thiserror 1.0.69", +] + +[[package]] +name = "opentelemetry-appender-tracing" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be314095f27dde46fca7038b023457d2b3459e1c39033dacc2ec1b31df11a61c" +dependencies = [ + "once_cell", + "opentelemetry", + "tracing", + "tracing-core", + "tracing-subscriber 0.3.20", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a94c69209c05319cdf7460c6d4c055ed102be242a0a6245835d7bc42c6ec7f54" +dependencies = [ + "async-trait", + "futures-core", + "http 0.2.12", + "opentelemetry", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost 0.12.6", + "thiserror 1.0.69", + "tokio", + "tonic 0.11.0", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "984806e6cf27f2b49282e2a05e288f30594f3dbc74eb7a6e99422bc48ed78162" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost 0.12.6", + "tonic 0.11.0", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae312d58eaa90a82d2e627fd86e075cf5230b3f11794e2ed74199ebbe572d4fd" +dependencies = [ + "async-trait", + "futures-channel", + "futures-executor", + "futures-util", + "glob", + "lazy_static", + "once_cell", + "opentelemetry", + "ordered-float", + "percent-encoding", + "rand 0.8.5", + "serde_json", + "thiserror 1.0.69", + "tokio", + "tokio-stream", +] + [[package]] name = "openvm" version = "1.4.2" @@ -13185,6 +13306,34 @@ dependencies = [ "sp1-prover", ] +[[package]] +name = "sp1-cluster-common" +version = "1.0.0" +source = "git+https://github.com/succinctlabs/sp1-cluster#9b433a1ef4b90573fa1b05a4f7f6b33fd9a950f0" +dependencies = [ + "backoff", + "chrono", + "eyre", + "futures", + "if-addrs", + "lazy_static", + "log", + "opentelemetry", + "opentelemetry-appender-tracing", + "opentelemetry-otlp", + "opentelemetry_sdk", + "prost 0.13.5", + "rustls 0.23.27", + "serde", + "sp1-prover-types", + "tokio-blocked", + "tonic 0.12.3", + "tonic-build 0.12.3", + "tracing", + "tracing-opentelemetry", + "tracing-subscriber 0.3.20", +] + [[package]] name = "sp1-core-executor" version = "5.2.4" @@ -13407,6 +13556,26 @@ dependencies = [ "tracing-subscriber 0.3.20", ] +[[package]] +name = "sp1-prover-types" +version = "6.0.0-rc.1" +source = "git+https://github.com/succinctlabs/sp1?branch=tamir%2Ffix-deferred-digest#6aa07b14fe890effff53dd5128e757dafd2b160b" +dependencies = [ + "anyhow", + "async-scoped", + "bincode 1.3.3", + "chrono", + "futures-util", + "hashbrown 0.14.5", + "mti", + "prost 0.13.5", + "serde", + "tokio", + "tonic 0.12.3", + "tonic-build 0.12.3", + "tracing", +] + [[package]] name = "sp1-recursion-circuit" version = "5.2.4" @@ -14387,6 +14556,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tokio-blocked" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbd262ab9dcfa7f6820fc236c63609a2256e3ed46ca563abc3279ea2c6586212" +dependencies = [ + "tracing", + "tracing-core", + "tracing-subscriber 0.3.20", +] + [[package]] name = "tokio-io-timeout" version = "1.2.1" @@ -14614,6 +14794,33 @@ dependencies = [ "tracing-futures", ] +[[package]] +name = "tonic" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" +dependencies = [ + "async-stream", + "async-trait", + "axum 0.6.20", + "base64 0.21.7", + "bytes", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-timeout 0.4.1", + "percent-encoding", + "pin-project", + "prost 0.12.6", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic" version = "0.12.3" @@ -14876,6 +15083,24 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-opentelemetry" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f68803492bf28ab40aeccaecc7021096bd256baf7ca77c3d425d89b35a7be4e4" +dependencies = [ + "js-sys", + "once_cell", + "opentelemetry", + "opentelemetry_sdk", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber 0.3.20", + "web-time", +] + [[package]] name = "tracing-subscriber" version = "0.2.25" @@ -15105,6 +15330,21 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" +[[package]] +name = "typeid_prefix" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9da1387307fdee46aa441e4f08a1b491e659fcac1aca9cd71f2c624a0de5d1b" + +[[package]] +name = "typeid_suffix" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b55e96f110c6db5d1a2f24072552537f0091dc90cebeaa679540bac93e7405" +dependencies = [ + "uuid 1.17.0", +] + [[package]] name = "typenum" version = "1.18.0" @@ -15300,9 +15540,12 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ + "atomic", "getrandom 0.3.3", "js-sys", + "md-5", "rand 0.9.2", + "sha1_smol", "uuid-macro-internal", "wasm-bindgen", ] diff --git a/Cargo.toml b/Cargo.toml index c76e0388..acb5252e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,6 +134,7 @@ risc0-zkvm-platform = { version = "2.2.1", default-features = false } # SP1 dependencies sp1-sdk = "5.2.4" sp1-zkvm = { version = "5.2.4", default-features = false } +sp1-cluster-common = { git = "https://github.com/succinctlabs/sp1-cluster", package = "sp1-cluster-common" } # Ziren dependencies zkm-sdk = { git = "https://github.com/ProjectZKM/Ziren.git", tag = "v1.2.3" } diff --git a/crates/zkvm/sp1/Cargo.toml b/crates/zkvm/sp1/Cargo.toml index ea1c567d..347333f3 100644 --- a/crates/zkvm/sp1/Cargo.toml +++ b/crates/zkvm/sp1/Cargo.toml @@ -21,12 +21,12 @@ ere-compile-utils = { workspace = true, optional = true } ere-zkvm-interface.workspace = true # Cluster dependencies -prost.workspace = true redis = { workspace = true, features = ["tokio-comp", "cluster-async"] } tokio = { workspace = true, features = ["rt-multi-thread", "macros", "time"] } tonic.workspace = true ulid.workspace = true zstd.workspace = true +sp1-cluster-common.workspace = true # bincode 1.x for deserializing proofs from SP1 Cluster (cluster uses bincode 1.x) bincode1 = { package = "bincode", version = "1.3" } @@ -35,7 +35,6 @@ ere-test-utils = { workspace = true, features = ["host"] } [build-dependencies] ere-build-utils.workspace = true -tonic-build = "0.12" [features] default = ["compiler", "zkvm"] diff --git a/crates/zkvm/sp1/build.rs b/crates/zkvm/sp1/build.rs index bc88045c..f3b5e662 100644 --- a/crates/zkvm/sp1/build.rs +++ b/crates/zkvm/sp1/build.rs @@ -2,11 +2,4 @@ use ere_build_utils::detect_and_generate_name_and_sdk_version; fn main() { detect_and_generate_name_and_sdk_version("sp1", "sp1-sdk"); - - // Compile cluster proto - let proto_dir = std::path::Path::new("proto"); - tonic_build::configure() - .build_server(false) - .compile_protos(&[proto_dir.join("cluster.proto")], &[proto_dir]) - .expect("Failed to compile cluster proto"); } diff --git a/crates/zkvm/sp1/proto/cluster.proto b/crates/zkvm/sp1/proto/cluster.proto deleted file mode 100644 index 32278892..00000000 --- a/crates/zkvm/sp1/proto/cluster.proto +++ /dev/null @@ -1,122 +0,0 @@ -// The RPC for interacting with the Prover Cluster. -syntax = "proto3"; - -package cluster; - -service ClusterService { - rpc ProofRequestCreate(ProofRequestCreateRequest) returns (Empty) {} - rpc ProofRequestCancel(ProofRequestCancelRequest) returns (Empty) {} - rpc ProofRequestUpdate(ProofRequestUpdateRequest) returns (Empty) {} - rpc ProofRequestGet(ProofRequestGetRequest) returns (ProofRequestGetResponse) {} - rpc ProofRequestList(ProofRequestListRequest) returns (ProofRequestListResponse) {} - - rpc Healthcheck(Empty) returns (Empty); -} - -message Empty {} - -message ProofRequestCreateRequest { - string proof_id = 1; - string program_artifact_id = 2; - string stdin_artifact_id = 3; - optional string options_artifact_id = 4; - optional string proof_artifact_id = 5; - bytes requester = 6; - uint64 deadline = 7; - uint64 cycle_limit = 8; - uint64 gas_limit = 9; -} - -message ProofRequestCancelRequest { - string proof_id = 1; -} - -message ProofRequest { - string id = 1; - ProofRequestStatus proof_status = 2; - bytes requester = 3; - ExecutionResult execution_result = 4; - string stdin_artifact_id = 5; - string program_artifact_id = 6; - optional string proof_artifact_id = 7; - optional string options_artifact_id = 8; - optional uint64 cycle_limit = 9; - optional uint64 gas_limit = 10; - uint64 deadline = 11; - bool handled = 12; - string metadata = 13; - uint64 created_at = 14; - uint64 updated_at = 15; -} - -message ProofRequestListRequest { - repeated ProofRequestStatus proof_status = 1; - repeated ExecutionStatus execution_status = 2; - optional uint64 minimum_deadline = 3; - optional bool handled = 4; - optional uint32 limit = 5; - optional uint32 offset = 6; -} - -message ProofRequestListResponse { - repeated ProofRequest proof_requests = 1; -} - -enum ProofRequestStatus { - PROOF_REQUEST_STATUS_UNSPECIFIED = 0; - PROOF_REQUEST_STATUS_PENDING = 1; - PROOF_REQUEST_STATUS_COMPLETED = 2; - PROOF_REQUEST_STATUS_FAILED = 3; - PROOF_REQUEST_STATUS_CANCELLED = 4; -} - -// The possible statuses for the execution of the program. -enum ExecutionStatus { - EXECUTION_STATUS_UNSPECIFIED = 0; - EXECUTION_STATUS_UNEXECUTED = 1; - EXECUTION_STATUS_EXECUTED = 2; - EXECUTION_STATUS_FAILED = 3; - EXECUTION_STATUS_CANCELLED = 4; -} - -// The specific reasons why an execution might fail. -enum ExecutionFailureCause { - EXECUTION_FAILURE_CAUSE_UNSPECIFIED = 0; - EXECUTION_FAILURE_CAUSE_HALT_WITH_NON_ZERO_EXIT_CODE = 1; - EXECUTION_FAILURE_CAUSE_INVALID_MEMORY_ACCESS = 2; - EXECUTION_FAILURE_CAUSE_UNSUPPORTED_SYSCALL = 3; - EXECUTION_FAILURE_CAUSE_BREAKPOINT = 4; - EXECUTION_FAILURE_CAUSE_EXCEEDED_CYCLE_LIMIT = 5; - EXECUTION_FAILURE_CAUSE_INVALID_SYSCALL_USAGE = 6; - EXECUTION_FAILURE_CAUSE_UNIMPLEMENTED = 7; - EXECUTION_FAILURE_CAUSE_END_IN_UNCONSTRAINED = 8; -} - -message ExecutionResult { - ExecutionStatus status = 1; - ExecutionFailureCause failure_cause = 2; - uint64 cycles = 3; - uint64 gas = 4; - bytes public_values_hash = 5; -} - -// The request to get a specific proof request. -message ProofRequestGetRequest { - string proof_id = 1; -} - -// The response containing the details of a single proof request. -message ProofRequestGetResponse { - ProofRequest proof_request = 1; -} - -// The request to update a proof request. -message ProofRequestUpdateRequest { - string proof_id = 1; - optional ProofRequestStatus proof_status = 2; - optional ExecutionResult execution_result = 3; - optional uint64 deadline = 4; - optional bool handled = 5; - optional string metadata = 6; -} - diff --git a/crates/zkvm/sp1/src/zkvm/cluster.rs b/crates/zkvm/sp1/src/zkvm/cluster.rs index 3f9b9628..86f08fb4 100644 --- a/crates/zkvm/sp1/src/zkvm/cluster.rs +++ b/crates/zkvm/sp1/src/zkvm/cluster.rs @@ -8,12 +8,7 @@ use tonic::transport::Channel; use tracing::{debug, info}; use ulid::Ulid; -// Include the generated protobuf code -pub mod cluster_proto { - tonic::include_proto!("cluster"); -} - -use cluster_proto::{ +use sp1_cluster_common::proto::{ ExecutionFailureCause, ExecutionStatus, ProofRequestCreateRequest, ProofRequestGetRequest, ProofRequestStatus, cluster_service_client::ClusterServiceClient, }; From ae57c58e9416f53fbe1bb070e807e871ec7bbc98 Mon Sep 17 00:00:00 2001 From: Matteo Lisotto Date: Mon, 9 Feb 2026 10:43:04 +0100 Subject: [PATCH 08/11] feat: generalize cluster flags. Don't use the SP1 specific flags --- crates/zkvm-interface/src/zkvm/resource.rs | 33 ++------------- crates/zkvm/sp1/src/zkvm.rs | 22 +++++++--- crates/zkvm/sp1/src/zkvm/cluster.rs | 4 +- crates/zkvm/sp1/src/zkvm/error.rs | 6 ++- crates/zkvm/sp1/src/zkvm/sdk.rs | 48 +++++++++++++++------- 5 files changed, 59 insertions(+), 54 deletions(-) diff --git a/crates/zkvm-interface/src/zkvm/resource.rs b/crates/zkvm-interface/src/zkvm/resource.rs index 72258fc2..5811c00d 100644 --- a/crates/zkvm-interface/src/zkvm/resource.rs +++ b/crates/zkvm-interface/src/zkvm/resource.rs @@ -29,40 +29,15 @@ impl NetworkProverConfig { #[serde(rename_all = "kebab-case")] #[cfg_attr(feature = "clap", derive(clap::Args))] pub struct ClusterProverConfig { - #[cfg_attr( - feature = "clap", - arg( - long, - env = "SP1_CLUSTER_ENDPOINT", - default_value = "http://172.17.0.1:50051/" - ) - )] - /// The gRPC endpoint URL of the cluster API service (e.g., http://172.17.0.1:50051) + #[cfg_attr(feature = "clap", arg(long))] + /// The gRPC endpoint URL of the cluster API service pub endpoint: String, - #[cfg_attr( - feature = "clap", - arg( - long, - env = "SP1_CLUSTER_REDIS_URL", - default_value = "redis://:redispassword@172.17.0.1:6379/0" - ) - )] - /// Redis URL for artifact storage (e.g., redis://:password@172.17.0.1:6379/0) + #[cfg_attr(feature = "clap", arg(long))] + /// Redis URL for artifact storage pub redis_url: String, } -impl Default for ClusterProverConfig { - fn default() -> Self { - Self { - endpoint: std::env::var("SP1_CLUSTER_ENDPOINT") - .unwrap_or("http://172.17.0.1:50051/".to_string()), - redis_url: std::env::var("SP1_CLUSTER_REDIS_URL") - .unwrap_or("redis://:redispassword@172.17.0.1:6379/0".to_string()), - } - } -} - #[cfg(feature = "clap")] impl ClusterProverConfig { pub fn to_args(&self) -> Vec<&str> { diff --git a/crates/zkvm/sp1/src/zkvm.rs b/crates/zkvm/sp1/src/zkvm.rs index a9f14c3b..7634c45e 100644 --- a/crates/zkvm/sp1/src/zkvm.rs +++ b/crates/zkvm/sp1/src/zkvm.rs @@ -348,18 +348,28 @@ mod tests { } #[test] - #[ignore = "Requires SP1_CLUSTER_ENDPOINT environment variable to be set"] + #[ignore = "Requires CLUSTER_* environment variables to be set"] fn test_prove_sp1_cluster() { use ere_zkvm_interface::zkvm::ClusterProverConfig; - // Check if we have the required environment variable - if std::env::var("SP1_CLUSTER_ENDPOINT").is_err() { - eprintln!("Skipping cluster test: SP1_CLUSTER_ENDPOINT not set"); + let endpoint = std::env::var("CLUSTER_ENDPOINT") + .ok() + .filter(|value| !value.trim().is_empty()); + let redis_url = std::env::var("CLUSTER_REDIS_URL") + .ok() + .filter(|value| !value.trim().is_empty()); + + // Require both endpoint and Redis URL. + if endpoint.is_none() || redis_url.is_none() { + eprintln!("Skipping cluster test: CLUSTER_* environment variables not set"); return; } - // Create a cluster prover configuration - let cluster_config = ClusterProverConfig::default(); + // Use empty config to exercise env resolution in SP1 cluster setup. + let cluster_config = ClusterProverConfig { + endpoint: String::new(), + redis_url: String::new(), + }; let program = basic_program(); let zkvm = EreSP1::new(program, ProverResourceType::Cluster(cluster_config)).unwrap(); diff --git a/crates/zkvm/sp1/src/zkvm/cluster.rs b/crates/zkvm/sp1/src/zkvm/cluster.rs index 86f08fb4..dbd64cfd 100644 --- a/crates/zkvm/sp1/src/zkvm/cluster.rs +++ b/crates/zkvm/sp1/src/zkvm/cluster.rs @@ -30,10 +30,10 @@ pub struct SP1ClusterClient { impl SP1ClusterClient { /// Creates a new SP1 Cluster client pub fn new(grpc_endpoint: &str, redis_url: &str) -> Result { - if grpc_endpoint.is_empty() { + if grpc_endpoint.trim().is_empty() { return Err(Error::EndpointNotConfigured); } - if redis_url.is_empty() { + if redis_url.trim().is_empty() { return Err(Error::RedisNotConfigured); } diff --git a/crates/zkvm/sp1/src/zkvm/error.rs b/crates/zkvm/sp1/src/zkvm/error.rs index 48103a44..37b673be 100644 --- a/crates/zkvm/sp1/src/zkvm/error.rs +++ b/crates/zkvm/sp1/src/zkvm/error.rs @@ -33,11 +33,13 @@ pub enum Error { // Cluster-specific errors #[error( - "SP1 Cluster endpoint not configured. Set SP1_CLUSTER_ENDPOINT environment variable or provide endpoint in ClusterProverConfig" + "SP1 Cluster endpoint not configured. Set CLUSTER_ENDPOINT or provide endpoint in ClusterProverConfig" )] EndpointNotConfigured, - #[error("Redis URL not configured. Set SP1_CLUSTER_REDIS_URL environment variable")] + #[error( + "Redis URL not configured. Set CLUSTER_REDIS_URL or provide redis_url in ClusterProverConfig" + )] RedisNotConfigured, #[error("Failed to connect to gRPC service: {0}")] diff --git a/crates/zkvm/sp1/src/zkvm/sdk.rs b/crates/zkvm/sp1/src/zkvm/sdk.rs index 8b36740e..2240f7ba 100644 --- a/crates/zkvm/sp1/src/zkvm/sdk.rs +++ b/crates/zkvm/sp1/src/zkvm/sdk.rs @@ -1,6 +1,6 @@ use crate::zkvm::Error; use crate::zkvm::cluster::{ProveResult as ClusterProveResult, SP1ClusterClient}; -use ere_zkvm_interface::zkvm::{NetworkProverConfig, ProverResourceType}; +use ere_zkvm_interface::zkvm::{ClusterProverConfig, NetworkProverConfig, ProverResourceType}; use sp1_sdk::{ CpuProver, CudaProver, NetworkProver, Prover as _, ProverClient, SP1ProofMode, SP1ProofWithPublicValues, SP1ProvingKey, SP1Stdin, SP1VerifyingKey, @@ -31,7 +31,7 @@ impl Prover { ProverResourceType::Gpu => Self::Gpu(ProverClient::builder().cuda().build()), ProverResourceType::Network(config) => Self::Network(build_network_prover(config)?), ProverResourceType::Cluster(config) => { - let client = SP1ClusterClient::new(&config.endpoint, &config.redis_url)?; + let client = build_cluster_client(config)?; Self::Cluster { client, local_prover: ProverClient::builder().cpu().build(), @@ -135,22 +135,40 @@ impl Prover { } } +fn env_non_empty(name: &str) -> Option { + std::env::var(name).ok().filter(|value| !value.trim().is_empty()) +} + +fn cfg_or_envs_non_empty(config_value: &str, env_names: &[&str]) -> Option { + if !config_value.trim().is_empty() { + return Some(config_value.to_string()); + } + + env_names.iter().find_map(|name| env_non_empty(name)) +} + +fn build_cluster_client(config: &ClusterProverConfig) -> Result { + let endpoint = + cfg_or_envs_non_empty(&config.endpoint, &["CLUSTER_ENDPOINT"]).ok_or(Error::EndpointNotConfigured)?; + let redis_url = + cfg_or_envs_non_empty(&config.redis_url, &["CLUSTER_REDIS_URL"]).ok_or(Error::RedisNotConfigured)?; + SP1ClusterClient::new(&endpoint, &redis_url) +} + fn build_network_prover(config: &NetworkProverConfig) -> Result { let mut builder = ProverClient::builder().network(); - // Check if we have a private key in the config or environment - if let Some(api_key) = &config.api_key { - builder = builder.private_key(api_key); - } else if let Ok(private_key) = std::env::var("NETWORK_PRIVATE_KEY") { - builder = builder.private_key(&private_key); - } else { - return Err(Error::NetworkPrivateKeyNotConfigured); - } - // Set the RPC URL if provided - if !config.endpoint.is_empty() { - builder = builder.rpc_url(&config.endpoint); - } else if let Ok(rpc_url) = std::env::var("NETWORK_RPC_URL") { + let private_key = config + .api_key + .as_deref() + .filter(|key| !key.trim().is_empty()) + .map(str::to_string) + .or_else(|| env_non_empty("NETWORK_PRIVATE_KEY")) + .ok_or(Error::NetworkPrivateKeyNotConfigured)?; + builder = builder.private_key(&private_key); + + if let Some(rpc_url) = cfg_or_envs_non_empty(&config.endpoint, &["NETWORK_RPC_URL"]) { builder = builder.rpc_url(&rpc_url); } - // Otherwise SP1 SDK will use its default RPC URL + Ok(builder.build()) } From 9c5bc381656ce7ab6c8938475cfb77f206713c12 Mon Sep 17 00:00:00 2001 From: Matteo Lisotto Date: Mon, 9 Feb 2026 13:39:47 +0100 Subject: [PATCH 09/11] chore: apply cargo fmt --- crates/zkvm/sp1/src/zkvm/sdk.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/zkvm/sp1/src/zkvm/sdk.rs b/crates/zkvm/sp1/src/zkvm/sdk.rs index 2240f7ba..bd95e8cd 100644 --- a/crates/zkvm/sp1/src/zkvm/sdk.rs +++ b/crates/zkvm/sp1/src/zkvm/sdk.rs @@ -136,7 +136,9 @@ impl Prover { } fn env_non_empty(name: &str) -> Option { - std::env::var(name).ok().filter(|value| !value.trim().is_empty()) + std::env::var(name) + .ok() + .filter(|value| !value.trim().is_empty()) } fn cfg_or_envs_non_empty(config_value: &str, env_names: &[&str]) -> Option { @@ -148,10 +150,10 @@ fn cfg_or_envs_non_empty(config_value: &str, env_names: &[&str]) -> Option Result { - let endpoint = - cfg_or_envs_non_empty(&config.endpoint, &["CLUSTER_ENDPOINT"]).ok_or(Error::EndpointNotConfigured)?; - let redis_url = - cfg_or_envs_non_empty(&config.redis_url, &["CLUSTER_REDIS_URL"]).ok_or(Error::RedisNotConfigured)?; + let endpoint = cfg_or_envs_non_empty(&config.endpoint, &["CLUSTER_ENDPOINT"]) + .ok_or(Error::EndpointNotConfigured)?; + let redis_url = cfg_or_envs_non_empty(&config.redis_url, &["CLUSTER_REDIS_URL"]) + .ok_or(Error::RedisNotConfigured)?; SP1ClusterClient::new(&endpoint, &redis_url) } From c7a25033b8321460fc0073f6143829cd7ba0bda8 Mon Sep 17 00:00:00 2001 From: Matteo Lisotto Date: Mon, 9 Feb 2026 14:20:57 +0100 Subject: [PATCH 10/11] deps: pin `sp1-cluster` to a commit --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index acb5252e..9918005c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,7 +134,7 @@ risc0-zkvm-platform = { version = "2.2.1", default-features = false } # SP1 dependencies sp1-sdk = "5.2.4" sp1-zkvm = { version = "5.2.4", default-features = false } -sp1-cluster-common = { git = "https://github.com/succinctlabs/sp1-cluster", package = "sp1-cluster-common" } +sp1-cluster-common = { git = "https://github.com/succinctlabs/sp1-cluster", package = "sp1-cluster-common", rev = "9b433a1" } # Ziren dependencies zkm-sdk = { git = "https://github.com/ProjectZKM/Ziren.git", tag = "v1.2.3" } From e3dae30d3c287645dc3947333c58978dbf199cf8 Mon Sep 17 00:00:00 2001 From: Matteo Lisotto Date: Wed, 11 Feb 2026 11:07:27 +0100 Subject: [PATCH 11/11] fix: restore `Default` trait for `ClusterProverConfig` --- crates/zkvm-interface/src/zkvm/resource.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/zkvm-interface/src/zkvm/resource.rs b/crates/zkvm-interface/src/zkvm/resource.rs index 5811c00d..1d3c08fe 100644 --- a/crates/zkvm-interface/src/zkvm/resource.rs +++ b/crates/zkvm-interface/src/zkvm/resource.rs @@ -25,7 +25,7 @@ impl NetworkProverConfig { } /// Configuration for cluster-based proving (e.g., SP1 Cluster) -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] #[cfg_attr(feature = "clap", derive(clap::Args))] pub struct ClusterProverConfig {