From e5998402df397cebbf20a45d47bc57954cc77cb9 Mon Sep 17 00:00:00 2001 From: Yogesh Shahi Date: Wed, 13 May 2026 20:34:11 +0530 Subject: [PATCH 1/4] perf(prover): drop R1CS after sumcheck to free memory during WHIR rounds WhirR1CSScheme::prove takes the R1CS by value but only reads it twice: in run_zk_sumcheck_prover and in calculate_external_row_of_r1cs_matrices (immediately after). Without an explicit drop, the matrices stay resident through the entire WHIR commit + prove path that follows, even though nothing downstream touches them. After PR #438 (24618492) made the sumcheck/commit phase the dominant memory consumer, R1CS lifetime extension became visible at the global peak. Dropping it on the spot frees ~80 MB of sparse-matrix storage before the WHIR rounds allocate their working buffers. Measured on complete_age_check: - run peak memory: 880 MB -> 816 MB (-64 MB, -7.3%) - prove_with_toml / prove_with_witness peak: 880 MB -> 816 MB - new proof verifies (provekit-cli verify exits 0) Proof bytes differ run-to-run as expected (zk-sumcheck samples a fresh blinding mask each invocation); verifier acceptance is the safety signal. 
--- provekit/prover/src/whir_r1cs.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/provekit/prover/src/whir_r1cs.rs b/provekit/prover/src/whir_r1cs.rs index 8578cd2df..6cd06703b 100644 --- a/provekit/prover/src/whir_r1cs.rs +++ b/provekit/prover/src/whir_r1cs.rs @@ -152,6 +152,7 @@ impl WhirR1CSProver for WhirR1CSScheme { drop(full_witness); let alphas = calculate_external_row_of_r1cs_matrices(&alpha, &r1cs); + drop(r1cs); let (x, public_weight) = get_public_weights(public_inputs, &mut merlin, self.m); let blinding_offset = blinding.offset; From c1eaa5e5cd4efe9a07d59d7265c3481399c207e0 Mon Sep 17 00:00:00 2001 From: zkfriendly Date: Wed, 13 May 2026 17:22:51 +0200 Subject: [PATCH 2/4] perf(verifier): drop at,bt,ct --- provekit/verifier/src/whir_r1cs.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/provekit/verifier/src/whir_r1cs.rs b/provekit/verifier/src/whir_r1cs.rs index 744117b6c..ab650adad 100644 --- a/provekit/verifier/src/whir_r1cs.rs +++ b/provekit/verifier/src/whir_r1cs.rs @@ -98,6 +98,7 @@ impl WhirR1CSVerifier for WhirR1CSScheme { &data_from_sumcheck_verifier.alpha, r1cs, ); + drop((at, bt, ct)); let blinding_eval = data_from_sumcheck_verifier.blinding_eval; let blinding_weights = expand_powers::<4>(&data_from_sumcheck_verifier.alpha); From b0030d7915f3a91dc5cdc1ae3e92ac0875ffa8a3 Mon Sep 17 00:00:00 2001 From: zkfriendly Date: Wed, 13 May 2026 20:52:40 +0200 Subject: [PATCH 3/4] perf(prover): use parallel iterator in accumulate - If the vector length exceeds the workload size, the `LinearForm::accumulate` implementation for `PrefixCovector` uses a parallel iterator, which can be about 5% faster. --- provekit/common/src/prefix_covector.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/provekit/common/src/prefix_covector.rs b/provekit/common/src/prefix_covector.rs index e0a813e6d..7272dc832 100644 --- a/provekit/common/src/prefix_covector.rs +++ b/provekit/common/src/prefix_covector.rs @@ -1,6 +1,7 @@ use { crate::FieldElement, ark_std::{One, Zero}, + 
rayon::prelude::*, whir::algebra::{dot, linear_form::LinearForm, multilinear_extend}, }; @@ -76,11 +77,18 @@ impl LinearForm for PrefixCovector { } fn accumulate(&self, accumulator: &mut [FieldElement], scalar: FieldElement) { - for (acc, val) in accumulator[..self.vector.len()] - .iter_mut() - .zip(&self.vector) - { - *acc += scalar * *val; + let accumulator = &mut accumulator[..self.vector.len()]; + if self.vector.len() > whir::utils::workload_size::<FieldElement>() { + accumulator + .par_iter_mut() + .zip(self.vector.par_iter()) + .for_each(|(acc, val)| { + *acc += scalar * *val; + }); + } else { + for (acc, val) in accumulator.iter_mut().zip(&self.vector) { + *acc += scalar * *val; + } } } } From 2069fc4b2a10d96f5fc0b7b753857565776d54ba Mon Sep 17 00:00:00 2001 From: zkfriendly Date: Thu, 14 May 2026 07:44:27 +0200 Subject: [PATCH 4/4] refactor: consume instead of drop Updated the `multiply_transposed_by_eq_alpha` and `calculate_external_row_of_r1cs_matrices` functions to accept `SparseMatrix` and `R1CS` by value instead of by reference. This change reduces peak memory usage. --- provekit/common/src/utils/sumcheck.rs | 12 ++++++------ provekit/prover/src/whir_r1cs.rs | 3 +-- provekit/verifier/src/whir_r1cs.rs | 10 ++-------- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/provekit/common/src/utils/sumcheck.rs b/provekit/common/src/utils/sumcheck.rs index 17a7c4299..527594646 100644 --- a/provekit/common/src/utils/sumcheck.rs +++ b/provekit/common/src/utils/sumcheck.rs @@ -208,9 +208,9 @@ pub fn transpose_r1cs_matrices(r1cs: &R1CS) -> (SparseMatrix, SparseMatrix, Spar /// external row. 
#[instrument(skip_all)] pub fn multiply_transposed_by_eq_alpha( - at: &SparseMatrix, - bt: &SparseMatrix, - ct: &SparseMatrix, + at: SparseMatrix, + bt: SparseMatrix, + ct: SparseMatrix, alpha: &[FieldElement], r1cs: &R1CS, ) -> [Vec<FieldElement>; 3] { @@ -237,8 +237,8 @@ pub fn multiply_transposed_by_eq_alpha( #[instrument(skip_all)] pub fn calculate_external_row_of_r1cs_matrices( alpha: &[FieldElement], - r1cs: &R1CS, + r1cs: R1CS, ) -> [Vec<FieldElement>; 3] { - let (at, bt, ct) = transpose_r1cs_matrices(r1cs); - multiply_transposed_by_eq_alpha(&at, &bt, &ct, alpha, r1cs) + let (at, bt, ct) = transpose_r1cs_matrices(&r1cs); + multiply_transposed_by_eq_alpha(at, bt, ct, alpha, &r1cs) } diff --git a/provekit/prover/src/whir_r1cs.rs b/provekit/prover/src/whir_r1cs.rs index 6cd06703b..f35255efa 100644 --- a/provekit/prover/src/whir_r1cs.rs +++ b/provekit/prover/src/whir_r1cs.rs @@ -151,8 +151,7 @@ impl WhirR1CSProver for WhirR1CSScheme { ); drop(full_witness); - let alphas = calculate_external_row_of_r1cs_matrices(&alpha, &r1cs); - drop(r1cs); + let alphas = calculate_external_row_of_r1cs_matrices(&alpha, r1cs); let (x, public_weight) = get_public_weights(public_inputs, &mut merlin, self.m); let blinding_offset = blinding.offset; diff --git a/provekit/verifier/src/whir_r1cs.rs b/provekit/verifier/src/whir_r1cs.rs index ab650adad..28f613790 100644 --- a/provekit/verifier/src/whir_r1cs.rs +++ b/provekit/verifier/src/whir_r1cs.rs @@ -91,14 +91,8 @@ impl WhirR1CSVerifier for WhirR1CSScheme { ); let x: FieldElement = arthur.verifier_message(); - let alphas = multiply_transposed_by_eq_alpha( - &at, - &bt, - &ct, - &data_from_sumcheck_verifier.alpha, - r1cs, - ); - drop((at, bt, ct)); + let alphas = + multiply_transposed_by_eq_alpha(at, bt, ct, &data_from_sumcheck_verifier.alpha, r1cs); let blinding_eval = data_from_sumcheck_verifier.blinding_eval; let blinding_weights = expand_powers::<4>(&data_from_sumcheck_verifier.alpha);