diff --git a/flake.lock b/flake.lock index bf2a97c87..6c0fe2e01 100644 --- a/flake.lock +++ b/flake.lock @@ -50,24 +50,6 @@ "type": "github" } }, - "flake-utils_2": { - "inputs": { - "systems": "systems_2" - }, - "locked": { - "lastModified": 1705309234, - "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "gitignore": { "inputs": { "nixpkgs": [ @@ -91,11 +73,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1718318537, - "narHash": "sha256-4Zu0RYRcAY/VWuu6awwq4opuiD//ahpc2aFHg2CWqFY=", + "lastModified": 1723637854, + "narHash": "sha256-med8+5DSWa2UnOqtdICndjDAEjxr5D7zaIiK4pn0Q7c=", "owner": "nixos", "repo": "nixpkgs", - "rev": "e9ee548d90ff586a6471b4ae80ae9cfcbceb3420", + "rev": "c3aa7b8938b17aebd2deecf7be0636000d62a2b9", "type": "github" }, "original": { @@ -107,27 +89,27 @@ }, "nixpkgs-stable": { "locked": { - "lastModified": 1710695816, - "narHash": "sha256-3Eh7fhEID17pv9ZxrPwCLfqXnYP006RKzSs0JptsN84=", + "lastModified": 1720386169, + "narHash": "sha256-NGKVY4PjzwAa4upkGtAMz1npHGoRzWotlSnVlqI40mo=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "614b4613980a522ba49f0d194531beddbb7220d3", + "rev": "194846768975b7ad2c4988bdb82572c00222c0d7", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-23.11", + "ref": "nixos-24.05", "repo": "nixpkgs", "type": "github" } }, "nixpkgs_2": { "locked": { - "lastModified": 1706487304, - "narHash": "sha256-LE8lVX28MV2jWJsidW13D2qrHU/RUUONendL2Q/WlJg=", + "lastModified": 1718428119, + "narHash": "sha256-WdWDpNaq6u1IPtxtYHHWpl5BmabtpmLnMAx0RdJ/vo8=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "90f456026d284c22b3e3497be980b2e47d0b28ac", + "rev": "e6cea36f83499eb4e9cd184c8a8e823296b50ad5", "type": "github" }, "original": { @@ -147,11 +129,11 @@ "nixpkgs-stable": "nixpkgs-stable" }, "locked": { - "lastModified": 1717664902, - "narHash": "sha256-7XfBuLULizXjXfBYy/VV+SpYMHreNRHk9nKMsm1bgb4=", + "lastModified": 1723202784, + "narHash": "sha256-qbhjc/NEGaDbyy0ucycubq4N3//gDFFH3DOmp1D3u1Q=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "cc4d466cb1254af050ff7bdf47f6d404a7c646d1", + "rev": "c7012d0c18567c889b948781bc74a501e92275d1", "type": "github" }, "original": { @@ -171,15 +153,14 @@ }, "rust-overlay": { "inputs": { - "flake-utils": "flake-utils_2", "nixpkgs": "nixpkgs_2" }, "locked": { - "lastModified": 1718417877, - "narHash": "sha256-s8QrTANEtY6UxzfkcBfoN93bgW9aCRIq54LPRVNu/4c=", + "lastModified": 1723688259, + "narHash": "sha256-WzeUR1MG9MnJnh9T7qcVe/v12qHvJvzdc3Z5HCeE2ns=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "7c2d603cb67c974ef8c5cfee1150060dbb299e04", + "rev": "6e75319846684326d900daff1e2e11338cc80d2b", "type": "github" }, "original": { @@ -202,21 +183,6 @@ "repo": "default", "type": "github" } - }, - "systems_2": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 342d921d3..3b13220b0 100644 --- a/flake.nix +++ b/flake.nix @@ -63,7 +63,7 @@ clangStdenv llvm_15 typos - grcov + # grcov # TODO uncomment this line after 
https://github.com/mozilla/grcov/issues/1187#issuecomment-2252214718 ] ++ lib.optionals stdenv.isDarwin [ darwin.apple_sdk.frameworks.Security ]; diff --git a/plonk/src/circuit/plonk_verifier/gadgets.rs b/plonk/src/circuit/plonk_verifier/gadgets.rs index 0426fb7eb..c50dde1b9 100644 --- a/plonk/src/circuit/plonk_verifier/gadgets.rs +++ b/plonk/src/circuit/plonk_verifier/gadgets.rs @@ -34,16 +34,20 @@ use jf_utils::{bytes_to_field_elements, field_switching}; /// Aggregate polynomial commitments into a single commitment (in the /// ScalarsAndBases form). Useful in batch opening. +/// /// The verification key type is guaranteed to match the Plonk proof type. +/// /// The returned commitment is a generalization of `[F]1` described /// in Sec 8.3, step 10 of https://eprint.iacr.org/2019/953.pdf -/// input +/// +/// Input: /// - vks: verification key variable /// - challenges: challenge variable in FpElemVar form /// - poly_evals: zeta^n, zeta^n-1 and Lagrange evaluated at 1 /// - batch_proof: batched proof inputs /// - non_native_field_info: aux information for non-native field -/// Output +/// +/// Output: /// - scalar and bases prepared for MSM /// - buffer info for u and v powers pub(super) fn aggregate_poly_commitments_circuit( diff --git a/plonk/src/proof_system/prover.rs b/plonk/src/proof_system/prover.rs index db2f57dc7..34f3c42a4 100644 --- a/plonk/src/proof_system/prover.rs +++ b/plonk/src/proof_system/prover.rs @@ -68,8 +68,10 @@ impl Prover { } /// Round 1: + /// /// 1. Compute and commit wire witness polynomials. /// 2. Compute public input polynomial. + /// /// Return the wire witness polynomials and their commitments, /// also return the public input polynomial. pub(crate) fn run_1st_round, R: CryptoRng + RngCore>( diff --git a/plonk/src/proof_system/verifier.rs b/plonk/src/proof_system/verifier.rs index 8da99ab40..7d1b3cbf1 100644 --- a/plonk/src/proof_system/verifier.rs +++ b/plonk/src/proof_system/verifier.rs @@ -190,11 +190,12 @@ where /// Batchly verify multiple (aggregated) PCS opening proofs. /// - /// We need to verify that + /// We need to verify that: /// - `e(Ai, [x]2) = e(Bi, [1]2) for i \in {0, .., m-1}`, where /// - `Ai = [open_proof_i] + u_i * [shifted_open_proof_i]` and /// - `Bi = eval_point_i * [open_proof_i] + u_i * next_eval_point_i * /// [shifted_open_proof_i] + comm_i - eval_i * [1]1`. + /// /// By Schwartz-Zippel lemma, it's equivalent to check that for a random r: /// - `e(A0 + ... + r^{m-1} * Am, [x]2) = e(B0 + ... + r^{m-1} * Bm, [1]2)`. pub(crate) fn batch_verify_opening_proofs( diff --git a/relation/src/constraint_system.rs b/relation/src/constraint_system.rs index 990606291..6d8d24fba 100644 --- a/relation/src/constraint_system.rs +++ b/relation/src/constraint_system.rs @@ -822,10 +822,10 @@ impl PlonkCircuit { self.eval_domain.size() != 1 } - /// Re-arrange the order of the gates so that + /// Re-arrange the order of the gates so that: /// 1. io gates are in the front. /// 2. variable table lookup gate are at the rear so that they do not affect - /// the range gates when merging the lookup tables. + /// the range gates when merging the lookup tables. /// /// Remember to pad gates before calling the method. 
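The `batch_verify_opening_proofs` doc comment in `verifier.rs` above reduces m pairing checks to a single one via a random linear combination. A minimal standalone sketch of that fold, assuming arkworks-style `Pairing` types; the function name and signature are illustrative, not the crate's API:

```
use ark_ec::pairing::Pairing;
use ark_ff::{One, UniformRand, Zero};
use ark_std::rand::RngCore;

/// Check e(A_i, [x]_2) == e(B_i, [1]_2) for all i in 0..m by checking the
/// single equation e(sum_i r^i A_i, [x]_2) == e(sum_i r^i B_i, [1]_2)
/// for a random scalar r.
fn batch_pairing_check<E: Pairing>(
    a: &[E::G1],       // A_0, ..., A_{m-1}
    b: &[E::G1],       // B_0, ..., B_{m-1}
    x_g2: E::G2Affine, // [x]_2
    h_g2: E::G2Affine, // [1]_2
    rng: &mut impl RngCore,
) -> bool {
    let r = E::ScalarField::rand(rng);
    let mut r_pow = E::ScalarField::one();
    let (mut a_comb, mut b_comb) = (E::G1::zero(), E::G1::zero());
    for (a_i, b_i) in a.iter().zip(b) {
        a_comb += *a_i * r_pow; // accumulate r^i * A_i
        b_comb += *b_i * r_pow; // accumulate r^i * B_i
        r_pow *= r;
    }
    // one combined check in place of m separate ones
    E::pairing(a_comb, x_g2) == E::pairing(b_comb, h_g2)
}
```

Soundness is the Schwartz-Zippel argument cited in the doc comment: if any single check fails, the combined check fails except with probability at most (m-1)/|F| over the choice of r.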
fn rearrange_gates(&mut self) -> Result<(), CircuitError> { diff --git a/relation/src/gadgets/ecc/emulated/short_weierstrass.rs b/relation/src/gadgets/ecc/emulated/short_weierstrass.rs index 6dabf2c9f..06d801fff 100644 --- a/relation/src/gadgets/ecc/emulated/short_weierstrass.rs +++ b/relation/src/gadgets/ecc/emulated/short_weierstrass.rs @@ -153,28 +153,36 @@ impl PlonkCircuit { /// Constrain variable `p2` to be the point addition of `p0` and /// `p1` over an elliptic curve. /// Let p0 = (x0, y0, inf0), p1 = (x1, y1, inf1), p2 = (x2, y2, inf2) - /// The addition formula for affine points of sw curve is - /// If either p0 or p1 is infinity, then p2 equals to another point. + /// The addition formula for affine points of sw curve is as follows: + /// + /// If either p0 or p1 is infinity, then p2 equals to another point. /// 1. if p0 == p1 - /// - if y0 == 0 then inf2 = 1 - /// - Calculate s = (3 * x0^2 + a) / (2 * y0) - /// - x2 = s^2 - x0 - x1 - /// - y2 = s(x0 - x2) - y0 + /// - if y0 == 0 then inf2 = 1 + /// - Calculate s = (3 * x0^2 + a) / (2 * y0) + /// - x2 = s^2 - x0 - x1 + /// - y2 = s(x0 - x2) - y0 /// 2. Otherwise - /// - if x0 == x1 then inf2 = 1 - /// - Calculate s = (y0 - y1) / (x0 - x1) - /// - x2 = s^2 - x0 - x1 - /// - y2 = s(x0 - x2) - y0 + /// - if x0 == x1 then inf2 = 1 + /// - Calculate s = (y0 - y1) / (x0 - x1) + /// - x2 = s^2 - x0 - x1 + /// - y2 = s(x0 - x2) - y0 + /// /// The first case is equivalent to the following: + /// /// - inf0 == 1 || inf1 == 1 || x0 != x1 || y0 != y1 || y0 != 0 || inf2 == 0 /// - (x0 + x1 + x2) * (y0 + y0)^2 == (3 * x0^2 + a)^2 /// - (y2 + y0) * (y0 + y0) == (3 * x0^2 + a) (x0 - x2) + /// /// The second case is equivalent to the following: + /// /// - inf0 == 1 || inf1 == 1 || x0 != x1 || y0 == y1 || inf2 == 0 /// - (x0 - x1)^2 (x0 + x1 + x2) == (y0 - y1)^2 /// - (x0 - x2) (y0 - y1) == (y0 + y2) (x0 - x1) + /// /// First check in both cases can be combined into the following: + /// /// inf0 == 1 || inf1 == 1 || inf2 == 0 || x0 != x1 || (y0 == y1 && y0 != 0) + /// /// For the rest equality checks, /// - Both LHS and RHS must be multiplied with an indicator variable /// (!inf0 && !inf1). So that if either p0 or p1 is infinity, those diff --git a/vid/.gitignore b/vid/.gitignore new file mode 100644 index 000000000..c41cc9e35 --- /dev/null +++ b/vid/.gitignore @@ -0,0 +1 @@ +/target \ No newline at end of file diff --git a/vid/Cargo.toml b/vid/Cargo.toml index a8e9fb54e..9731be489 100644 --- a/vid/Cargo.toml +++ b/vid/Cargo.toml @@ -46,6 +46,11 @@ name = "advz" harness = false required-features = ["test-srs"] +[[bench]] +name = "advz_multiplicity" +harness = false +required-features = ["test-srs"] + [features] default = ["parallel"] std = [ diff --git a/vid/benches/advz.rs b/vid/benches/advz.rs index a3a93a0ae..67bc9fec1 100644 --- a/vid/benches/advz.rs +++ b/vid/benches/advz.rs @@ -27,16 +27,15 @@ where { // play with these items // - // CODE_RATE is merely a convenient way to automatically choose polynomial - // degree as a function of storage node count. - // If desired, you could set polynomial degrees independent of storage node - // count. - const CODE_RATE: u32 = 4; // ratio of num_storage_nodes : polynomial_degree + // INVERSE_CODE_RATE is merely a convenient way to automatically choose + // polynomial degree as a function of storage node count. If desired, you + // could set polynomial degrees independent of storage node count. 
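+    // e.g. 512 storage nodes with INVERSE_CODE_RATE = 4 gives polynomial
+    // degree 512 / 4 = 128, i.e. a code rate of roughly k/n = 1/4.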
+    const INVERSE_CODE_RATE: u32 = 4; // ratio of num_storage_nodes : polynomial_degree
     let storage_node_counts = [512, 1024];
     let payload_byte_lens = [1 * MB];
 
     // more items as a function of the above
-    let poly_degrees_iter = storage_node_counts.iter().map(|c| c / CODE_RATE);
+    let poly_degrees_iter = storage_node_counts.iter().map(|c| c / INVERSE_CODE_RATE);
     let supported_degree = poly_degrees_iter.clone().max().unwrap();
     let vid_sizes_iter = poly_degrees_iter.zip(storage_node_counts);
     let mut rng = jf_utils::test_rng();
diff --git a/vid/benches/advz_multiplicity.rs b/vid/benches/advz_multiplicity.rs
new file mode 100644
index 000000000..9af98ebae
--- /dev/null
+++ b/vid/benches/advz_multiplicity.rs
@@ -0,0 +1,105 @@
+// Copyright (c) 2024 Espresso Systems (espressosys.com)
+// This file is part of the Jellyfish library.
+
+// You should have received a copy of the MIT License
+// along with the Jellyfish library. If not, see <https://mit-license.org/>.
+
+//! Benchmarks demonstrating performance improvement in [`Advz::verify_share`]
+//! from use of parallelism over `multiplicity`.
+//!
+//! Run
+//! ```
+//! cargo bench --bench=advz_multiplicity --features="test-srs"
+//! ```
+//!
+//! By
+//! [default](https://github.com/rayon-rs/rayon/blob/main/FAQ.md#how-many-threads-will-rayon-spawn)
+//! the number of threads = number of available CPU cores. You can override this
+//! choice by prefixing the above command with `RAYON_NUM_THREADS=N `. Example:
+//! set `N=1` to eliminate parallelism.
+
+use ark_bn254::Bn254;
+use ark_ec::pairing::Pairing;
+use ark_serialize::Write;
+use ark_std::rand::RngCore;
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use digest::{crypto_common::generic_array::ArrayLength, Digest, DynDigest, OutputSizeUser};
+use jf_pcs::{checked_fft_size, prelude::UnivariateKzgPCS, PolynomialCommitmentScheme};
+use jf_utils::field_byte_len;
+use jf_vid::{advz::Advz, VidScheme};
+use sha2::Sha256;
+
+const KB: usize = 1 << 10;
+// const MB: usize = KB << 10;
+
+fn advz<E, H>(c: &mut Criterion)
+where
+    E: Pairing,
+    // TODO(Gus) clean up nasty trait bounds upstream
+    H: Digest + DynDigest + Default + Clone + Write + Send + Sync,
+    <<H as OutputSizeUser>::OutputSize as ArrayLength<u8>>::ArrayType: Copy,
+{
+    // play with these items
+    //
+    // INVERSE_CODE_RATE is merely a convenient way to automatically choose
+    // recovery threshold as a function of storage node count. If desired, you
+    // could set recovery thresholds independent of storage node counts.
+    let multiplicities = [1, 256];
+    let num_storage_nodes = 128;
+    const INVERSE_CODE_RATE: usize = 4; // ratio of num_storage_nodes : recovery_threshold
+
+    // more items as a function of the above
+    let recovery_threshold = num_storage_nodes / INVERSE_CODE_RATE;
+    let max_multiplicity = multiplicities.iter().max().unwrap();
+    let max_degree = recovery_threshold * max_multiplicity;
+    let coeff_byte_len = field_byte_len::<E::ScalarField>();
+    let payload_byte_len = {
+        // ensure payload is large enough to fill at least 1 polynomial at
+        // maximum multiplicity.
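+        // e.g. num_storage_nodes = 128 and INVERSE_CODE_RATE = 4 give
+        // recovery_threshold = 32; with max multiplicity 256 this is
+        // max_degree = 32 * 256 = 8192 coefficients, so ~32-byte BN254
+        // scalars yield a payload of roughly 256 KiB.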
+ max_degree * coeff_byte_len + }; + let mut rng = jf_utils::test_rng(); + let payload_bytes = { + // random payload data + let mut payload_bytes = vec![0u8; payload_byte_len]; + rng.fill_bytes(&mut payload_bytes); + payload_bytes + }; + let srs = + UnivariateKzgPCS::::gen_srs_for_testing(&mut rng, checked_fft_size(max_degree).unwrap()) + .unwrap(); + + let benchmark_group_name = format!( + "advz_verify_payload_{}KB_multiplicity", + payload_byte_len / KB + ); + let mut grp = c.benchmark_group(benchmark_group_name); + for multiplicity in multiplicities { + let mut advz = Advz::::with_multiplicity( + num_storage_nodes.try_into().unwrap(), + recovery_threshold.try_into().unwrap(), + multiplicity.try_into().unwrap(), + &srs, + ) + .unwrap(); + let disperse = advz.disperse(&payload_bytes).unwrap(); + let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit); + grp.bench_function(BenchmarkId::from_parameter(multiplicity), |b| { + // verify only the 0th share + b.iter(|| { + advz.verify_share(&shares[0], &common, &commit) + .unwrap() + .unwrap() + }); + }); + } + grp.finish(); +} + +fn advz_main(c: &mut Criterion) { + advz::(c); +} + +criterion_group!(name = benches; config = Criterion::default().sample_size(10); targets = advz_main); + +criterion_main!(benches); diff --git a/vid/src/advz.rs b/vid/src/advz.rs index ec720e0fc..756654076 100644 --- a/vid/src/advz.rs +++ b/vid/src/advz.rs @@ -32,6 +32,7 @@ use bytes_to_field::{bytes_to_field, field_to_bytes}; use core::mem; use derivative::Derivative; use digest::crypto_common::Output; +use itertools::Itertools; use jf_merkle_tree::{ hasher::{HasherDigest, HasherMerkleTree, HasherNode}, MerkleCommitment, MerkleTreeScheme, @@ -78,18 +79,13 @@ pub type AdvzGPU<'srs, E, H> = AdvzInternal< pub struct AdvzInternal where E: Pairing, + T: Sync, { recovery_threshold: u32, num_storage_nodes: u32, - multiplicity: u32, + max_multiplicity: u32, ck: KzgProverParam, vk: KzgVerifierParam, - multi_open_domain: Radix2EvaluationDomain>, - - // TODO might be able to eliminate this field and instead use - // `EvaluationDomain::reindex_by_subdomain()` on `multi_open_domain` - // but that method consumes `other` and its doc is unclear. 
- eval_domain: Radix2EvaluationDomain>, // tuple of // - reference to the SRS/ProverParam loaded to GPU @@ -120,6 +116,7 @@ type KzgEvalsMerkleTreeProof = impl AdvzInternal where E: Pairing, + T: Sync, { pub(crate) fn new_internal( num_storage_nodes: u32, // n (code rate: r = k/n) @@ -128,19 +125,22 @@ where ) -> VidResult { // TODO intelligent choice of multiplicity // https://github.com/EspressoSystems/jellyfish/issues/534 - let multiplicity = 1; + let max_multiplicity = 1; - Self::with_multiplicity_internal(num_storage_nodes, recovery_threshold, multiplicity, srs) + Self::with_multiplicity_internal( + num_storage_nodes, + recovery_threshold, + max_multiplicity, + srs, + ) } pub(crate) fn with_multiplicity_internal( num_storage_nodes: u32, // n (code rate: r = k/n) recovery_threshold: u32, // k - multiplicity: u32, // batch m chunks, keep the rate r = (m*k)/(m*n) + max_multiplicity: u32, // batch m chunks, keep the rate r = (m*k)/(m*n) srs: impl Borrow>, ) -> VidResult { - // TODO support any degree, give multiple shares to nodes if needed - // https://github.com/EspressoSystems/jellyfish/issues/393 if num_storage_nodes < recovery_threshold { return Err(VidError::Argument(format!( "recovery_threshold {} exceeds num_storage_nodes {}", @@ -148,49 +148,29 @@ where ))); } - if !multiplicity.is_power_of_two() { + // TODO TEMPORARY: enforce power-of-2 + // https://github.com/EspressoSystems/jellyfish/issues/668 + if !recovery_threshold.is_power_of_two() { return Err(VidError::Argument(format!( - "multiplicity {multiplicity} should be a power of two" + "recovery_threshold {recovery_threshold} should be a power of two" ))); } - - // erasure code params - let chunk_size = multiplicity * recovery_threshold; // message length m - let code_word_size = multiplicity * num_storage_nodes; // code word length n - let poly_degree = chunk_size - 1; - - let (ck, vk) = UnivariateKzgPCS::trim_fft_size(srs, poly_degree as usize).map_err(vid)?; - let multi_open_domain = UnivariateKzgPCS::::multi_open_rou_eval_domain( - poly_degree as usize, - code_word_size as usize, - ) - .map_err(vid)?; - let eval_domain = Radix2EvaluationDomain::new(chunk_size as usize).ok_or_else(|| { - VidError::Internal(anyhow::anyhow!( - "fail to construct domain of size {}", - chunk_size - )) - })?; - - // TODO TEMPORARY: enforce power-of-2 chunk size - // Remove this restriction after we get KZG in eval form - // https://github.com/EspressoSystems/jellyfish/issues/339 - if chunk_size as usize != eval_domain.size() { + if !max_multiplicity.is_power_of_two() { return Err(VidError::Argument(format!( - "recovery_threshold {} currently unsupported, round to {} instead", - chunk_size, - eval_domain.size() + "max_multiplicity {max_multiplicity} should be a power of two" ))); } + let supported_degree = + usize::try_from(max_multiplicity * recovery_threshold - 1).map_err(vid)?; + let (ck, vk) = UnivariateKzgPCS::trim_fft_size(srs, supported_degree).map_err(vid)?; + Ok(Self { recovery_threshold, num_storage_nodes, - multiplicity, + max_multiplicity, ck, vk, - multi_open_domain, - eval_domain, srs_on_gpu_and_cuda_stream: None, _pd: Default::default(), }) @@ -214,7 +194,7 @@ where /// # Errors /// Return [`VidError::Argument`] if /// - `num_storage_nodes < recovery_threshold` - /// - TEMPORARY `recovery_threshold` is not a power of two [github issue](https://github.com/EspressoSystems/jellyfish/issues/339) + /// - TEMPORARY `recovery_threshold` is not a power of two [github issue](https://github.com/EspressoSystems/jellyfish/issues/668) pub fn 
new(
        num_storage_nodes: u32,
        recovery_threshold: u32,
        srs: impl Borrow<KzgSrs<E>>,
    ) -> VidResult<Self> {
        Self::new_internal(num_storage_nodes, recovery_threshold, srs)
    }

-    /// Like [`Advz::new`] except with a `multiplicity` arg.
+    /// Like [`Advz::new`] except with a `max_multiplicity` arg.
     ///
-    /// `multiplicity` is an implementation-specific optimization arg.
-    /// Each storage node gets `multiplicity` evaluations per polynomial.
+    /// `max_multiplicity` is an implementation-specific optimization arg.
+    /// Each storage node gets up to `max_multiplicity` evaluations per
+    /// polynomial. The actual multiplicity used is the smallest value m such
+    /// that the payload's field elements fit into m * recovery_threshold
+    /// polynomial coefficients.
     ///
     /// # Errors
     /// In addition to [`Advz::new`], return [`VidError::Argument`] if
-    /// - TEMPORARY `multiplicity` is not a power of two [github issue](https://github.com/EspressoSystems/jellyfish/issues/339)
+    /// - TEMPORARY `max_multiplicity` is not a power of two [github issue](https://github.com/EspressoSystems/jellyfish/issues/668)
     pub fn with_multiplicity(
         num_storage_nodes: u32,
         recovery_threshold: u32,
-        multiplicity: u32,
+        max_multiplicity: u32,
         srs: impl Borrow<KzgSrs<E>>,
     ) -> VidResult<Self> {
-        Self::with_multiplicity_internal(num_storage_nodes, recovery_threshold, multiplicity, srs)
+        Self::with_multiplicity_internal(
+            num_storage_nodes,
+            recovery_threshold,
+            max_multiplicity,
+            srs,
+        )
     }
 }
@@ -261,13 +248,13 @@
     pub fn with_multiplicity(
         num_storage_nodes: u32,
         recovery_threshold: u32,
-        multiplicity: u32,
+        max_multiplicity: u32,
         srs: impl Borrow<KzgSrs<E>>,
     ) -> VidResult<Self> {
         let mut advz = Self::with_multiplicity_internal(
             num_storage_nodes,
             recovery_threshold,
-            multiplicity,
+            max_multiplicity,
             srs,
         )?;
         advz.init_gpu_srs()?;
@@ -280,7 +267,7 @@
             self.ck.powers_of_g.len() - 1,
         )
         .map_err(vid)?;
-        self.srs_on_gpu_and_cuda_stream = Some((srs_on_gpu, warmup_new_stream().unwrap()));
+        self.srs_on_gpu_and_cuda_stream = Some((srs_on_gpu, warmup_new_stream().map_err(vid)?));
         Ok(())
     }
 }
@@ -306,11 +293,11 @@ where
         evals: Vec<KzgEval<E>>,
 
         #[serde(with = "canonical")]
-        // aggretate_proofs.len() equals self.multiplicity
+        // aggregate_proofs.len() equals multiplicity
         // TODO further aggregate into a single KZG proof.
        aggregate_proofs: Vec<KzgProof<E>>,
 
-        evals_proof: KzgEvalsMerkleTreeProof<E, H>,
+        eval_proofs: Vec<KzgEvalsMerkleTreeProof<E, H>>,
     }
 
     /// The [`VidScheme::Common`] type for [`Advz`].
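Since `Share` now carries one Merkle proof per multiplicity index (`eval_proofs` replacing the single `evals_proof`), each storage node authenticates `multiplicity` consecutive leaves of the evaluation tree. A small sketch of the implied leaf-index layout (hypothetical helper, mirroring the `(share.index * multiplicity) + i` arithmetic in `verify_share` later in this diff):

```
/// Global Merkle-leaf indices covered by one share, assuming node
/// `share_index` holds `multiplicity` consecutive evaluation batches
/// (hypothetical helper, not part of this diff).
fn share_leaf_indices(share_index: u32, multiplicity: u32) -> impl Iterator<Item = u64> {
    let start = u64::from(share_index) * u64::from(multiplicity);
    start..start + u64::from(multiplicity)
}
```

For example, node 3 with multiplicity 4 must present proofs for leaves 12..16, exactly the indices the per-share loop in `verify_share` checks below.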
@@ -375,12 +362,10 @@ where &mut self, polys: &[DensePolynomial], ) -> VidResult>> { - // let mut srs_on_gpu = self.srs_on_gpu_and_cuda_stream.as_mut().unwrap().0; - // let stream = &self.srs_on_gpu_and_cuda_stream.as_ref().unwrap().1; if polys.is_empty() { return Ok(vec![]); } - let (srs_on_gpu, stream) = self.srs_on_gpu_and_cuda_stream.as_mut().unwrap(); // safe by construction + let (srs_on_gpu, stream) = self.srs_on_gpu_and_cuda_stream.as_mut().map_err(vid)?; // safe by construction as GPUCommittable>::gpu_batch_commit_with_loaded_prover_param( srs_on_gpu, polys, stream, ) @@ -392,6 +377,7 @@ impl VidScheme for AdvzInternal where E: Pairing, H: HasherDigest, + T: Sync, AdvzInternal: MaybeGPU, { // use HasherNode instead of Output to easily meet trait bounds @@ -405,9 +391,10 @@ where B: AsRef<[u8]>, { let payload = payload.as_ref(); - let bytes_to_polys_time = start_timer!(|| "encode payload bytes into polynomials"); - let polys = self.bytes_to_polys(payload); - end_timer!(bytes_to_polys_time); + let payload_byte_len = payload.len().try_into().map_err(vid)?; + let multiplicity = self.min_multiplicity(payload_byte_len)?; + let chunk_size = multiplicity * self.recovery_threshold; + let polys = self.bytes_to_polys(payload)?; let poly_commits_time = start_timer!(|| "batch poly commit"); let poly_commits = >::kzg_batch_commit(self, &polys)?; @@ -421,82 +408,10 @@ where B: AsRef<[u8]>, { let payload = payload.as_ref(); - let payload_byte_len = payload.len().try_into().map_err(vid)?; - let disperse_time = start_timer!(|| format!( - "VID disperse {} payload bytes to {} nodes", - payload_byte_len, self.num_storage_nodes - )); - let _chunk_size = self.multiplicity * self.recovery_threshold; - let code_word_size = self.multiplicity * self.num_storage_nodes; - - // partition payload into polynomial coefficients - let bytes_to_polys_time = start_timer!(|| "encode payload bytes into polynomials"); - let polys = self.bytes_to_polys(payload); - end_timer!(bytes_to_polys_time); - - // evaluate polynomials - let all_storage_node_evals_timer = start_timer!(|| format!( - "compute all storage node evals for {} polynomials with {} coefficients", - polys.len(), - _chunk_size - )); - let all_storage_node_evals = self.evaluate_polys(&polys)?; - end_timer!(all_storage_node_evals_timer); - - // vector commitment to polynomial evaluations - let all_evals_commit_timer = - start_timer!(|| "compute merkle root of all storage node evals"); - let all_evals_commit = - KzgEvalsMerkleTree::::from_elems(None, &all_storage_node_evals).map_err(vid)?; - end_timer!(all_evals_commit_timer); - - let common_timer = start_timer!(|| format!("compute {} KZG commitments", polys.len())); - let common = Common { - poly_commits: >::kzg_batch_commit(self, &polys)?, - all_evals_digest: all_evals_commit.commitment().digest(), - payload_byte_len, - num_storage_nodes: self.num_storage_nodes, - multiplicity: self.multiplicity, - }; - end_timer!(common_timer); - - let commit = Self::derive_commit( - &common.poly_commits, - payload_byte_len, - self.num_storage_nodes, - )?; - let pseudorandom_scalar = Self::pseudorandom_scalar(&common, &commit)?; - - // Compute aggregate polynomial as a pseudorandom linear combo of polynomial via - // evaluation of the polynomial whose coefficients are polynomials and whose - // input point is the pseudorandom scalar. 
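The comment above (removed here and reinstated in `disperse_with_polys_and_commits` below) describes the aggregate polynomial as evaluating a polynomial whose coefficients are themselves polynomials at the pseudorandom scalar. A minimal Horner-style sketch of that combination; the crate's actual machinery is `polynomial_eval` over `PolynomialMultiplier`, so this is an illustration only:

```
use ark_ff::{PrimeField, Zero};
use ark_poly::{univariate::DensePolynomial, DenseUVPolynomial};

/// Compute p_0 + r*p_1 + ... + r^{k-1}*p_{k-1} by Horner's rule:
/// agg = p_0 + r*(p_1 + r*(p_2 + ...)).
fn aggregate_polys<F: PrimeField>(polys: &[DensePolynomial<F>], r: F) -> DensePolynomial<F> {
    let mut acc: Vec<F> = Vec::new();
    for p in polys.iter().rev() {
        // multiply the accumulator by r ...
        for c in acc.iter_mut() {
            *c *= r;
        }
        // ... then add the next polynomial, coefficient-wise
        if acc.len() < p.coeffs.len() {
            acc.resize(p.coeffs.len(), F::zero());
        }
        for (a, c) in acc.iter_mut().zip(&p.coeffs) {
            *a += c;
        }
    }
    DensePolynomial::from_coefficients_vec(acc)
}
```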
- let aggregate_poly = - polynomial_eval(polys.iter().map(PolynomialMultiplier), pseudorandom_scalar); - - let agg_proofs_timer = start_timer!(|| format!( - "compute aggregate proofs for {} storage nodes", - self.num_storage_nodes - )); - let aggregate_proofs = UnivariateKzgPCS::multi_open_rou_proofs( - &self.ck, - &aggregate_poly, - code_word_size as usize, - &self.multi_open_domain, - ) - .map_err(vid)?; - end_timer!(agg_proofs_timer); - - let assemblage_timer = start_timer!(|| "assemble shares for dispersal"); - let shares = - self.assemble_shares(all_storage_node_evals, aggregate_proofs, all_evals_commit)?; - end_timer!(assemblage_timer); - end_timer!(disperse_time); + let polys = self.bytes_to_polys(payload)?; + let poly_commits = >::kzg_batch_commit(self, &polys)?; - Ok(VidDisperse { - shares, - common, - commit, - }) + self.disperse_with_polys_and_commits(payload, polys, poly_commits) } fn verify_share( @@ -506,43 +421,54 @@ where commit: &Self::Commit, ) -> VidResult> { // check arguments - let multiplicity: usize = common.multiplicity.try_into().map_err(vid)?; - if share.evals.len() / multiplicity != common.poly_commits.len() { + if common.num_storage_nodes != self.num_storage_nodes { + return Err(VidError::Argument(format!( + "common num_storage_nodes {} differs from self {}", + common.num_storage_nodes, self.num_storage_nodes + ))); + } + let multiplicity = + self.min_multiplicity(common.payload_byte_len.try_into().map_err(vid)?)?; + if common.multiplicity != multiplicity { return Err(VidError::Argument(format!( - "(share eval, common poly commit) lengths differ ({},{})", - share.evals.len() / multiplicity, + "common multiplicity {} differs from derived min {}", + common.multiplicity, multiplicity + ))); + } + if share.evals.len() / multiplicity as usize != common.poly_commits.len() { + return Err(VidError::Argument(format!( + "number of share evals / multiplicity {}/{} differs from number of common polynomial commitments {}", + share.evals.len(), multiplicity, common.poly_commits.len() ))); } - - if common.num_storage_nodes != self.num_storage_nodes { + if share.eval_proofs.len() != multiplicity as usize { return Err(VidError::Argument(format!( - "common num_storage_nodes differs from self ({},{})", - common.num_storage_nodes, self.num_storage_nodes + "number of eval_proofs {} differs from common multiplicity {}", + share.eval_proofs.len(), + multiplicity, ))); } - let polys_len = common.poly_commits.len(); + Self::is_consistent(commit, common)?; if share.index >= self.num_storage_nodes { return Ok(Err(())); // not an arg error } - Self::is_consistent(commit, common)?; - - // verify eval proof - // TODO: check all indices that represents the shares - if KzgEvalsMerkleTree::::verify( - common.all_evals_digest, - &KzgEvalsMerkleTreeIndex::::from(share.index as u64), - &share.evals_proof, - ) - .map_err(vid)? - .is_err() - { - return Ok(Err(())); + // verify eval proofs + for i in 0..multiplicity { + if KzgEvalsMerkleTree::::verify( + common.all_evals_digest, + &KzgEvalsMerkleTreeIndex::::from((share.index * multiplicity) + i), + &share.eval_proofs[i as usize], + ) + .map_err(vid)? 
+ .is_err() + { + return Ok(Err(())); + } } - let pseudorandom_scalar = Self::pseudorandom_scalar(common, commit)?; // Compute aggregate polynomial [commitment|evaluation] @@ -562,31 +488,57 @@ where ); // verify aggregate proof - (0..self.multiplicity as usize) - .map(|i| { - let aggregate_eval = polynomial_eval( - share.evals[i * polys_len..(i + 1) * polys_len] - .iter() - .map(FieldMultiplier), - pseudorandom_scalar, - ); - Ok(UnivariateKzgPCS::verify( - &self.vk, - &aggregate_poly_commit, - &self - .multi_open_domain - .element((share.index as usize * multiplicity) + i), - &aggregate_eval, - &share.aggregate_proofs[i], - ) - .map_err(vid)? - .then_some(()) - .ok_or(())) - }) - .collect() + // + // some boilerplate needed to accommodate builds without `parallel` + // feature. + let multiplicities = Vec::from_iter((0..multiplicity as usize)); + let polys_len = common.poly_commits.len(); + let multi_open_domain = self.multi_open_domain(multiplicity)?; + let verification_iter = parallelizable_slice_iter(&multiplicities).map(|i| { + let range = i * polys_len..(i + 1) * polys_len; + let aggregate_eval = polynomial_eval( + share + .evals + .get(range.clone()) + .ok_or_else(|| { + VidError::Internal(anyhow::anyhow!( + "share evals range {:?} out of bounds for length {}", + range, + share.evals.len() + )) + })? + .iter() + .map(FieldMultiplier), + pseudorandom_scalar, + ); + Ok(UnivariateKzgPCS::verify( + &self.vk, + &aggregate_poly_commit, + &multi_open_domain.element((share.index * multiplicity) as usize + i), + &aggregate_eval, + &share.aggregate_proofs[*i], + ) + .map_err(vid)? + .then_some(()) + .ok_or(())) + }); + let abort = |result: &VidResult>| match result { + Ok(success) => success.is_err(), + Err(_) => true, + }; + + // abort immediately on any failure of verification + #[cfg(feature = "parallel")] + let result = verification_iter.find_any(abort); + + #[cfg(not(feature = "parallel"))] + let result = verification_iter.clone().find(abort); // `clone` because we need mutable + + result.unwrap_or(Ok(Ok(()))) } fn recover_payload(&self, shares: &[Self::Share], common: &Self::Common) -> VidResult> { + // check args if shares.len() < self.recovery_threshold as usize { return Err(VidError::Argument(format!( "not enough shares {}, expected at least {}", @@ -601,7 +553,7 @@ where ))); } - // all shares must have equal evals len + // check args: all shares must have equal evals len let num_evals = shares .first() .ok_or_else(|| VidError::Argument("shares is empty".into()))? 
@@ -620,26 +572,29 @@ where share.evals.len() ))); } - if num_evals != self.multiplicity as usize * common.poly_commits.len() { + if num_evals != common.multiplicity as usize * common.poly_commits.len() { return Err(VidError::Argument(format!( "num_evals should be (multiplicity * poly_commits): {} but is instead: {}", - self.multiplicity as usize * common.poly_commits.len(), + common.multiplicity as usize * common.poly_commits.len(), num_evals, ))); } - let chunk_size = self.multiplicity * self.recovery_threshold; - let num_polys = num_evals / self.multiplicity as usize; - let elems_capacity = num_polys * chunk_size as usize; - let mut elems = Vec::with_capacity(elems_capacity); + // convenience quantities + let chunk_size = + usize::try_from(common.multiplicity * self.recovery_threshold).map_err(vid)?; + let num_polys = common.poly_commits.len(); + let elems_capacity = num_polys * chunk_size; + let fft_domain = Self::eval_domain(chunk_size)?; + let mut elems = Vec::with_capacity(elems_capacity); let mut evals = Vec::with_capacity(num_evals); for p in 0..num_polys { for share in shares { // extract all evaluations for polynomial p from the share - for m in 0..self.multiplicity as usize { + for m in 0..common.multiplicity as usize { evals.push(( - (share.index * self.multiplicity) as usize + m, + (share.index * common.multiplicity) as usize + m, share.evals[(m * num_polys) + p], )) } @@ -647,14 +602,14 @@ where let mut coeffs = reed_solomon_erasure_decode_rou( mem::take(&mut evals), chunk_size as usize, - &self.multi_open_domain, + &self.multi_open_domain(common.multiplicity)?, ) .map_err(vid)?; // TODO TEMPORARY: use FFT to encode polynomials in eval form // Remove these FFTs after we get KZG in eval form // https://github.com/EspressoSystems/jellyfish/issues/339 - self.eval_domain.fft_in_place(&mut coeffs); + fft_domain.fft_in_place(&mut coeffs); elems.append(&mut coeffs); } @@ -697,50 +652,145 @@ impl AdvzInternal where E: Pairing, H: HasherDigest, + SrsRef: Sync, AdvzInternal: MaybeGPU, { - fn evaluate_polys( + fn disperse_with_polys_and_commits( &self, - polys: &[DensePolynomial<::ScalarField>], - ) -> Result::ScalarField>>, VidError> - where - E: Pairing, - H: HasherDigest, - { - let code_word_size = (self.num_storage_nodes * self.multiplicity) as usize; - let mut all_storage_node_evals = vec![Vec::with_capacity(polys.len()); code_word_size]; - // this is to avoid `SrsRef` not implementing `Sync` problem, - // instead of sending entire `self` cross thread, we only send a ref which is - // Sync - let multi_open_domain_ref = &self.multi_open_domain; - - let all_poly_evals = parallelizable_slice_iter(polys) - .map(|poly| { - UnivariateKzgPCS::::multi_open_rou_evals( - poly, - code_word_size, - multi_open_domain_ref, - ) - .map_err(vid) - }) - .collect::, VidError>>()?; + payload: &[u8], + polys: Vec::ScalarField>>, + poly_commits: Vec>, + ) -> VidResult> { + let payload_byte_len = payload.len().try_into().map_err(vid)?; + let disperse_time = start_timer!(|| format!( + "VID disperse {} payload bytes to {} nodes", + payload_byte_len, self.num_storage_nodes + )); + let multiplicity = self.min_multiplicity(payload.len())?; + let code_word_size = usize::try_from(multiplicity * self.num_storage_nodes).map_err(vid)?; + let multi_open_domain = self.multi_open_domain(multiplicity)?; - for poly_evals in all_poly_evals { - for (storage_node_evals, poly_eval) in all_storage_node_evals - .iter_mut() - .zip(poly_evals.into_iter()) - { - storage_node_evals.push(poly_eval); + // evaluate polynomials + 
let all_storage_node_evals_timer = start_timer!(|| format!( + "compute all storage node evals for {} polynomials with {} coefficients", + polys.len(), + multiplicity * self.recovery_threshold + )); + let all_storage_node_evals = { + let mut all_storage_node_evals = vec![Vec::with_capacity(polys.len()); code_word_size]; + let all_poly_evals = parallelizable_slice_iter(&polys) + .map(|poly| { + UnivariateKzgPCS::::multi_open_rou_evals( + poly, + code_word_size, + &multi_open_domain, + ) + .map_err(vid) + }) + .collect::, VidError>>()?; + + for poly_evals in all_poly_evals { + for (storage_node_evals, poly_eval) in all_storage_node_evals + .iter_mut() + .zip(poly_evals.into_iter()) + { + storage_node_evals.push(poly_eval); + } } - } - // sanity checks - assert_eq!(all_storage_node_evals.len(), code_word_size); - for storage_node_evals in all_storage_node_evals.iter() { - assert_eq!(storage_node_evals.len(), polys.len()); - } + // sanity checks + assert_eq!(all_storage_node_evals.len(), code_word_size); + for storage_node_evals in all_storage_node_evals.iter() { + assert_eq!(storage_node_evals.len(), polys.len()); + } - Ok(all_storage_node_evals) + all_storage_node_evals + }; + end_timer!(all_storage_node_evals_timer); + + // vector commitment to polynomial evaluations + let all_evals_commit_timer = + start_timer!(|| "compute merkle root of all storage node evals"); + let all_evals_commit = + KzgEvalsMerkleTree::::from_elems(None, &all_storage_node_evals).map_err(vid)?; + end_timer!(all_evals_commit_timer); + + let common = Common { + poly_commits, + all_evals_digest: all_evals_commit.commitment().digest(), + payload_byte_len, + num_storage_nodes: self.num_storage_nodes, + multiplicity, + }; + + let commit = Self::derive_commit( + &common.poly_commits, + payload_byte_len, + self.num_storage_nodes, + )?; + let pseudorandom_scalar = Self::pseudorandom_scalar(&common, &commit)?; + + // Compute aggregate polynomial as a pseudorandom linear combo of polynomial via + // evaluation of the polynomial whose coefficients are polynomials and whose + // input point is the pseudorandom scalar. + let aggregate_poly = + polynomial_eval(polys.iter().map(PolynomialMultiplier), pseudorandom_scalar); + + let agg_proofs_timer = start_timer!(|| format!( + "compute aggregate proofs for {} storage nodes", + self.num_storage_nodes + )); + let aggregate_proofs = UnivariateKzgPCS::multi_open_rou_proofs( + &self.ck, + &aggregate_poly, + code_word_size as usize, + &multi_open_domain, + ) + .map_err(vid)?; + end_timer!(agg_proofs_timer); + + let assemblage_timer = start_timer!(|| "assemble shares for dispersal"); + let shares: Vec<_> = { + // compute share data + let share_data = all_storage_node_evals + .into_iter() + .zip(aggregate_proofs) + .enumerate() + .map(|(i, (eval, proof))| { + let eval_proof = all_evals_commit + .lookup(KzgEvalsMerkleTreeIndex::::from(i as u64)) + .expect_ok() + .map_err(vid)? 
+ .1; + Ok((eval, proof, eval_proof)) + }) + .collect::, VidError>>()?; + + // split share data into chunks of size multiplicity + share_data + .into_iter() + .chunks(multiplicity as usize) + .into_iter() + .enumerate() + .map(|(index, chunk)| { + let (evals, proofs, eval_proofs): (Vec<_>, _, _) = chunk.multiunzip(); + Share { + index: index as u32, + evals: evals.into_iter().flatten().collect::>(), + aggregate_proofs: proofs, + eval_proofs, + } + }) + .collect() + }; + end_timer!(assemblage_timer); + end_timer!(disperse_time); + + Ok(VidDisperse { + shares, + common, + commit, + }) } fn pseudorandom_scalar( @@ -770,33 +820,46 @@ where Ok(PrimeField::from_le_bytes_mod_order(&hasher.finalize())) } - fn bytes_to_polys(&self, payload: &[u8]) -> Vec::ScalarField>> + /// Partition payload into polynomial coefficients + fn bytes_to_polys( + &self, + payload: &[u8], + ) -> VidResult::ScalarField>>> where E: Pairing, { - let chunk_size = (self.recovery_threshold * self.multiplicity) as usize; let elem_bytes_len = bytes_to_field::elem_byte_capacity::<::ScalarField>(); - let eval_domain_ref = &self.eval_domain; + let domain_size = + usize::try_from(self.min_multiplicity(payload.len())? * self.recovery_threshold) + .map_err(vid)?; - parallelizable_chunks(payload, chunk_size * elem_bytes_len) + let bytes_to_polys_time = start_timer!(|| "encode payload bytes into polynomials"); + let result = parallelizable_chunks(payload, domain_size * elem_bytes_len) .map(|chunk| { - Self::polynomial_internal( - eval_domain_ref, - chunk_size, - bytes_to_field::<_, KzgEval>(chunk), - ) + Self::interpolate_polynomial(bytes_to_field::<_, KzgEval>(chunk), domain_size) }) - .collect() + .collect::>>(); + end_timer!(bytes_to_polys_time); + result } - // This is an associated function, not a method, doesn't take in `self`, thus - // more friendly to cross-thread `Sync`, especially when on of the generic - // param of `Self` didn't implement `Sync` - fn polynomial_internal( - domain_ref: &Radix2EvaluationDomain>, - chunk_size: usize, - coeffs: I, - ) -> KzgPolynomial + /// Consume `evals` and return a polynomial that interpolates `evals` on a + /// evaluation domain of size `domain_size`. + /// + /// Return an error if the length of `evals` exceeds `domain_size`. + /// + /// The degree-plus-1 of the returned polynomial is always a power of two + /// because: + /// + /// - We use FFT to interpolate, so `domain_size` is rounded up to the next + /// power of two. + /// - [`KzgPolynomial`] implementation is stored in coefficient form. + /// + /// See https://github.com/EspressoSystems/jellyfish/issues/339 + /// + /// Why is this method an associated function of `Self`? Because we want to + /// use a generic parameter of `Self`. + fn interpolate_polynomial(evals: I, domain_size: usize) -> VidResult> where I: Iterator, I::Item: Borrow>, @@ -804,31 +867,60 @@ where // TODO TEMPORARY: use FFT to encode polynomials in eval form // Remove these FFTs after we get KZG in eval form // https://github.com/EspressoSystems/jellyfish/issues/339 - let mut coeffs_vec: Vec<_> = coeffs.map(|c| *c.borrow()).collect(); - let pre_fft_len = coeffs_vec.len(); - EvaluationDomain::ifft_in_place(domain_ref, &mut coeffs_vec); - - // sanity check: the fft did not resize coeffs. - // If pre_fft_len != self.recovery_threshold * self.multiplicity - // then we were not given the correct number of coeffs. In that case - // coeffs.len() could be anything, so there's nothing to sanity check. 
- if pre_fft_len == chunk_size { - assert_eq!(coeffs_vec.len(), pre_fft_len); + let mut evals_vec: Vec<_> = evals.map(|c| *c.borrow()).collect(); + let pre_fft_len = evals_vec.len(); + if pre_fft_len > domain_size { + return Err(VidError::Internal(anyhow::anyhow!( + "number of evals {} exceeds domain_size {}", + pre_fft_len, + domain_size + ))); } + let domain = Self::eval_domain(domain_size)?; + + domain.ifft_in_place(&mut evals_vec); - DenseUVPolynomial::from_coefficients_vec(coeffs_vec) + // sanity: the fft did not resize evals. If pre_fft_len < domain_size + // then we were given too few evals, in which case there's nothing to + // sanity check. + if pre_fft_len == domain_size && pre_fft_len != evals_vec.len() { + return Err(VidError::Internal(anyhow::anyhow!( + "unexpected output resize from {pre_fft_len} to {}", + evals_vec.len() + ))); + } + + Ok(DenseUVPolynomial::from_coefficients_vec(evals_vec)) } - fn polynomial(&self, coeffs: I) -> KzgPolynomial - where - I: Iterator, - I::Item: Borrow>, - { - Self::polynomial_internal( - &self.eval_domain, - (self.recovery_threshold * self.multiplicity) as usize, - coeffs, - ) + fn min_multiplicity(&self, payload_byte_len: usize) -> VidResult { + let elem_bytes_len = bytes_to_field::elem_byte_capacity::<::ScalarField>(); + let elems: u32 = payload_byte_len + .div_ceil(elem_bytes_len) + .try_into() + .map_err(vid)?; + if self.recovery_threshold * self.max_multiplicity < elems { + // payload is large. no change in multiplicity needed. + return Ok(self.max_multiplicity); + } + + // payload is small: choose the smallest `m` such that `0 < m < + // multiplicity` and the entire payload fits into `m * + // recovery_threshold` elements. + let m = elems.div_ceil(self.recovery_threshold.max(1)).max(1); + + // TODO TEMPORARY: enforce power-of-2 + // https://github.com/EspressoSystems/jellyfish/issues/668 + // + // Round up to the nearest power of 2. + // + // After the above issue is fixed: delete the following code and return + // `m` from above. + if m <= 1 { + Ok(1) + } else { + Ok(1 << ((m - 1).ilog2() + 1)) + } } /// Derive a commitment from whatever data is needed. @@ -864,46 +956,24 @@ where Ok(hasher.finalize().into()) } - /// Assemble shares from evaluations and proofs. - /// - /// Each share contains (for multiplicity m): - /// 1. (m * num_poly) evaluations. - /// 2. a collection of m KZG proofs. TODO KZG aggregation https://github.com/EspressoSystems/jellyfish/issues/356 - /// 3. a merkle tree membership proof. - fn assemble_shares( + fn multi_open_domain( &self, - all_storage_node_evals: Vec::ScalarField>>, - aggregate_proofs: Vec>, - all_evals_commit: KzgEvalsMerkleTree, - ) -> Result>, VidError> - where - E: Pairing, - H: HasherDigest, - { - let code_word_size = (self.num_storage_nodes * self.multiplicity) as usize; - let num_of_polys = all_storage_node_evals[0].len(); - let mut shares = Vec::with_capacity(self.num_storage_nodes as usize); - let mut evals = Vec::with_capacity(num_of_polys * self.multiplicity as usize); - let mut proofs = Vec::with_capacity(self.multiplicity as usize); - let mut index = 0; - for i in 0..code_word_size { - evals.extend(all_storage_node_evals[i].iter()); - proofs.push(aggregate_proofs[i].clone()); - if (i + 1) % self.multiplicity as usize == 0 { - shares.push(Share { - index, - evals: mem::take(&mut evals), - aggregate_proofs: mem::take(&mut proofs), - evals_proof: all_evals_commit // TODO: check MT lookup for each index - .lookup(KzgEvalsMerkleTreeIndex::::from(index as u64)) - .expect_ok() - .map_err(vid)? 
- .1, - }); - index += 1; - } - } - Ok(shares) + multiplicity: u32, + ) -> VidResult::ScalarField>> { + let chunk_size = usize::try_from(multiplicity * self.recovery_threshold).map_err(vid)?; + let code_word_size = usize::try_from(multiplicity * self.num_storage_nodes).map_err(vid)?; + UnivariateKzgPCS::::multi_open_rou_eval_domain(chunk_size - 1, code_word_size) + .map_err(vid) + } + + fn eval_domain( + domain_size: usize, + ) -> VidResult::ScalarField>> { + Radix2EvaluationDomain::>::new(domain_size).ok_or_else(|| { + VidError::Internal(anyhow::anyhow!( + "fail to construct domain of size {domain_size}" + )) + }) } } @@ -968,286 +1038,4 @@ where } #[cfg(test)] -mod tests { - use super::{VidError::Argument, *}; - use ark_bls12_381::Bls12_381; - use ark_bn254::Bn254; - use ark_std::{ - rand::{CryptoRng, RngCore}, - vec, - }; - use jf_pcs::{checked_fft_size, prelude::UnivariateUniversalParams}; - use sha2::Sha256; - - #[ignore] - #[test] - fn disperse_timer() { - // run with 'print-trace' feature to see timer output - let (recovery_threshold, num_storage_nodes) = (256, 512); - let mut rng = jf_utils::test_rng(); - let srs = init_srs(recovery_threshold as usize, &mut rng); - #[cfg(feature = "gpu-vid")] - let mut advz_gpu = - AdvzGPU::<'_, Bn254, Sha256>::new(num_storage_nodes, recovery_threshold, &srs).unwrap(); - let mut advz = - Advz::::new(num_storage_nodes, recovery_threshold, srs).unwrap(); - - let payload_random = init_random_payload(1 << 25, &mut rng); - - #[cfg(feature = "gpu-vid")] - let _ = advz_gpu.disperse(payload_random.clone()); - let _ = advz.disperse(payload_random); - } - - #[ignore] - #[test] - fn commit_only_timer() { - // run with 'print-trace' feature to see timer output - let (recovery_threshold, num_storage_nodes) = (256, 512); - let mut rng = jf_utils::test_rng(); - let srs = init_srs(recovery_threshold as usize, &mut rng); - #[cfg(feature = "gpu-vid")] - let mut advz_gpu = - AdvzGPU::<'_, Bn254, Sha256>::new(num_storage_nodes, recovery_threshold, &srs).unwrap(); - let mut advz = - Advz::::new(num_storage_nodes, recovery_threshold, srs).unwrap(); - - let payload_random = init_random_payload(1 << 25, &mut rng); - - #[cfg(feature = "gpu-vid")] - let _ = advz_gpu.commit_only(payload_random.clone()); - - let _ = advz.commit_only(payload_random); - } - - #[test] - fn sad_path_verify_share_corrupt_share() { - let (mut advz, bytes_random) = advz_init(); - let disperse = advz.disperse(bytes_random).unwrap(); - let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit); - - for (i, share) in shares.iter().enumerate() { - // missing share eval - { - let share_missing_eval = Share { - evals: share.evals[1..].to_vec(), - ..share.clone() - }; - assert_arg_err( - advz.verify_share(&share_missing_eval, &common, &commit), - "1 missing share should be arg error", - ); - } - - // corrupted share eval - { - let mut share_bad_eval = share.clone(); - share_bad_eval.evals[0].double_in_place(); - advz.verify_share(&share_bad_eval, &common, &commit) - .unwrap() - .expect_err("bad share value should fail verification"); - } - - // corrupted index, in bounds - { - let share_bad_index = Share { - index: (share.index + 1) % advz.num_storage_nodes, - ..share.clone() - }; - advz.verify_share(&share_bad_index, &common, &commit) - .unwrap() - .expect_err("bad share index should fail verification"); - } - - // corrupted index, out of bounds - { - let share_bad_index = Share { - index: share.index + advz.num_storage_nodes, - ..share.clone() - }; - 
advz.verify_share(&share_bad_index, &common, &commit) - .unwrap() - .expect_err("bad share index should fail verification"); - } - - // corrupt eval proof - { - // We have no way to corrupt a proof - // (without also causing a deserialization failure). - // So we use another share's proof instead. - let share_bad_evals_proof = Share { - evals_proof: shares[(i + 1) % shares.len()].evals_proof.clone(), - ..share.clone() - }; - advz.verify_share(&share_bad_evals_proof, &common, &commit) - .unwrap() - .expect_err("bad share evals proof should fail verification"); - } - } - } - - #[test] - fn sad_path_verify_share_corrupt_commit() { - let (mut advz, bytes_random) = advz_init(); - let disperse = advz.disperse(bytes_random).unwrap(); - let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit); - - // missing commit - let common_missing_item = Common { - poly_commits: common.poly_commits[1..].to_vec(), - ..common.clone() - }; - assert_arg_err( - advz.verify_share(&shares[0], &common_missing_item, &commit), - "1 missing commit should be arg error", - ); - - // 1 corrupt commit, poly_commit - let common_1_poly_corruption = { - let mut corrupted = common.clone(); - corrupted.poly_commits[0] = ::G1Affine::zero().into(); - corrupted - }; - assert_arg_err( - advz.verify_share(&shares[0], &common_1_poly_corruption, &commit), - "corrupted commit should be arg error", - ); - - // 1 corrupt commit, all_evals_digest - let common_1_digest_corruption = { - let mut corrupted = common; - let mut digest_bytes = vec![0u8; corrupted.all_evals_digest.uncompressed_size()]; - corrupted - .all_evals_digest - .serialize_uncompressed(&mut digest_bytes) - .expect("digest serialization should succeed"); - digest_bytes[0] += 1; - corrupted.all_evals_digest = - HasherNode::deserialize_uncompressed(digest_bytes.as_slice()) - .expect("digest deserialization should succeed"); - corrupted - }; - advz.verify_share(&shares[0], &common_1_digest_corruption, &commit) - .unwrap() - .expect_err("1 corrupt all_evals_digest should fail verification"); - } - - #[test] - fn sad_path_verify_share_corrupt_share_and_commit() { - let (mut advz, bytes_random) = advz_init(); - let disperse = advz.disperse(bytes_random).unwrap(); - let (mut shares, mut common, commit) = (disperse.shares, disperse.common, disperse.commit); - - common.poly_commits.pop(); - shares[0].evals.pop(); - - // equal nonzero lengths for common, share - assert_arg_err( - advz.verify_share(&shares[0], &common, &commit), - "common inconsistent with commit should be arg error", - ); - - common.poly_commits.clear(); - shares[0].evals.clear(); - - // zero length for common, share - assert_arg_err( - advz.verify_share(&shares[0], &common, &commit), - "expect arg error for common inconsistent with commit", - ); - } - - #[test] - fn sad_path_recover_payload_corrupt_shares() { - let (mut advz, bytes_random) = advz_init(); - let disperse = advz.disperse(&bytes_random).unwrap(); - let (shares, common) = (disperse.shares, disperse.common); - - { - // unequal share eval lengths - let mut shares_missing_evals = shares.clone(); - for i in 0..shares_missing_evals.len() - 1 { - shares_missing_evals[i].evals.pop(); - assert_arg_err( - advz.recover_payload(&shares_missing_evals, &common), - format!("{} shares missing 1 eval should be arg error", i + 1).as_str(), - ); - } - - // 1 eval missing from all shares - shares_missing_evals.last_mut().unwrap().evals.pop(); - assert_arg_err( - advz.recover_payload(&shares_missing_evals, &common), - format!( - "shares contain {} 
but expected {}", - shares_missing_evals[0].evals.len(), - &common.poly_commits.len() - ) - .as_str(), - ); - } - - // corrupted index, in bounds - { - let mut shares_bad_indices = shares.clone(); - - // permute indices to avoid duplicates and keep them in bounds - for share in &mut shares_bad_indices { - share.index = (share.index + 1) % advz.num_storage_nodes; - } - - let bytes_recovered = advz - .recover_payload(&shares_bad_indices, &common) - .expect("recover_payload should succeed when indices are in bounds"); - assert_ne!(bytes_recovered, bytes_random); - } - - // corrupted index, out of bounds - { - let mut shares_bad_indices = shares.clone(); - for i in 0..shares_bad_indices.len() { - shares_bad_indices[i].index += - u32::try_from(advz.multi_open_domain.size()).unwrap(); - advz.recover_payload(&shares_bad_indices, &common) - .expect_err("recover_payload should fail when indices are out of bounds"); - } - } - } - - /// Routine initialization tasks. - /// - /// Returns the following tuple: - /// 1. An initialized [`Advz`] instance. - /// 2. A `Vec` filled with random bytes. - pub(super) fn advz_init() -> (Advz, Vec) { - let (recovery_threshold, num_storage_nodes) = (4, 6); - let mut rng = jf_utils::test_rng(); - let srs = init_srs(recovery_threshold as usize, &mut rng); - let advz = Advz::new(num_storage_nodes, recovery_threshold, srs).unwrap(); - let bytes_random = init_random_payload(4000, &mut rng); - (advz, bytes_random) - } - - /// Convenience wrapper to assert [`VidError::Argument`] return value. - pub(super) fn assert_arg_err(res: VidResult, msg: &str) { - assert!(matches!(res, Err(Argument(_))), "{}", msg); - } - - pub(super) fn init_random_payload(len: usize, rng: &mut R) -> Vec - where - R: RngCore + CryptoRng, - { - let mut bytes_random = vec![0u8; len]; - rng.fill_bytes(&mut bytes_random); - bytes_random - } - - pub(super) fn init_srs(num_coeffs: usize, rng: &mut R) -> UnivariateUniversalParams - where - E: Pairing, - R: RngCore + CryptoRng, - { - UnivariateKzgPCS::gen_srs_for_testing(rng, checked_fft_size(num_coeffs - 1).unwrap()) - .unwrap() - } -} +mod test; diff --git a/vid/src/advz/payload_prover.rs b/vid/src/advz/payload_prover.rs index 7a90e7841..46baae595 100644 --- a/vid/src/advz/payload_prover.rs +++ b/vid/src/advz/payload_prover.rs @@ -25,7 +25,7 @@ use crate::{ }; use anyhow::anyhow; use ark_ec::pairing::Pairing; -use ark_poly::EvaluationDomain; +use ark_poly::{EvaluationDomain, Radix2EvaluationDomain}; use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; use ark_std::{format, ops::Range}; use itertools::Itertools; @@ -66,6 +66,7 @@ impl PayloadProver>> for AdvzInternal: MaybeGPU, { fn payload_proof( @@ -80,26 +81,38 @@ where check_range_nonempty_and_in_bounds(payload.len(), &range)?; // index conversion + let multiplicity = self.min_multiplicity(payload.len())?; let range_elem = self.range_byte_to_elem(&range); - let range_poly = self.range_elem_to_poly(&range_elem); + let range_poly = self.range_elem_to_poly(&range_elem, multiplicity); let range_elem_byte = self.range_elem_to_byte_clamped(&range_elem, payload.len()); - let range_poly_byte = self.range_poly_to_byte_clamped(&range_poly, payload.len()); - let offset_elem = self.offset_poly_to_elem(range_poly.start, range_elem.start); + let range_poly_byte = + self.range_poly_to_byte_clamped(&range_poly, payload.len(), multiplicity); + let offset_elem = + self.offset_poly_to_elem(range_poly.start, range_elem.start, multiplicity); let final_points_range_end = - 
self.final_poly_points_range_end(range_elem.len(), offset_elem); + self.final_poly_points_range_end(range_elem.len(), offset_elem, multiplicity); // prepare list of input points - // perf: we might not need all these points - let points: Vec<_> = self.eval_domain.elements().collect(); + // + // perf: if payload is small enough to fit into a single polynomial then + // we don't need all the points in this domain. + let points: Vec<_> = Self::eval_domain( + usize::try_from(self.recovery_threshold * multiplicity).map_err(vid)?, + )? + .elements() + .collect(); let elems_iter = bytes_to_field::<_, KzgEval>(&payload[range_poly_byte]); let mut proofs = Vec::with_capacity(range_poly.len() * points.len()); for (i, evals_iter) in elems_iter - .chunks(self.recovery_threshold as usize) + .chunks((self.recovery_threshold * multiplicity) as usize) .into_iter() .enumerate() { - let poly = self.polynomial(evals_iter); + let poly = Self::interpolate_polynomial( + evals_iter, + (self.recovery_threshold * multiplicity) as usize, + )?; let points_range = Range { // first polynomial? skip to the start of the proof range start: if i == 0 { offset_elem } else { 0 }, @@ -151,14 +164,24 @@ where // index conversion let range_elem = self.range_byte_to_elem(&stmt.range); - let range_poly = self.range_elem_to_poly(&range_elem); - let offset_elem = self.offset_poly_to_elem(range_poly.start, range_elem.start); - let final_points_range_end = - self.final_poly_points_range_end(range_elem.len(), offset_elem); + let range_poly = self.range_elem_to_poly(&range_elem, stmt.common.multiplicity); + let offset_elem = + self.offset_poly_to_elem(range_poly.start, range_elem.start, stmt.common.multiplicity); + let final_points_range_end = self.final_poly_points_range_end( + range_elem.len(), + offset_elem, + stmt.common.multiplicity, + ); // prepare list of input points - // perf: we might not need all these points - let points: Vec<_> = self.eval_domain.elements().collect(); + // + // perf: if payload is small enough to fit into a single polynomial then + // we don't need all the points in this domain. + let points: Vec<_> = Self::eval_domain( + usize::try_from(self.recovery_threshold * stmt.common.multiplicity).map_err(vid)?, + )? 
+ .elements() + .collect(); // verify proof let mut cur_proof_index = 0; @@ -202,6 +225,7 @@ impl PayloadProver>> for AdvzInternal: MaybeGPU, { fn payload_proof( @@ -216,11 +240,14 @@ where check_range_nonempty_and_in_bounds(payload.len(), &range)?; // index conversion + let multiplicity = self.min_multiplicity(payload.len())?; let range_elem = self.range_byte_to_elem(&range); - let range_poly = self.range_elem_to_poly(&range_elem); + let range_poly = self.range_elem_to_poly(&range_elem, multiplicity); let range_elem_byte = self.range_elem_to_byte_clamped(&range_elem, payload.len()); - let range_poly_byte = self.range_poly_to_byte_clamped(&range_poly, payload.len()); - let offset_elem = self.offset_poly_to_elem(range_poly.start, range_elem.start); + let range_poly_byte = + self.range_poly_to_byte_clamped(&range_poly, payload.len(), multiplicity); + let offset_elem = + self.offset_poly_to_elem(range_poly.start, range_elem.start, multiplicity); // compute the prefix and suffix elems let mut elems_iter = bytes_to_field::<_, KzgEval>(payload[range_poly_byte].iter()); @@ -243,7 +270,7 @@ where Self::check_stmt_consistency(&stmt)?; // index conversion - let range_poly = self.range_byte_to_poly(&stmt.range); + let range_poly = self.range_byte_to_poly(&stmt.range, stmt.common.multiplicity); // rebuild the needed payload elements from statement and proof let elems_iter = proof @@ -258,14 +285,16 @@ where .chain(proof.suffix_bytes.iter()), )) .chain(proof.suffix_elems.iter().cloned()); - // rebuild the poly commits, check against `common` for (commit_index, evals_iter) in range_poly.into_iter().zip( elems_iter - .chunks(self.recovery_threshold as usize) + .chunks((self.recovery_threshold * stmt.common.multiplicity) as usize) .into_iter(), ) { - let poly = self.polynomial(evals_iter); + let poly = Self::interpolate_polynomial( + evals_iter, + (stmt.common.multiplicity * self.recovery_threshold) as usize, + )?; let poly_commit = UnivariateKzgPCS::commit(&self.ck, &poly).map_err(vid)?; if poly_commit != stmt.common.poly_commits[commit_index] { return Ok(Err(())); @@ -279,6 +308,7 @@ impl AdvzInternal where E: Pairing, H: HasherDigest, + T: Sync, AdvzInternal: MaybeGPU, { // lots of index manipulation @@ -292,34 +322,49 @@ where ..result } } - fn range_elem_to_poly(&self, range: &Range) -> Range { - range_coarsen(range, self.recovery_threshold as usize) + fn range_elem_to_poly(&self, range: &Range, multiplicity: u32) -> Range { + range_coarsen(range, (self.recovery_threshold * multiplicity) as usize) } - fn range_byte_to_poly(&self, range: &Range) -> Range { + fn range_byte_to_poly(&self, range: &Range, multiplicity: u32) -> Range { range_coarsen( range, - self.recovery_threshold as usize * elem_byte_capacity::>(), + (self.recovery_threshold * multiplicity) as usize * elem_byte_capacity::>(), ) } - fn range_poly_to_byte_clamped(&self, range: &Range, len: usize) -> Range { + fn range_poly_to_byte_clamped( + &self, + range: &Range, + len: usize, + multiplicity: u32, + ) -> Range { let result = range_refine( range, - self.recovery_threshold as usize * elem_byte_capacity::>(), + (self.recovery_threshold * multiplicity) as usize * elem_byte_capacity::>(), ); Range { end: ark_std::cmp::min(result.end, len), ..result } } - fn offset_poly_to_elem(&self, range_poly_start: usize, range_elem_start: usize) -> usize { + fn offset_poly_to_elem( + &self, + range_poly_start: usize, + range_elem_start: usize, + multiplicity: u32, + ) -> usize { let start_poly_byte = index_refine( range_poly_start, - 
self.recovery_threshold as usize * elem_byte_capacity::>(), + (self.recovery_threshold * multiplicity) as usize * elem_byte_capacity::>(), ); range_elem_start - index_coarsen(start_poly_byte, elem_byte_capacity::>()) } - fn final_poly_points_range_end(&self, range_elem_len: usize, offset_elem: usize) -> usize { - (range_elem_len + offset_elem - 1) % self.recovery_threshold as usize + 1 + fn final_poly_points_range_end( + &self, + range_elem_len: usize, + offset_elem: usize, + multiplicity: u32, + ) -> usize { + (range_elem_len + offset_elem - 1) % (self.recovery_threshold * multiplicity) as usize + 1 } fn check_stmt_consistency(stmt: &Statement) -> VidResult<()> { @@ -385,7 +430,7 @@ mod tests { advz::{ bytes_to_field::elem_byte_capacity, payload_prover::{LargeRangeProof, SmallRangeProof, Statement}, - tests::*, + test::*, *, }, payload_prover::PayloadProver, @@ -400,17 +445,24 @@ mod tests { H: HasherDigest, { // play with these items - let (recovery_threshold, num_storage_nodes) = (4, 6); + let (recovery_threshold, num_storage_nodes, max_multiplicity) = (4, 6, 2); let num_polys = 3; let num_random_cases = 20; // more items as a function of the above - let payload_elems_len = num_polys * recovery_threshold as usize; + let poly_elems_len = recovery_threshold as usize * max_multiplicity as usize; + let payload_elems_len = num_polys * poly_elems_len; + let poly_bytes_len = poly_elems_len * elem_byte_capacity::(); let payload_bytes_base_len = payload_elems_len * elem_byte_capacity::(); - let poly_bytes_len = recovery_threshold as usize * elem_byte_capacity::(); let mut rng = jf_utils::test_rng(); let srs = init_srs(payload_elems_len, &mut rng); - let mut advz = Advz::::new(num_storage_nodes, recovery_threshold, srs).unwrap(); + let mut advz = Advz::::with_multiplicity( + num_storage_nodes, + recovery_threshold, + max_multiplicity, + srs, + ) + .unwrap(); // TEST: different payload byte lengths let payload_byte_len_noise_cases = vec![0, poly_bytes_len / 2, poly_bytes_len - 1]; @@ -441,9 +493,15 @@ mod tests { }; let all_cases = [(edge_cases, "edge"), (random_cases, "rand")]; + // at least one test case should have nontrivial multiplicity + let mut nontrivial_multiplicity = false; + for payload_len_case in payload_len_cases { let payload = init_random_payload(payload_len_case, &mut rng); let d = advz.disperse(&payload).unwrap(); + if d.common.multiplicity > 1 { + nontrivial_multiplicity = true; + } println!("payload byte len case: {}", payload.len()); for cases in all_cases.iter() { @@ -510,6 +568,11 @@ mod tests { } } + assert!( + nontrivial_multiplicity, + "at least one payload size should use multiplicity > 1" + ); + fn make_edge_cases(min: usize, max: usize) -> Vec> { vec![ Range { diff --git a/vid/src/advz/precomputable.rs b/vid/src/advz/precomputable.rs index f53b9c283..777bcf50a 100644 --- a/vid/src/advz/precomputable.rs +++ b/vid/src/advz/precomputable.rs @@ -26,6 +26,7 @@ impl Precomputable for AdvzInternal where E: Pairing, H: HasherDigest, + T: Sync, AdvzInternal: MaybeGPU, { type PrecomputeData = PrecomputeData; @@ -38,7 +39,8 @@ where B: AsRef<[u8]>, { let payload = payload.as_ref(); - let polys = self.bytes_to_polys(payload); + let multiplicity = self.min_multiplicity(payload.len()); + let polys = self.bytes_to_polys(payload)?; let poly_commits: Vec> = UnivariateKzgPCS::batch_commit(&self.ck, &polys).map_err(vid)?; Ok(( @@ -56,88 +58,10 @@ where B: AsRef<[u8]>, { let payload = payload.as_ref(); - let payload_byte_len = payload.len().try_into().map_err(vid)?; - let disperse_time = 
start_timer!(|| ark_std::format!( - "(PRECOMPUTE): VID disperse {} payload bytes to {} nodes", - payload_byte_len, - self.num_storage_nodes - )); - let _chunk_size = self.multiplicity * self.recovery_threshold; - let code_word_size = self.multiplicity * self.num_storage_nodes; + let polys = self.bytes_to_polys(payload)?; + let poly_commits = data.poly_commits.clone(); - // partition payload into polynomial coefficients - // and count `elems_len` for later - let bytes_to_polys_time = start_timer!(|| "encode payload bytes into polynomials"); - let polys = self.bytes_to_polys(payload); - end_timer!(bytes_to_polys_time); - - // evaluate polynomials - let all_storage_node_evals_timer = start_timer!(|| ark_std::format!( - "compute all storage node evals for {} polynomials with {} coefficients", - polys.len(), - _chunk_size - )); - let all_storage_node_evals = self.evaluate_polys(&polys)?; - end_timer!(all_storage_node_evals_timer); - - // vector commitment to polynomial evaluations - // TODO why do I need to compute the height of the merkle tree? - let all_evals_commit_timer = - start_timer!(|| "compute merkle root of all storage node evals"); - let all_evals_commit = - KzgEvalsMerkleTree::::from_elems(None, &all_storage_node_evals).map_err(vid)?; - end_timer!(all_evals_commit_timer); - - let common_timer = start_timer!(|| ark_std::format!( - "(PRECOMPUTE): compute {} KZG commitments", - polys.len() - )); - let common = Common { - poly_commits: data.poly_commits.clone(), - all_evals_digest: all_evals_commit.commitment().digest(), - payload_byte_len, - num_storage_nodes: self.num_storage_nodes, - multiplicity: self.multiplicity, - }; - end_timer!(common_timer); - - let commit = Self::derive_commit( - &common.poly_commits, - payload_byte_len, - self.num_storage_nodes, - )?; - let pseudorandom_scalar = Self::pseudorandom_scalar(&common, &commit)?; - - // Compute aggregate polynomial as a pseudorandom linear combo of polynomial via - // evaluation of the polynomial whose coefficients are polynomials and whose - // input point is the pseudorandom scalar. 
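
For reference, the pseudorandom linear combination described in the comment above is `sum_i r^i * p_i(X)`: the list of polynomials is treated as the coefficient vector of a polynomial over polynomials and evaluated at the pseudorandom scalar by Horner's rule, which is what the crate's `polynomial_eval` over `PolynomialMultiplier` wrappers computes. A minimal standalone sketch of that fold over dense coefficient vectors, illustrative only and not the crate's implementation:

use ark_ff::Field;

// Compute sum_i r^i * p_i coefficient-wise, folding from the highest
// "digit" down: acc = acc * r + p_i.
fn aggregate_poly_sketch<F: Field>(polys: &[Vec<F>], r: F) -> Vec<F> {
    let max_len = polys.iter().map(Vec::len).max().unwrap_or(0);
    let mut acc = vec![F::zero(); max_len];
    for p in polys.iter().rev() {
        for c in acc.iter_mut() {
            *c *= r; // acc <- acc * r
        }
        for (a, b) in acc.iter_mut().zip(p.iter()) {
            *a += b; // acc <- acc + p_i
        }
    }
    acc
}
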
- let aggregate_poly = - polynomial_eval(polys.iter().map(PolynomialMultiplier), pseudorandom_scalar); - - let agg_proofs_timer = start_timer!(|| ark_std::format!( - "compute aggregate proofs for {} storage nodes", - self.num_storage_nodes - )); - let aggregate_proofs = UnivariateKzgPCS::multi_open_rou_proofs( - &self.ck, - &aggregate_poly, - code_word_size as usize, - &self.multi_open_domain, - ) - .map_err(vid)?; - end_timer!(agg_proofs_timer); - - let assemblage_timer = start_timer!(|| "assemble shares for dispersal"); - let shares = - self.assemble_shares(all_storage_node_evals, aggregate_proofs, all_evals_commit)?; - end_timer!(assemblage_timer); - end_timer!(disperse_time); - - Ok(VidDisperse { - shares, - common, - commit, - }) + self.disperse_with_polys_and_commits(payload, polys, poly_commits) } } @@ -166,7 +90,7 @@ where mod tests { use crate::{ advz::{ - tests::{advz_init, init_random_payload, init_srs}, + test::{advz_init, init_random_payload, init_srs}, Advz, }, precomputable::Precomputable, diff --git a/vid/src/advz/test.rs b/vid/src/advz/test.rs new file mode 100644 index 000000000..48a8a7c8b --- /dev/null +++ b/vid/src/advz/test.rs @@ -0,0 +1,523 @@ +use super::{VidError::Argument, *}; +use ark_bn254::Bn254; +use ark_std::{ + rand::{CryptoRng, RngCore}, + vec, +}; +use jf_pcs::{ + checked_fft_size, + prelude::{Commitment, UnivariateUniversalParams}, +}; +use jf_utils::field_byte_len; +use sha2::Sha256; + +#[ignore] +#[test] +fn disperse_timer() { + // run with 'print-trace' feature to see timer output + let (recovery_threshold, num_storage_nodes) = (256, 512); + let mut rng = jf_utils::test_rng(); + let srs = init_srs(recovery_threshold as usize, &mut rng); + #[cfg(feature = "gpu-vid")] + let mut advz_gpu = + AdvzGPU::<'_, Bn254, Sha256>::new(num_storage_nodes, recovery_threshold, &srs).unwrap(); + let mut advz = Advz::::new(num_storage_nodes, recovery_threshold, srs).unwrap(); + + let payload_random = init_random_payload(1 << 25, &mut rng); + + #[cfg(feature = "gpu-vid")] + let _ = advz_gpu.disperse(payload_random.clone()); + let _ = advz.disperse(payload_random); +} + +#[ignore] +#[test] +fn commit_only_timer() { + // run with 'print-trace' feature to see timer output + let (recovery_threshold, num_storage_nodes) = (256, 512); + let mut rng = jf_utils::test_rng(); + let srs = init_srs(recovery_threshold as usize, &mut rng); + #[cfg(feature = "gpu-vid")] + let mut advz_gpu = + AdvzGPU::<'_, Bn254, Sha256>::new(num_storage_nodes, recovery_threshold, &srs).unwrap(); + let mut advz = Advz::::new(num_storage_nodes, recovery_threshold, srs).unwrap(); + + let payload_random = init_random_payload(1 << 25, &mut rng); + + #[cfg(feature = "gpu-vid")] + let _ = advz_gpu.commit_only(payload_random.clone()); + + let _ = advz.commit_only(payload_random); +} + +#[test] +fn sad_path_verify_share_corrupt_share() { + let (mut advz, bytes_random) = advz_init(); + let disperse = advz.disperse(bytes_random).unwrap(); + let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit); + + for (i, share) in shares.iter().enumerate() { + // missing share eval + { + let share_missing_eval = Share { + evals: share.evals[1..].to_vec(), + ..share.clone() + }; + assert_arg_err( + advz.verify_share(&share_missing_eval, &common, &commit), + "1 missing share should be arg error", + ); + } + + // corrupted share eval + { + let mut share_bad_eval = share.clone(); + share_bad_eval.evals[0].double_in_place(); + advz.verify_share(&share_bad_eval, &common, &commit) + .unwrap() + 
.expect_err("bad share value should fail verification"); + } + + // corrupted index, in bounds + { + let share_bad_index = Share { + index: (share.index + 1) % advz.num_storage_nodes, + ..share.clone() + }; + advz.verify_share(&share_bad_index, &common, &commit) + .unwrap() + .expect_err("bad share index should fail verification"); + } + + // corrupted index, out of bounds + { + let share_bad_index = Share { + index: share.index + advz.num_storage_nodes, + ..share.clone() + }; + advz.verify_share(&share_bad_index, &common, &commit) + .unwrap() + .expect_err("bad share index should fail verification"); + } + + // corrupt eval proof + { + // We have no way to corrupt a proof + // (without also causing a deserialization failure). + // So we use another share's proof instead. + let share_bad_evals_proof = Share { + eval_proofs: shares[(i + 1) % shares.len()].eval_proofs.clone(), + ..share.clone() + }; + advz.verify_share(&share_bad_evals_proof, &common, &commit) + .unwrap() + .expect_err("bad share evals proof should fail verification"); + } + } +} + +#[test] +fn sad_path_verify_share_corrupt_commit() { + let (mut advz, bytes_random) = advz_init(); + let disperse = advz.disperse(bytes_random).unwrap(); + let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit); + + // missing commit + let common_missing_item = Common { + poly_commits: common.poly_commits[1..].to_vec(), + ..common.clone() + }; + assert_arg_err( + advz.verify_share(&shares[0], &common_missing_item, &commit), + "1 missing commit should be arg error", + ); + + // 1 corrupt commit, poly_commit + let common_1_poly_corruption = { + let mut corrupted = common.clone(); + corrupted.poly_commits[0] = ::G1Affine::zero().into(); + corrupted + }; + assert_arg_err( + advz.verify_share(&shares[0], &common_1_poly_corruption, &commit), + "corrupted commit should be arg error", + ); + + // 1 corrupt commit, all_evals_digest + let common_1_digest_corruption = { + let mut corrupted = common; + let mut digest_bytes = vec![0u8; corrupted.all_evals_digest.uncompressed_size()]; + corrupted + .all_evals_digest + .serialize_uncompressed(&mut digest_bytes) + .expect("digest serialization should succeed"); + digest_bytes[0] += 1; + corrupted.all_evals_digest = HasherNode::deserialize_uncompressed(digest_bytes.as_slice()) + .expect("digest deserialization should succeed"); + corrupted + }; + advz.verify_share(&shares[0], &common_1_digest_corruption, &commit) + .unwrap() + .expect_err("1 corrupt all_evals_digest should fail verification"); +} + +#[test] +fn sad_path_verify_share_corrupt_share_and_commit() { + let (mut advz, bytes_random) = advz_init(); + let disperse = advz.disperse(bytes_random).unwrap(); + let (mut shares, mut common, commit) = (disperse.shares, disperse.common, disperse.commit); + + common.poly_commits.pop(); + shares[0].evals.pop(); + + // equal nonzero lengths for common, share + assert_arg_err( + advz.verify_share(&shares[0], &common, &commit), + "common inconsistent with commit should be arg error", + ); + + common.poly_commits.clear(); + shares[0].evals.clear(); + + // zero length for common, share + assert_arg_err( + advz.verify_share(&shares[0], &common, &commit), + "expect arg error for common inconsistent with commit", + ); +} + +#[test] +fn sad_path_recover_payload_corrupt_shares() { + let (mut advz, bytes_random) = advz_init(); + let disperse = advz.disperse(&bytes_random).unwrap(); + let (shares, common) = (disperse.shares, disperse.common); + + { + // unequal share eval lengths + let mut 
shares_missing_evals = shares.clone(); + for i in 0..shares_missing_evals.len() - 1 { + shares_missing_evals[i].evals.pop(); + assert_arg_err( + advz.recover_payload(&shares_missing_evals, &common), + format!("{} shares missing 1 eval should be arg error", i + 1).as_str(), + ); + } + + // 1 eval missing from all shares + shares_missing_evals.last_mut().unwrap().evals.pop(); + assert_arg_err( + advz.recover_payload(&shares_missing_evals, &common), + format!( + "shares contain {} but expected {}", + shares_missing_evals[0].evals.len(), + &common.poly_commits.len() + ) + .as_str(), + ); + } + + // corrupted index, in bounds + { + let mut shares_bad_indices = shares.clone(); + + // permute indices to avoid duplicates and keep them in bounds + for share in &mut shares_bad_indices { + share.index = (share.index + 1) % advz.num_storage_nodes; + } + + let bytes_recovered = advz + .recover_payload(&shares_bad_indices, &common) + .expect("recover_payload should succeed when indices are in bounds"); + assert_ne!(bytes_recovered, bytes_random); + } + + // corrupted index, out of bounds + { + let mut shares_bad_indices = shares.clone(); + let multi_open_domain_size = advz.multi_open_domain(common.multiplicity).unwrap().size(); + for i in 0..shares_bad_indices.len() { + shares_bad_indices[i].index += u32::try_from(multi_open_domain_size).unwrap(); + advz.recover_payload(&shares_bad_indices, &common) + .expect_err("recover_payload should fail when indices are out of bounds"); + } + } +} + +#[test] +fn verify_share_with_multiplicity() { + let advz_params = AdvzParams { + recovery_threshold: 16, + num_storage_nodes: 20, + max_multiplicity: 4, + payload_len: 4000, + }; + let (mut advz, payload) = advz_init_with::(advz_params); + + let disperse = advz.disperse(payload).unwrap(); + let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit); + + for share in shares { + assert!(advz.verify_share(&share, &common, &commit).unwrap().is_ok()) + } +} + +#[test] +fn sad_path_verify_share_with_multiplicity() { + // regression test for https://github.com/EspressoSystems/jellyfish/issues/654 + let advz_params = AdvzParams { + recovery_threshold: 16, + num_storage_nodes: 20, + max_multiplicity: 32, // payload fitting into a single polynomial + payload_len: 8200, + }; + let (mut advz, payload) = advz_init_with::(advz_params); + + let disperse = advz.disperse(payload).unwrap(); + let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit); + for (i, share) in shares.iter().enumerate() { + // corrupt the last evaluation of the share + { + let mut share_bad_eval = share.clone(); + share_bad_eval.evals[common.multiplicity as usize - 1].double_in_place(); + advz.verify_share(&share_bad_eval, &common, &commit) + .unwrap() + .expect_err("bad share value should fail verification"); + } + + // check that verification fails if any of the eval_proofs are + // inconsistent with the merkle root. + // corrupt the last eval proof of this share by assigning it to the value of + // last eval proof of the next share. 
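
This swap is caught because each eval proof is a Merkle opening bound to a leaf position derived from the share's own index, and the verifier recomputes that position before checking the opening against `common.all_evals_digest`. A hypothetical sketch of the position arithmetic (the exact leaf layout is an assumption for illustration, not the crate's actual indexing):

// Hypothetical leaf layout: with multiplicity m, share i owns leaves
// [i * m, (i + 1) * m), so a proof copied from share i + 1 opens leaf
// (i + 1) * m + j and cannot verify at share i's expected position.
fn expected_leaf_index(share_index: u64, multiplicity: u64, j: u64) -> u64 {
    share_index * multiplicity + j
}
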
+            {
+                let mut share_bad_eval_proofs = share.clone();
+                let next_eval_proof = shares[(i + 1) % shares.len()].eval_proofs
+                    [common.multiplicity as usize - 1]
+                    .clone();
+                share_bad_eval_proofs.eval_proofs[common.multiplicity as usize - 1] = next_eval_proof;
+                advz.verify_share(&share_bad_eval_proofs, &common, &commit)
+                    .unwrap()
+                    .expect_err("bad share evals proof should fail verification");
+            }
+        }
+    }
+}
+
+#[test]
+fn verify_share_with_different_multiplicity() {
+    // leader_multiplicity < everyone else's multiplicity
+    verify_share_with_different_multiplicity_helper::<Bn254, Sha256>(4, 2);
+    // leader_multiplicity > everyone else's multiplicity
+    verify_share_with_different_multiplicity_helper::<Bn254, Sha256>(2, 4);
+}
+
+fn verify_share_with_different_multiplicity_helper<E, H>(
+    multiplicity: u32,
+    leader_multiplicity: u32,
+) where
+    E: Pairing,
+    H: HasherDigest,
+{
+    // play with these items
+    let num_storage_nodes = 6;
+    let recovery_threshold = 4;
+
+    // more items as a function of the above
+    assert_ne!(
+        multiplicity, leader_multiplicity,
+        "leader_multiplicity should differ from multiplicity for this test"
+    );
+    let max_degree = recovery_threshold * multiplicity.max(leader_multiplicity);
+    let mut rng = jf_utils::test_rng();
+    let srs = init_srs(max_degree as usize, &mut rng);
+    let advz =
+        Advz::<E, H>::with_multiplicity(num_storage_nodes, recovery_threshold, multiplicity, &srs)
+            .unwrap();
+    let mut leader_advz = Advz::<E, H>::with_multiplicity(
+        num_storage_nodes,
+        recovery_threshold,
+        leader_multiplicity,
+        &srs,
+    )
+    .unwrap();
+    let payload = {
+        // ensure payload is large enough to fill at least 1 polynomial at
+        // maximum multiplicity.
+        let coeff_byte_len = field_byte_len::<<E as Pairing>::ScalarField>();
+        let payload_byte_len = max_degree as usize * coeff_byte_len;
+        init_random_payload(payload_byte_len, &mut rng)
+    };
+
+    // compute shares using `leader_multiplicity`
+    let disperse = leader_advz.disperse(payload).unwrap();
+    let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit);
+
+    // verify shares using `multiplicity` != `leader_multiplicity`
+    for share in shares {
+        assert_arg_err(
+            advz.verify_share(&share, &common, &commit),
+            "inconsistent multiplicities should be arg error",
+        );
+    }
+}
+
+#[test]
+fn max_multiplicity() {
+    // regression test for https://github.com/EspressoSystems/jellyfish/issues/663
+
+    // play with these items
+    let num_storage_nodes = 6;
+    let recovery_threshold = 4;
+    let max_multiplicity = 1 << 5; // intentionally large so as to fit many payload sizes into a single polynomial
+
+    let payload_byte_lens = [0, 1, 100, 10_000];
+    type E = Bn254;
+
+    // more items as a function of the above
+    let (mut advz, payload_bytes) = advz_init_with::<E, Sha256>(AdvzParams {
+        recovery_threshold,
+        num_storage_nodes,
+        max_multiplicity,
+        payload_len: *payload_byte_lens.iter().max().unwrap(),
+    });
+    let elem_byte_len = bytes_to_field::elem_byte_capacity::<<E as Pairing>::ScalarField>();
+    let (mut found_small_payload, mut found_large_payload) = (false, false);
+
+    for payload_byte_len in payload_byte_lens {
+        let payload = &payload_bytes[..payload_byte_len];
+        let num_payload_elems = payload_byte_len.div_ceil(elem_byte_len) as u32;
+
+        let disperse = advz.disperse(payload).unwrap();
+        let (shares, common, commit) = (disperse.shares, disperse.common, disperse.commit);
+
+        // test: multiplicity set correctly
+        assert!(
+            common.multiplicity <= max_multiplicity,
+            "derived multiplicity should never exceed max_multiplicity"
+        );
+        if num_payload_elems < max_multiplicity * recovery_threshold {
+            // small payload
+            found_small_payload = true;
+            assert!(
+                num_payload_elems <= common.multiplicity * advz.recovery_threshold,
+                "derived multiplicity too small"
+            );
+
+            if num_payload_elems > 0 {
+                // TODO TEMPORARY: enforce power-of-2
+                // https://github.com/EspressoSystems/jellyfish/issues/668
+                //
+                // After this issue is fixed the following test should use
+                // `common.multiplicity - 1` instead of `common.multiplicity / 2`.
+                assert!(
+                    num_payload_elems > common.multiplicity / 2 * advz.recovery_threshold,
+                    "derived multiplicity too large: payload_byte_len {}, common.multiplicity {}",
+                    payload_byte_len,
+                    common.multiplicity
+                );
+            } else {
+                assert_eq!(
+                    common.multiplicity, 1,
+                    "zero-length payload should have multiplicity 1, found {}",
+                    common.multiplicity
+                );
+            }
+
+            assert!(
+                common.poly_commits.len() <= 1,
+                "small payload should fit into a single polynomial"
+            );
+        } else {
+            // large payload
+            found_large_payload = true;
+            assert_eq!(
+                common.multiplicity, max_multiplicity,
+                "derived multiplicity should equal max_multiplicity for large payload"
+            );
+        }
+
+        // sanity: recover payload
+        let bytes_recovered = advz.recover_payload(&shares, &common).unwrap();
+        assert_eq!(bytes_recovered, payload);
+
+        // sanity: verify shares
+        for share in shares {
+            advz.verify_share(&share, &common, &commit)
+                .unwrap()
+                .unwrap();
+        }
+    }
+
+    assert!(found_large_payload, "missing test for large payload");
+    assert!(found_small_payload, "missing test for small payload");
+}
+
+struct AdvzParams {
+    recovery_threshold: u32,
+    num_storage_nodes: u32,
+    max_multiplicity: u32,
+    payload_len: usize,
+}
+
+/// Routine initialization tasks.
+///
+/// Returns the following tuple:
+/// 1. An initialized [`Advz`] instance.
+/// 2. A `Vec<u8>` filled with random bytes.
+pub(super) fn advz_init() -> (Advz<Bn254, Sha256>, Vec<u8>) {
+    let advz_params = AdvzParams {
+        recovery_threshold: 16,
+        num_storage_nodes: 20,
+        max_multiplicity: 1,
+        payload_len: 4000,
+    };
+    advz_init_with(advz_params)
+}
+
+fn advz_init_with<E: Pairing, H: HasherDigest>(
+    advz_params: AdvzParams,
+) -> (Advz<E, H>, Vec<u8>) {
+    let mut rng = jf_utils::test_rng();
+    let poly_len = advz_params.recovery_threshold * advz_params.max_multiplicity;
+    let srs = init_srs(poly_len as usize, &mut rng);
+    assert_ne!(
+        advz_params.max_multiplicity, 0,
+        "multiplicity should not be zero"
+    );
+    let advz = if advz_params.max_multiplicity > 1 {
+        Advz::with_multiplicity(
+            advz_params.num_storage_nodes,
+            advz_params.recovery_threshold,
+            advz_params.max_multiplicity,
+            srs,
+        )
+        .unwrap()
+    } else {
+        Advz::new(
+            advz_params.num_storage_nodes,
+            advz_params.recovery_threshold,
+            srs,
+        )
+        .unwrap()
+    };
+    let bytes_random = init_random_payload(advz_params.payload_len, &mut rng);
+    (advz, bytes_random)
+}
+
+/// Convenience wrapper to assert [`VidError::Argument`] return value.
+pub(super) fn assert_arg_err<T>(res: VidResult<T>, msg: &str) {
+    assert!(matches!(res, Err(Argument(_))), "{}", msg);
+}
+
+pub(super) fn init_random_payload<R>(len: usize, rng: &mut R) -> Vec<u8>
+where
+    R: RngCore + CryptoRng,
+{
+    let mut bytes_random = vec![0u8; len];
+    rng.fill_bytes(&mut bytes_random);
+    bytes_random
+}
+
+pub(super) fn init_srs<E, R>(num_coeffs: usize, rng: &mut R) -> UnivariateUniversalParams<E>
+where
+    E: Pairing,
+    R: RngCore + CryptoRng,
+{
+    UnivariateKzgPCS::gen_srs_for_testing(rng, checked_fft_size(num_coeffs - 1).unwrap()).unwrap()
+}
diff --git a/vid/tests/vid/mod.rs b/vid/tests/vid/mod.rs
index cf8c3a48a..fb3fe7c55 100644
--- a/vid/tests/vid/mod.rs
+++ b/vid/tests/vid/mod.rs
@@ -8,28 +8,32 @@ use jf_vid::{VidError, VidResult, VidScheme};
 
 /// Correctness test generic over anything that impls [`VidScheme`]
 ///
-/// `pub` visibility, but it's not part of this crate's public API
-/// because it's in an integration test.
+/// TODO this test should not have a `max_multiplicities` arg. It is intended to
+/// be generic over the [`VidScheme`] and a generic VID scheme does not have a
+/// multiplicity arg.
+///
+/// `pub` visibility, but it's not part of this crate's public
+/// API because it's in an integration test.
 ///
 pub fn round_trip<V, R>(
     vid_factory: impl Fn(u32, u32, u32) -> V,
     vid_sizes: &[(u32, u32)],
-    multiplicities: &[u32],
+    max_multiplicities: &[u32],
     payload_byte_lens: &[u32],
     rng: &mut R,
 ) where
     V: VidScheme,
     R: RngCore + CryptoRng,
 {
-    for (&mult, &(recovery_threshold, num_storage_nodes)) in
-        zip(multiplicities.iter().cycle(), vid_sizes)
+    for (&max_multiplicity, &(recovery_threshold, num_storage_nodes)) in
+        zip(max_multiplicities.iter().cycle(), vid_sizes)
     {
-        let mut vid = vid_factory(recovery_threshold, num_storage_nodes, mult);
+        let mut vid = vid_factory(recovery_threshold, num_storage_nodes, max_multiplicity);
 
         for &len in payload_byte_lens {
             println!(
-                "m: {} n: {} mult: {} byte_len: {}",
-                recovery_threshold, num_storage_nodes, mult, len
+                "m: {} n: {} byte_len: {} max_mult: {}",
+                recovery_threshold, num_storage_nodes, len, max_multiplicity
            );
 
            let bytes_random = {
@@ -43,7 +47,7 @@ pub fn round_trip<V, R>(
             assert_eq!(shares.len(), num_storage_nodes as usize);
             assert_eq!(commit, vid.commit_only(&bytes_random).unwrap());
             assert_eq!(len, V::get_payload_byte_len(&common));
-            assert_eq!(mult, V::get_multiplicity(&common));
+            assert!(V::get_multiplicity(&common) <= max_multiplicity);
             assert_eq!(num_storage_nodes, V::get_num_storage_nodes(&common));
 
             for share in shares.iter() {
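
To see how this generic harness gets instantiated, a caller supplies a factory closure that builds a concrete scheme; the sketch below wires it to Advz, reusing the `init_srs` helper shown above for brevity. The import paths, SRS sizing, and parameter lists are assumptions for illustration, not the integration test's actual setup:

use ark_bn254::Bn254;
use sha2::Sha256;
// assumed path to the scheme under test
use jf_vid::advz::Advz;

fn round_trip_advz_demo() {
    let mut rng = jf_utils::test_rng();
    // the SRS must cover the largest recovery_threshold * max_multiplicity used below
    let srs = init_srs(8 * 4, &mut rng);
    round_trip(
        |recovery_threshold, num_storage_nodes, max_multiplicity| {
            Advz::<Bn254, Sha256>::with_multiplicity(
                num_storage_nodes,
                recovery_threshold,
                max_multiplicity,
                &srs,
            )
            .unwrap()
        },
        &[(4, 6), (8, 11)],   // (recovery_threshold, num_storage_nodes) pairs
        &[1, 2, 4],           // max_multiplicities, cycled across vid_sizes
        &[0, 1, 100, 10_000], // payload byte lengths
        &mut rng,
    );
}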