feat: Add min_replicas for SimpleScaler, apply_min_to_namespace config (#3282)

## What ❔

Breaking change: config option `min_provers` is a number now!
Add `apply_min_to_namespace` to specify which namespace is currently the
primary one; the `min_*` settings are applied only to the primary namespace.
Add `min_replicas` for SimpleScaler.
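
For reference, here is a minimal sketch of how the reshaped options deserialize. It is a simplified stand-in for the real `ProverAutoscalerScalerConfig`/`ScalerTarget` structs: only a few fields are shown, and the crate versions in the comment are assumptions.

```rust
// Simplified stand-in for the autoscaler config structs; assumes
// serde = { version = "1", features = ["derive"] } and serde_yaml = "0.9".
use std::collections::HashMap;

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ScalerTargetSketch {
    deployment: String,
    /// Omitted in the YAML -> 0, i.e. the target may scale down to zero.
    #[serde(default)]
    min_replicas: usize,
    max_replicas: HashMap<String, usize>,
}

#[derive(Debug, Deserialize)]
struct ScalerConfigSketch {
    /// Breaking change: a plain number now, not a namespace-to-number map.
    #[serde(default)]
    min_provers: u32,
    /// The single namespace the `min_*` floors apply to.
    apply_min_to_namespace: Option<String>,
    #[serde(default)]
    scaler_targets: Vec<ScalerTargetSketch>,
}

fn main() {
    let yaml = r#"
apply_min_to_namespace: prover-new
min_provers: 1
scaler_targets:
  - deployment: witness-generator-basic-fri
    min_replicas: 1
    max_replicas:
      cluster1: 10
"#;
    let config: ScalerConfigSketch = serde_yaml::from_str(yaml).unwrap();
    println!("{config:#?}");
}
```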


## Why ❔

To allow Witness Generators and Proof Compressors to always keep running when
needed. This removes the pod start delay of about 2 minutes when a new job
appears in the queue.
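
Roughly, the delay disappears because the scaler now treats the queue of the primary namespace as if it were at least large enough to keep the configured minimum of pods busy. A minimal sketch of that floor, with illustrative names and numbers (the actual logic is in the scaler changes below):

```rust
/// Sketch of the queue floor applied only to the primary namespace.
fn effective_queue(
    queue: u64,
    namespace: &str,
    primary: Option<&str>,
    min_pods: u64,
    speed: u64,
) -> u64 {
    if primary == Some(namespace) {
        // Pretend the queue is big enough to keep `min_pods` busy, so the
        // scaler never scales the primary namespace below the minimum.
        queue.max(min_pods * speed)
    } else {
        queue
    }
}

fn main() {
    // With min_provers = 2 and a speed of 500 jobs per pod, an empty queue in
    // the primary namespace is treated as 1000, so 2 provers keep running and
    // the ~2 minute cold start is avoided.
    assert_eq!(effective_queue(0, "prover-new", Some("prover-new"), 2, 500), 1_000);
    // Non-primary namespaces are unaffected.
    assert_eq!(effective_queue(0, "prover-old", Some("prover-new"), 2, 500), 0);
}
```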


## Checklist


- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [x] Tests for the changes have been added / updated.
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev
lint`.

ref ZKD-1855
yorik authored Nov 14, 2024
1 parent 6844651 commit bc00c4a
Showing 3 changed files with 43 additions and 12 deletions.
9 changes: 6 additions & 3 deletions prover/crates/bin/prover_autoscaler/README.md
@@ -152,14 +152,16 @@ agent_config:
- `protocol_versions` is a map of namespaces to the protocol version each one processes. It should correspond to the
  binary versions running there!
- `cluster_priorities` is a map of cluster names to priorities; clusters with lower values are used first.
- `min_provers` is a map of namespaces to the minimum number of provers to run even if the queue is empty.
- `apply_min_to_namespace` specifies the current primary namespace; the `min_*` settings are applied only to it.
- `min_provers` is the minimum number of provers to run even if the queue is empty. Default: 0.
- `max_provers` is a map of cluster names to a map of GPU types to the maximum number of provers.
- `prover_speed` is a map of GPU types to a speed divider. Default: 500.
- `long_pending_duration` is the time after which a pending pod is considered long-pending and will be relocated to a
  different cluster. Default: 10m.
- `scaler_targets` subsection is a list of Simple targets:
- `queue_report_field` is the name of the corresponding queue report section. See the example for possible options.
- `deployment` is the name of the Deployment to scale.
- `min_replicas` is the minimum number of replicas to run even if the queue is empty. Default: 0.
- `max_replicas` is a map of cluster names to the maximum number of replicas.
- `speed` is a divider for the corresponding queue (see the sketch after this list).
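
A minimal sketch of the per-target arithmetic: the queue is divided by `speed` and rounded up (as documented in `config.rs` below), capped by `max_replicas`, with `min_replicas` acting as a floor for the primary namespace. Cluster selection by priority is left out; names and numbers are illustrative.

```rust
/// Illustrative replica calculation for one Simple target.
fn desired_replicas(
    queue: u64,
    speed: u64,
    min_replicas: u64,
    max_replicas: u64,
    is_primary: bool,
) -> u64 {
    // Floor: in the primary namespace the queue is bumped so at least
    // `min_replicas` pods stay up even when it is empty.
    let queue = if is_primary { queue.max(min_replicas * speed) } else { queue };
    // Round up: any non-empty remainder still needs a pod.
    let needed = (queue + speed - 1) / speed;
    needed.min(max_replicas)
}

fn main() {
    // min_replicas: 1, max_replicas: 10, assumed speed: 250.
    assert_eq!(desired_replicas(0, 250, 1, 10, true), 1); // floor keeps one pod warm
    assert_eq!(desired_replicas(600, 250, 1, 10, true), 3); // ceil(600 / 250)
    assert_eq!(desired_replicas(10_000, 250, 1, 10, true), 10); // clamped to max
}
```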

@@ -182,8 +184,8 @@ scaler_config:
cluster1: 0
cluster2: 100
cluster3: 200
min_provers:
prover-new: 0
apply_min_to_namespace: prover-new
min_provers: 1
max_provers:
cluster1:
L4: 1
@@ -201,6 +203,7 @@ scaler_config:
scaler_targets:
- queue_report_field: basic_witness_jobs
deployment: witness-generator-basic-fri
min_replicas: 1
max_replicas:
cluster1: 10
cluster2: 20
10 changes: 8 additions & 2 deletions prover/crates/bin/prover_autoscaler/src/config.rs
@@ -59,8 +59,11 @@ pub struct ProverAutoscalerScalerConfig {
pub prover_speed: HashMap<Gpu, u32>,
/// Maximum number of provers which can be run per cluster/GPU.
pub max_provers: HashMap<String, HashMap<Gpu, u32>>,
/// Minimum number of provers per namespace.
pub min_provers: HashMap<String, u32>,
/// Minimum number of provers globally.
#[serde(default)]
pub min_provers: u32,
/// Name of primary namespace, all min numbers are applied to it.
pub apply_min_to_namespace: Option<String>,
/// Duration after which pending pod considered long pending.
#[serde(
with = "humantime_serde",
@@ -132,6 +135,9 @@ pub enum QueueReportFields {
pub struct ScalerTarget {
pub queue_report_field: QueueReportFields,
pub deployment: String,
/// Min replicas globally.
#[serde(default)]
pub min_replicas: usize,
/// Max replicas per cluster.
pub max_replicas: HashMap<String, usize>,
/// The queue will be divided by the speed and rounded up to get number of replicas.
36 changes: 29 additions & 7 deletions prover/crates/bin/prover_autoscaler/src/global/scaler.rs
@@ -73,7 +73,8 @@ pub struct Scaler {
pub struct GpuScaler {
/// Which cluster to use first.
cluster_priorities: HashMap<String, u32>,
min_provers: HashMap<String, u32>,
apply_min_to_namespace: Option<String>,
min_provers: u32,
max_provers: HashMap<String, HashMap<Gpu, u32>>,
prover_speed: HashMap<Gpu, u32>,
long_pending_duration: chrono::Duration,
@@ -84,6 +85,8 @@ pub struct SimpleScaler {
deployment: String,
/// Which cluster to use first.
cluster_priorities: HashMap<String, u32>,
apply_min_to_namespace: Option<String>,
min_replicas: usize,
max_replicas: HashMap<String, usize>,
speed: usize,
long_pending_duration: chrono::Duration,
@@ -126,6 +129,7 @@ impl Scaler {
simple_scalers.push(SimpleScaler::new(
c,
config.cluster_priorities.clone(),
config.apply_min_to_namespace.clone(),
chrono::Duration::seconds(config.long_pending_duration.as_secs() as i64),
))
}
@@ -144,6 +148,7 @@ impl GpuScaler {
pub fn new(config: ProverAutoscalerScalerConfig) -> Self {
Self {
cluster_priorities: config.cluster_priorities,
apply_min_to_namespace: config.apply_min_to_namespace,
min_provers: config.min_provers,
max_provers: config.max_provers,
prover_speed: config.prover_speed,
@@ -287,10 +292,12 @@ impl GpuScaler {

// Increase queue size, if it's too small, to make sure that required min_provers are
// running.
let queue: u64 = self.min_provers.get(namespace).map_or(queue, |min| {
let queue: u64 = if self.apply_min_to_namespace.as_deref() == Some(namespace.as_str()) {
self.normalize_queue(Gpu::L4, queue)
.max(self.provers_to_speed(Gpu::L4, *min))
});
.max(self.provers_to_speed(Gpu::L4, self.min_provers))
} else {
queue
};

let mut total: i64 = 0;
let mut provers: HashMap<GPUPoolKey, u32> = HashMap::new();
@@ -424,12 +431,15 @@ impl SimpleScaler {
pub fn new(
config: &ScalerTarget,
cluster_priorities: HashMap<String, u32>,
apply_min_to_namespace: Option<String>,
long_pending_duration: chrono::Duration,
) -> Self {
Self {
queue_report_field: config.queue_report_field,
deployment: config.deployment.clone(),
cluster_priorities,
apply_min_to_namespace,
min_replicas: config.min_replicas,
max_replicas: config.max_replicas.clone(),
speed: config.speed,
long_pending_duration,
@@ -521,6 +531,15 @@ impl SimpleScaler {
&sorted_clusters
);

// Increase queue size, if it's too small, to make sure that required min_replicas are
// running.
let queue: u64 = if self.apply_min_to_namespace.as_deref() == Some(namespace.as_str()) {
self.normalize_queue(queue)
.max(self.pods_to_speed(self.min_replicas))
} else {
queue
};

let mut total: i64 = 0;
let mut pods: HashMap<String, usize> = HashMap::new();
for cluster in &sorted_clusters {
@@ -719,7 +738,8 @@ mod tests {
fn test_run() {
let scaler = GpuScaler::new(ProverAutoscalerScalerConfig {
cluster_priorities: [("foo".into(), 0), ("bar".into(), 10)].into(),
min_provers: [("prover-other".into(), 2)].into(),
apply_min_to_namespace: Some("prover-other".into()),
min_provers: 2,
max_provers: [
("foo".into(), [(Gpu::L4, 100)].into()),
("bar".into(), [(Gpu::L4, 100)].into()),
@@ -857,7 +877,8 @@ mod tests {
fn test_run_min_provers() {
let scaler = GpuScaler::new(ProverAutoscalerScalerConfig {
cluster_priorities: [("foo".into(), 0), ("bar".into(), 10)].into(),
min_provers: [("prover".into(), 2)].into(),
apply_min_to_namespace: Some("prover".into()),
min_provers: 2,
max_provers: [
("foo".into(), [(Gpu::L4, 100)].into()),
("bar".into(), [(Gpu::L4, 100)].into()),
@@ -1052,7 +1073,8 @@ mod tests {
fn test_run_need_move() {
let scaler = GpuScaler::new(ProverAutoscalerScalerConfig {
cluster_priorities: [("foo".into(), 0), ("bar".into(), 10)].into(),
min_provers: [("prover".into(), 2)].into(),
apply_min_to_namespace: Some("prover".into()),
min_provers: 2,
max_provers: [
("foo".into(), [(Gpu::L4, 100)].into()),
("bar".into(), [(Gpu::L4, 100)].into()),
