Skip to content

Commit aa46f89

Browse files
authored
fix(prover): Correctly calculate NeedToMove and recent scale errors (matter-labs#3783)
## What ❔ Correctly calculate NeedToMove and recent scale errors. Always run all clusters evaluation to makes sure that metrics area correct and min_replicas works. Add `scale_errors_duration` and `need_to_move_duration` optional config fields. <!-- What are the changes this PR brings about? --> <!-- Example: This PR adds a PR template to the repo. --> <!-- (For bigger PRs adding more context is appreciated) --> ## Why ❔ To prevent oscillation pods between clusters, fix min_replicas and output metrics during 0 load. <!-- Why are these changes done? What goal do they contribute to? What are the principles behind them? --> <!-- The `Why` has to be clear to non-Matter Labs entities running their own ZK Chain --> <!-- Example: PR templates ensure PR reviewers, observers, and future iterators are in context about the evolution of repos. --> ## Is this a breaking change? - [ ] Yes - [x] No ## Operational changes <!-- Any config changes? Any new flags? Any changes to any scripts? --> <!-- Please add anything that non-Matter Labs entities running their own ZK Chain may need to know --> ## Checklist <!-- Check your PR fulfills the following items. --> <!-- For draft PRs check the boxes as you complete them. --> - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [x] Tests for the changes have been added / updated. - [x] Documentation comments have been added / updated. - [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`. ref ZKD-2533
1 parent d1bac66 commit aa46f89

File tree

4 files changed

+163
-61
lines changed

4 files changed

+163
-61
lines changed

prover/crates/bin/prover_autoscaler/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ agent_config:
155155
- `apply_min_to_namespace` specifies current primary namespace to run min number of provers in it.
156156
- `long_pending_duration` is time after a pending pod considered long pending and will be relocated to different
157157
cluster. Default: 10m.
158+
- `scale_errors_duration` defines the time window for including scale errors in Autoscaler calculations. Clusters will
159+
be sorted by number of the errors. It should be between 20m and 2h. Default: 1h.
160+
- `need_to_move_duration` defines the time window for which Autoscaler forces pending pod migration due to scale errors.
161+
This prevents pending pods from indefinitely waiting for nodes in a busy cluster. Should be at least x2 of
162+
`scaler_run_interval`. Default: 4m.
158163
- `scaler_targets` subsection is a list of non-GPU targets:
159164
- `scaler_target_type` specifies the type, possible options: `Simple` (default) and `Gpu`.
160165
- `queue_report_field` is name of corresponding queue report section. See example for possible options.
@@ -185,6 +190,8 @@ scaler_config:
185190
cluster3: 200
186191
apply_min_to_namespace: prover-new
187192
long_pending_duration: 10m
193+
scale_errors_duration: 1h
194+
need_to_move_duration: 4m
188195
scaler_targets:
189196
- queue_report_field: prover_jobs
190197
scaler_target_type: Gpu

prover/crates/bin/prover_autoscaler/src/config.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,18 @@ pub struct ProverAutoscalerScalerConfig {
6767
default = "ProverAutoscalerScalerConfig::default_long_pending_duration"
6868
)]
6969
pub long_pending_duration: Duration,
70+
/// Time window for including scale errors in Autoscaler calculations. Clusters will be sorted by number of the errors.
71+
#[serde(
72+
with = "humantime_serde",
73+
default = "ProverAutoscalerScalerConfig::default_scale_errors_duration"
74+
)]
75+
pub scale_errors_duration: Duration,
76+
/// Time window for which Autoscaler forces pending pod migration due to scale errors.
77+
#[serde(
78+
with = "humantime_serde",
79+
default = "ProverAutoscalerScalerConfig::default_need_to_move_duration"
80+
)]
81+
pub need_to_move_duration: Duration,
7082
/// List of simple autoscaler targets.
7183
pub scaler_targets: Vec<ScalerTarget>,
7284
/// If dry-run enabled don't send any scale requests.
@@ -177,6 +189,16 @@ impl ProverAutoscalerScalerConfig {
177189
pub fn default_long_pending_duration() -> Duration {
178190
Duration::from_secs(600)
179191
}
192+
193+
/// Default long_pending_duration -- 1h
194+
pub fn default_scale_errors_duration() -> Duration {
195+
Duration::from_secs(3600)
196+
}
197+
198+
/// Default long_pending_duration -- 4m
199+
pub fn default_need_to_move_duration() -> Duration {
200+
Duration::from_secs(4 * 60)
201+
}
180202
}
181203

182204
impl ScalerTarget {

prover/crates/bin/prover_autoscaler/src/global/manager.rs

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
use std::collections::HashMap;
1+
use std::{collections::HashMap, sync::Arc};
22

33
use super::{
44
queuer,
5-
scaler::{Scaler, ScalerTrait},
5+
scaler::{Scaler, ScalerConfig, ScalerTrait},
66
watcher,
77
};
88
use crate::{
99
agent::ScaleRequest,
10-
cluster_types::{ClusterName, Clusters, NamespaceName},
10+
cluster_types::{ClusterName, NamespaceName},
1111
config::{ProverAutoscalerScalerConfig, QueueReportFields, ScalerTargetType},
1212
key::{GpuKey, NoKey},
1313
metrics::AUTOSCALER_METRICS,
@@ -40,6 +40,21 @@ impl Manager {
4040

4141
let mut scalers: Vec<Box<dyn ScalerTrait + Sync + Send>> = Vec::default();
4242
let mut jobs = Vec::default();
43+
44+
let scaler_config = Arc::new(ScalerConfig {
45+
cluster_priorities: config.cluster_priorities,
46+
apply_min_to_namespace: config.apply_min_to_namespace,
47+
long_pending_duration: chrono::Duration::seconds(
48+
config.long_pending_duration.as_secs() as i64,
49+
),
50+
scale_errors_duration: chrono::Duration::seconds(
51+
config.scale_errors_duration.as_secs() as i64,
52+
),
53+
need_to_move_duration: chrono::Duration::seconds(
54+
config.need_to_move_duration.as_secs() as i64,
55+
),
56+
});
57+
4358
for c in &config.scaler_targets {
4459
jobs.push(c.queue_report_field);
4560
match c.scaler_target_type {
@@ -52,9 +67,7 @@ impl Manager {
5267
.map(|(k, v)| (k.clone(), v.into_map_gpukey()))
5368
.collect(),
5469
c.speed.into_map_gpukey(),
55-
config.cluster_priorities.clone(),
56-
config.apply_min_to_namespace.clone(),
57-
chrono::Duration::seconds(config.long_pending_duration.as_secs() as i64),
70+
scaler_config.clone(),
5871
))),
5972
ScalerTargetType::Simple => scalers.push(Box::new(Scaler::<NoKey>::new(
6073
c.queue_report_field,
@@ -65,9 +78,7 @@ impl Manager {
6578
.map(|(k, v)| (k.clone(), v.into_map_nokey()))
6679
.collect(),
6780
c.speed.into_map_nokey(),
68-
config.cluster_priorities.clone(),
69-
config.apply_min_to_namespace.clone(),
70-
chrono::Duration::seconds(config.long_pending_duration.as_secs() as i64),
81+
scaler_config.clone(),
7182
))),
7283
};
7384
}
@@ -81,22 +92,6 @@ impl Manager {
8192
}
8293
}
8394

84-
/// is_namespace_running returns true if there are some pods running in it.
85-
fn is_namespace_running(namespace: &NamespaceName, clusters: &Clusters) -> bool {
86-
clusters
87-
.clusters
88-
.values()
89-
.flat_map(|v| v.namespaces.iter())
90-
.filter_map(|(k, v)| if k == namespace { Some(v) } else { None })
91-
.flat_map(|v| v.deployments.values())
92-
.map(
93-
|d| d.running + d.desired, // If there is something running or expected to run, we
94-
// should re-evaluate the namespace.
95-
)
96-
.sum::<usize>()
97-
> 0
98-
}
99-
10095
#[async_trait::async_trait]
10196
impl Task for Manager {
10297
async fn invoke(&self) -> anyhow::Result<()> {
@@ -123,9 +118,7 @@ impl Task for Manager {
123118
"Running eval for namespace {ns}, PPV {ppv}, scaler {} found queue {q}",
124119
scaler.deployment()
125120
);
126-
if q > 0 || is_namespace_running(ns, &guard.clusters) {
127-
scaler.run(ns, q, &guard.clusters, &mut scale_requests);
128-
}
121+
scaler.run(ns, q, &guard.clusters, &mut scale_requests);
129122
}
130123
}
131124
} // Unlock self.watcher.data.

0 commit comments

Comments
 (0)