Skip to content

Commit e94985f

Browse files
authored
fix(prover): Remove deleted pods from autoscaler-agent cluster cache (matter-labs#3739)
## What ❔

Remove deleted pods from the autoscaler-agent cluster cache.

<!-- What are the changes this PR brings about? -->
<!-- Example: This PR adds a PR template to the repo. -->
<!-- (For bigger PRs adding more context is appreciated) -->

## Why ❔

Pods deleted while still in the `Pending` status could cause incorrect evaluation of the cluster capacity.

<!-- Why are these changes done? What goal do they contribute to? What are the principles behind them? -->
<!-- The `Why` has to be clear to non-Matter Labs entities running their own ZK Chain -->
<!-- Example: PR templates ensure PR reviewers, observers, and future iterators are in context about the evolution of repos. -->

## Is this a breaking change?

- [ ] Yes
- [x] No

## Operational changes

<!-- Any config changes? Any new flags? Any changes to any scripts? -->
<!-- Please add anything that non-Matter Labs entities running their own ZK Chain may need to know -->

## Checklist

<!-- Check your PR fulfills the following items. -->
<!-- For draft PRs check the boxes as you complete them. -->

- [x] PR title corresponds to the body of PR (we generate changelog entries from PRs).
- [ ] Tests for the changes have been added / updated.
- [ ] Documentation comments have been added / updated.
- [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`.
1 parent 8f7f831 commit e94985f

File tree

4 files changed

+85
-59
lines changed

4 files changed

+85
-59
lines changed

prover/Cargo.lock

Lines changed: 73 additions & 52 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

prover/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ humantime = "2.1"
3333
humantime-serde = "1.1"
3434
indicatif = "0.16"
3535
jemallocator = "0.5"
36-
k8s-openapi = { version = "0.23.0", features = ["v1_30"] }
37-
kube = { version = "0.96.0", features = ["runtime", "derive"] }
36+
k8s-openapi = { version = "0.24.0", features = ["v1_30"] }
37+
kube = { version = "0.99.0", features = ["runtime", "derive"] }
3838
log = "0.4.20"
3939
md5 = "0.7.0"
4040
once_cell = "1.18"

prover/crates/bin/prover_autoscaler/src/global/scaler.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ mod tests {
398398

399399
#[tracing_test::traced_test]
400400
#[test]
401-
fn test_run() {
401+
fn test_calculate() {
402402
let scaler = Scaler::<GpuKey>::new(
403403
QueueReportFields::prover_jobs,
404404
"circuit-prover-gpu".into(),
@@ -540,7 +540,7 @@ mod tests {
540540

541541
#[tracing_test::traced_test]
542542
#[test]
543-
fn test_run_min_provers() {
543+
fn test_calculate_min_provers() {
544544
let scaler = Scaler::new(
545545
QueueReportFields::prover_jobs,
546546
"circuit-prover-gpu".into(),
@@ -739,7 +739,7 @@ mod tests {
739739

740740
#[tracing_test::traced_test]
741741
#[test]
742-
fn test_run_need_move() {
742+
fn test_calculate_need_move() {
743743
let scaler = Scaler::new(
744744
QueueReportFields::prover_jobs,
745745
"circuit-prover-gpu".into(),
@@ -859,7 +859,7 @@ mod tests {
859859

860860
#[tracing_test::traced_test]
861861
#[test]
862-
fn test_run_nokey() {
862+
fn test_calculate_nokey() {
863863
let scaler = Scaler::<NoKey>::new(
864864
QueueReportFields::prover_jobs,
865865
"some-deployment".into(),

prover/crates/bin/prover_autoscaler/src/k8s/watcher.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use k8s_openapi::api;
77
use kube::{
88
api::{Api, ResourceExt},
99
runtime::{watcher, WatchStreamExt},
10+
Resource,
1011
};
1112
use reqwest::{
1213
header::{HeaderMap, HeaderValue},
@@ -172,8 +173,12 @@ impl Watcher {
172173
}
173174
pod.status = phase;
174175

175-
if pod.status == "Succeeded" || pod.status == "Failed" {
176+
if p.meta().deletion_timestamp.is_some()
177+
|| pod.status == "Succeeded"
178+
|| pod.status == "Failed"
179+
{
176180
// Cleaning up list of pods.
181+
tracing::debug!("Remove pod: {}", &p.name_any());
177182
v.pods.remove(&p.name_any());
178183
}
179184

0 commit comments

Comments
 (0)