Skip to content

Improving the default PDB implementation. #8780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 65 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
280634c
Nearly functional implementation.
naemono Jul 23, 2025
50c1aaa
Adding additional tests.
naemono Jul 23, 2025
85c661a
Move to dfs.
naemono Jul 24, 2025
e8367cb
Restore old disruption behavior.
naemono Jul 25, 2025
0d65a39
Fix get most conservative role
naemono Jul 25, 2025
657121e
Optimization
naemono Jul 25, 2025
d9dcc1e
Adding additional unit tests.
naemono Jul 29, 2025
4fa170a
Simplify the sorting.
naemono Jul 29, 2025
b3a166b
Simplify further.
naemono Jul 29, 2025
a4951be
Comments update; wrap the error.
naemono Jul 29, 2025
ec17cd3
Remove some comments.
naemono Jul 29, 2025
33fe7d0
Optimizations
naemono Jul 30, 2025
fc7059d
Break the dfs tasks into smaller funcs
naemono Jul 31, 2025
51aab4b
revert license adjustment
naemono Jul 31, 2025
30e223f
remove comment
naemono Jul 31, 2025
b59475a
adjust var name
naemono Jul 31, 2025
f6aa60e
updating comments.
naemono Jul 31, 2025
6d70499
comment update.
naemono Jul 31, 2025
2d15efd
remove tab
naemono Jul 31, 2025
ac580ec
pre-allocate empty slices of slices.
naemono Jul 31, 2025
a4aad89
fix lint issues.
naemono Jul 31, 2025
eb25e46
Update CRD comments/docs
naemono Jul 31, 2025
08b882e
Adjust some wording according to review notes.
naemono Aug 4, 2025
cc71d0e
Restore old behavior for single pdb for a whole cluster.
naemono Aug 4, 2025
b021c97
Use single data roles grouping
naemono Aug 4, 2025
27cc308
Ensure pdbs that should not exist are deleted.
naemono Aug 4, 2025
43a0e7f
Ensure checks are role-specific.
naemono Aug 5, 2025
675bd2e
Just build coord logic into the func itself
naemono Aug 5, 2025
3fdc4dc
naming
naemono Aug 5, 2025
2f15c01
wip migrating to different algorithm
naemono Aug 5, 2025
a65b56a
fixing unit tests
naemono Aug 5, 2025
f0c0b1a
Fixing all unit tests
naemono Aug 5, 2025
e755bc1
Merge branch 'main' into 2936/default-pdb-improvements
naemono Aug 5, 2025
3a0d3d0
Fix the ordering issue
naemono Aug 5, 2025
27277a1
revert test name change.
naemono Aug 5, 2025
49a9faf
Comments fix
naemono Aug 6, 2025
bcc802c
renaming files.
naemono Aug 6, 2025
1b6adff
rename again
naemono Aug 6, 2025
0a3699c
rename test file also.
naemono Aug 6, 2025
f53346c
Nearly fixed all unit tests
naemono Aug 7, 2025
83d674b
Unit tests passing.
naemono Aug 7, 2025
982efef
use nodeRoles not strings
naemono Aug 7, 2025
4b2d695
naming
naemono Aug 7, 2025
41ecaed
comments
naemono Aug 7, 2025
96e3e46
Handle both v1 and v1beta1 PDB objects.
naemono Aug 7, 2025
5af35a4
Use sets instead
naemono Aug 7, 2025
2bb18a4
Create a coordinating nodeRole and use it.
naemono Aug 7, 2025
0176f96
Use existing ownerref func
naemono Aug 8, 2025
6361851
Move the naming of the pdb func.
naemono Aug 8, 2025
8466261
expired license comment.
naemono Aug 8, 2025
091ce74
Fix unit tests
naemono Aug 8, 2025
254b60c
Move priority slice to Noderole slices
naemono Aug 8, 2025
30966c8
Fix some lint issues
naemono Aug 8, 2025
cc76108
make generate
naemono Aug 8, 2025
6ddc18c
Fix some linting issues
naemono Aug 8, 2025
4954437
Fix notice
naemono Aug 8, 2025
dcbb908
Move to using types, not strings.
naemono Aug 8, 2025
c8d8099
uncomment a test
naemono Aug 8, 2025
512fb2e
Removing v1b1 pdb logic.
naemono Aug 13, 2025
d37c3d0
Fix unit tests
naemono Aug 14, 2025
f81768e
Fix issue when defining max disruptions allowed.
naemono Aug 14, 2025
357be7c
fixing unit tests
naemono Aug 14, 2025
b6a45c4
Ensure all roles vs coordinating roles is treated properly
naemono Aug 15, 2025
5e1d2f5
make generate
naemono Aug 15, 2025
8bd29c0
remove unneeded version file.
naemono Aug 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions pkg/controller/common/statefulset/fixtures.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ type TestSset struct {
Master bool
Data bool
Ingest bool
ML bool
Transform bool
RemoteClusterClient bool
DataHot bool
DataWarm bool
DataCold bool
DataContent bool
DataFrozen bool
Status appsv1.StatefulSetStatus
ResourceVersion string
}
Expand Down Expand Up @@ -54,6 +62,14 @@ func (t TestSset) Build() appsv1.StatefulSet {
label.NodeTypesMasterLabelName.Set(t.Master, labels)
label.NodeTypesDataLabelName.Set(t.Data, labels)
label.NodeTypesIngestLabelName.Set(t.Ingest, labels)
label.NodeTypesMLLabelName.Set(t.ML, labels)
label.NodeTypesTransformLabelName.Set(t.Transform, labels)
label.NodeTypesRemoteClusterClientLabelName.Set(t.RemoteClusterClient, labels)
label.NodeTypesDataHotLabelName.Set(t.DataHot, labels)
label.NodeTypesDataWarmLabelName.Set(t.DataWarm, labels)
label.NodeTypesDataColdLabelName.Set(t.DataCold, labels)
label.NodeTypesDataContentLabelName.Set(t.DataContent, labels)
label.NodeTypesDataFrozenLabelName.Set(t.DataFrozen, labels)
statefulSet := appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: t.Name,
Expand Down Expand Up @@ -95,6 +111,14 @@ type TestPod struct {
Master bool
Data bool
Ingest bool
ML bool
Transform bool
RemoteClusterClient bool
DataHot bool
DataWarm bool
DataCold bool
DataContent bool
DataFrozen bool
Ready bool
RestartCount int32
Phase corev1.PodPhase
Expand All @@ -111,6 +135,14 @@ func (t TestPod) Build() corev1.Pod {
label.NodeTypesMasterLabelName.Set(t.Master, labels)
label.NodeTypesDataLabelName.Set(t.Data, labels)
label.NodeTypesIngestLabelName.Set(t.Ingest, labels)
label.NodeTypesMLLabelName.Set(t.ML, labels)
label.NodeTypesTransformLabelName.Set(t.Transform, labels)
label.NodeTypesRemoteClusterClientLabelName.Set(t.RemoteClusterClient, labels)
label.NodeTypesDataHotLabelName.Set(t.DataHot, labels)
label.NodeTypesDataWarmLabelName.Set(t.DataWarm, labels)
label.NodeTypesDataColdLabelName.Set(t.DataCold, labels)
label.NodeTypesDataContentLabelName.Set(t.DataContent, labels)
label.NodeTypesDataFrozenLabelName.Set(t.DataFrozen, labels)

status := corev1.PodStatus{
// assume Running by default
Expand Down
153 changes: 153 additions & 0 deletions pkg/controller/elasticsearch/pdb/dfs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.

package pdb

import (
"slices"

appsv1 "k8s.io/api/apps/v1"

esv1 "github.com/elastic/cloud-on-k8s/v3/pkg/apis/elasticsearch/v1"
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/sset"
)

// dataRoles lists the data-tier roles that normalizeRole collapses into the
// generic data role.
//
// DataFrozenRole is intentionally left out: it follows different disruption
// rules (yellow+ health).
var dataRoles = []string{
	string(esv1.DataRole),
	string(esv1.DataHotRole),
	string(esv1.DataWarmRole),
	string(esv1.DataColdRole),
	string(esv1.DataContentRole),
}

// normalizeRole collapses every role listed in dataRoles into the generic
// data role so that all data tiers are treated as one role. Any other role
// is returned unchanged.
func normalizeRole(role string) string {
	if !slices.Contains(dataRoles, role) {
		return role
	}
	return string(esv1.DataRole)
}

// groupBySharedRoles partitions the given StatefulSets into groups in which
// every member is linked to the others through a chain of shared roles.
//
// It works in three steps:
//  1. map each (normalized) role to the indices of the StatefulSets having it,
//  2. turn that map into an adjacency list (one neighbour slice per StatefulSet),
//  3. walk the resulting graph with a depth-first search to collect the
//     connected components.
//
// Example, given:
//   - StatefulSet A (idx 0) with roles ["master", "data"]
//   - StatefulSet B (idx 1) with roles ["data_cold"]
//   - StatefulSet C (idx 2) with roles ["data"]
//   - StatefulSet D (idx 3) with roles ["coordinating"]
//
// the neighbours recorded in the adjacency list are:
//
//	idx 0 -> 1, 2
//	idx 1 -> 0, 2
//	idx 2 -> 0, 1
//	idx 3 -> (none)
//
// which yields two groups: {A, B, C} and {D}.
//
// An adjacency list is used because it is a simple representation of the
// connections, and DFS because it is a well-known and sufficiently efficient
// way to enumerate connected components (union-find may be slightly more
// efficient, but it does not matter at this data size).
//
// An empty input yields an empty, non-nil result.
func groupBySharedRoles(statefulSets sset.StatefulSetList) [][]appsv1.StatefulSet {
	count := len(statefulSets)
	if count == 0 {
		return [][]appsv1.StatefulSet{}
	}

	return buildConnectedStatefulSets(
		statefulSets,
		buildAdjacencyList(buildRolesToIndicesMap(statefulSets), count),
		count,
	)
}

// buildConnectedStatefulSets collects the connected components of the graph
// described by adjList, returning one slice of StatefulSets per component.
// The traversal is a depth-first search driven by an explicit stack rather
// than recursion.
//
// adjList[i] holds the indices of the StatefulSets connected to statefulSets[i],
// and size is the length used for the visited bookkeeping.
func buildConnectedStatefulSets(statefulSets sset.StatefulSetList, adjList [][]int, size int) [][]appsv1.StatefulSet {
	var groups [][]appsv1.StatefulSet
	seen := make([]bool, size)

	for start := range statefulSets {
		if seen[start] {
			continue
		}

		component := []appsv1.StatefulSet{}
		pending := []int{start}

		for len(pending) > 0 {
			// Pop the most recently pushed index.
			last := len(pending) - 1
			current := pending[last]
			pending = pending[:last]

			// An index can be pushed several times before it is first popped.
			if seen[current] {
				continue
			}

			seen[current] = true
			component = append(component, statefulSets[current])

			// Queue every neighbour that has not been handled yet.
			for _, neighbour := range adjList[current] {
				if seen[neighbour] {
					continue
				}
				pending = append(pending, neighbour)
			}
		}

		groups = append(groups, component)
	}

	return groups
}

// buildRolesToIndicesMap maps each normalized role to the indices of the
// StatefulSets that carry it. The result is the input used to build the
// adjacency list.
//
// StatefulSets for which no role is found are treated as coordinating nodes
// and are all recorded under the "coordinating" key so they end up grouped
// together.
//
// Each StatefulSet index appears at most once per role, even when several of
// its roles normalize to the same key (normalizeRole collapses the data-tier
// roles into one).
func buildRolesToIndicesMap(statefulSets sset.StatefulSetList) map[string][]int {
	const coordinatingKey = "coordinating"

	rolesToIndices := make(map[string][]int)
	// The loop variable is deliberately not named "sset" to avoid shadowing
	// the imported sset package.
	for i, statefulSet := range statefulSets {
		roles := getRolesFromStatefulSetPodTemplate(statefulSet)
		if len(roles) == 0 {
			rolesToIndices[coordinatingKey] = append(rolesToIndices[coordinatingKey], i)
			continue
		}
		for _, role := range roles {
			normalizedRole := normalizeRole(string(role))
			indices := rolesToIndices[normalizedRole]
			// Indices are appended in increasing order, so a repeat of the
			// current StatefulSet can only be the last element.
			if len(indices) > 0 && indices[len(indices)-1] == i {
				continue
			}
			rolesToIndices[normalizedRole] = append(indices, i)
		}
	}
	return rolesToIndices
}

// buildAdjacencyList builds an adjacency list from the given role-to-indices
// map. The returned slice has one entry per StatefulSet (size entries);
// entry i lists the indices of every other StatefulSet that shares at least
// one role with StatefulSet i.
//
// Each neighbour list is sorted in ascending order, contains no duplicates
// (even when two StatefulSets share several roles) and never contains the
// StatefulSet's own index. This makes the result independent of map
// iteration order. StatefulSets without neighbours keep an empty entry.
//
// Every index in roleToIndices must be in the range [0, size).
func buildAdjacencyList(roleToIndices map[string][]int, size int) [][]int {
	adjList := make([][]int, size)

	// All StatefulSets sharing a role are connected to each other.
	for _, indices := range roleToIndices {
		for _, from := range indices {
			for _, to := range indices {
				if from != to {
					adjList[from] = append(adjList[from], to)
				}
			}
		}
	}

	// Pairs sharing more than one role were recorded once per role; sorting
	// and compacting drops the repeats and fixes the neighbour order.
	for i := range adjList {
		slices.Sort(adjList[i])
		adjList[i] = slices.Compact(adjList[i])
	}

	return adjList
}
Loading