From 9ef9ea6414b7d492b46f0355f5ab346cc4339751 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 1 Aug 2025 13:01:17 -0400 Subject: [PATCH 01/31] Support k3d in deployer/e2e tests Signed-off-by: Michael Montgomery --- .buildkite/e2e/release-branch-matrix.yaml | 7 +- .../scripts/test/set-deployer-config.sh | 9 +- hack/deployer/cmd/create.go | 2 + hack/deployer/config/plans.yml | 362 +++++++++--------- hack/deployer/runner/k3d.go | 158 ++++++++ hack/deployer/runner/settings.go | 6 + 6 files changed, 369 insertions(+), 175 deletions(-) create mode 100644 hack/deployer/runner/k3d.go diff --git a/.buildkite/e2e/release-branch-matrix.yaml b/.buildkite/e2e/release-branch-matrix.yaml index ddac752c55..eae69fdd2e 100644 --- a/.buildkite/e2e/release-branch-matrix.yaml +++ b/.buildkite/e2e/release-branch-matrix.yaml @@ -1,4 +1,3 @@ - - label: stack fixed: E2E_PROVIDER: gke @@ -41,6 +40,12 @@ - DEPLOYER_KIND_NODE_IMAGE: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f DEPLOYER_KIND_IP_FAMILY: ipv6 +- label: k3d + fixed: + E2E_PROVIDER: k3d + mixed: + - DEPLOYER_K3D_NODE_IMAGE: "rancher/k3s:v1.33.3+k3s1" + - label: gke fixed: E2E_PROVIDER: gke diff --git a/.buildkite/scripts/test/set-deployer-config.sh b/.buildkite/scripts/test/set-deployer-config.sh index 4cbd55e14b..07c7d64b56 100755 --- a/.buildkite/scripts/test/set-deployer-config.sh +++ b/.buildkite/scripts/test/set-deployer-config.sh @@ -25,7 +25,7 @@ ROOT="$WD/../../.." 
w() { echo "$@" >> "$ROOT/deployer-config.yml"; } -write_deployer_config() { +write_deployer_config() { :> "$ROOT/deployer-config.yml" w "id: ${E2E_PROVIDER}-ci" @@ -36,7 +36,7 @@ write_deployer_config() { w " operation: ${DEPLOYER_OPERATION:-create}" w " clusterName: ${CLUSTER_NAME}" - # k8s version for ocp, kind + # k8s version for ocp, kind if [[ "${DEPLOYER_CLIENT_VERSION:-}" != "" ]]; then w ' clientVersion: "'"${DEPLOYER_CLIENT_VERSION}"'"' fi @@ -58,6 +58,11 @@ write_deployer_config() { w " nodeImage: ${DEPLOYER_KIND_NODE_IMAGE}" w " ipFamily: ${DEPLOYER_KIND_IP_FAMILY:-ipv4}" fi + + if [[ "${DEPLOYER_K3D_NODE_IMAGE:-}" ]]; then + w " k3d:" + w " nodeImage: ${DEPLOYER_K3D_NODE_IMAGE}" + fi } write_deployer_config diff --git a/hack/deployer/cmd/create.go b/hack/deployer/cmd/create.go index 8babc6cfa5..ac6c03ad68 100644 --- a/hack/deployer/cmd/create.go +++ b/hack/deployer/cmd/create.go @@ -61,6 +61,8 @@ func CreateCommand() *cobra.Command { cfgData = fmt.Sprintf(runner.DefaultEKSRunConfigTemplate, user, vaultAddr, token) case runner.KindDriverID: cfgData = fmt.Sprintf(runner.DefaultKindRunConfigTemplate, user) + case runner.K3dDriverID: + cfgData = fmt.Sprintf(runner.DefaultK3dRunConfigTemplate, user) default: return fmt.Errorf("unknown provider %s", provider) } diff --git a/hack/deployer/config/plans.yml b/hack/deployer/config/plans.yml index 931c688fa9..b3fdc191c5 100644 --- a/hack/deployer/config/plans.yml +++ b/hack/deployer/config/plans.yml @@ -1,173 +1,191 @@ plans: -- id: gke-ci - operation: create - clusterName: ci - provider: gke - kubernetesVersion: 1.33 - machineType: n1-standard-8 - serviceAccount: true - enforceSecurityPolicies: true - # use kustomize in GKE to remove the NVMe provisioning already taken care of by the platform - diskSetup: kubectl apply -k hack/deployer/config/local-disks - gke: - region: us-central1 - localSsdCount: 1 - nodeCountPerZone: 1 - gcpScopes: 
https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: gke-autopilot-ci - operation: create - clusterName: ci-autopilot - provider: gke - kubernetesVersion: 1.33 - serviceAccount: true - enforceSecurityPolicies: true - # this is disabled in autopilot: container provisioner is privileged; not allowed in Autopilot - # diskSetup: kubectl apply -k hack/deployer/config/local-disks - gke: - autopilot: true - region: us-central1 - gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: gke-dev - operation: create - clusterName: dev - provider: gke - kubernetesVersion: 1.33 - machineType: n1-standard-8 - serviceAccount: false - enforceSecurityPolicies: true - gke: - region: europe-west1 - localSsdCount: 1 - nodeCountPerZone: 1 - # Uncomment option below to enable network policy enforcement in GKE. - # networkPolicy: true - # Uncomment option below to create a private GKE cluster. - # Note that when a cluster is private you must: - # 1. Create a firewall rule so that the webhook can be accessed from the API server (see https://github.com/elastic/cloud-on-k8s/issues/1673#issuecomment-528449682) - # 2. 
Create a VM to access the subnet and authorize the VM to access the master, see https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#private_master - # private: true - # gke creates a secondary IP range for all Pods of a cluster - # gke defaults to a /14 subnet, which allows 262k Pods per cluster, but only 62 subnets to be created - # /20 allows 4094 subnets, with up to 4094 IPs (Pods) per subnet - # more clusters can therefore be created in the same VPC network. - # we set a default of /20 that can be overridden here - # clusterIpv4Cidr: /20 - # servicesIpv4Cidr: /20 - gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: gke-autopilot-dev - operation: create - clusterName: dev-autopilot - provider: gke - kubernetesVersion: 1.33 - serviceAccount: false - enforceSecurityPolicies: true - gke: - autopilot: true - region: europe-west1 - gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: aks-ci - operation: create - clusterName: ci - provider: aks - kubernetesVersion: 1.32.4 - machineType: Standard_D8s_v3 - serviceAccount: true - enforceSecurityPolicies: true - diskSetup: kubectl apply -k hack/deployer/config/local-disks - aks: - nodeCount: 3 - location: westeurope - zones: "1 2 3" -- id: aks-dev - operation: create - clusterName: dev - provider: aks - kubernetesVersion: 1.32.4 - machineType: Standard_D8s_v3 - serviceAccount: false - enforceSecurityPolicies: true - aks: - nodeCount: 3 - location: northeurope - zones: "1 2 3" -- id: ocp-ci - 
operation: create - clusterName: ci - clientVersion: 4.19.2 - provider: ocp - machineType: n1-standard-8 - serviceAccount: true - ocp: - region: europe-west6 - nodeCount: 3 -- id: ocp-dev - operation: create - clusterName: dev - clientVersion: 4.19.2 - provider: ocp - machineType: n1-standard-8 - serviceAccount: true - ocp: - region: europe-west1 - nodeCount: 3 -- id: eks-ci - operation: create - clusterName: ci - provider: eks - machineType: c5d.2xlarge - serviceAccount: false - enforceSecurityPolicies: true - kubernetesVersion: 1.33 - diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml - eks: - region: ap-northeast-3 - nodeCount: 3 - nodeAMI: auto -- id: eks-arm-ci - operation: create - clusterName: arm-ci - provider: eks - machineType: m6gd.2xlarge - serviceAccount: false - enforceSecurityPolicies: true - kubernetesVersion: 1.33 - diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml - eks: - region: eu-west-1 - nodeCount: 3 - nodeAMI: auto -- id: eks-dev - operation: create - clusterName: dev - provider: eks - machineType: c5d.2xlarge - serviceAccount: false - kubernetesVersion: 1.33 - diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml - enforceSecurityPolicies: true - eks: - region: eu-west-2 - nodeCount: 3 - nodeAMI: auto -- id: kind-dev - operation: create - clusterName: eck - clientVersion: 0.29.0 - provider: kind - kubernetesVersion: 1.31.1 - enforceSecurityPolicies: true - kind: - nodeCount: 3 - nodeImage: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f - ipFamily: ipv4 -- id: kind-ci - operation: create - clusterName: kind-ci - clientVersion: 0.29.0 - provider: kind - kubernetesVersion: 1.33.1 - enforceSecurityPolicies: true - kind: - nodeCount: 3 - nodeImage: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f - ipFamily: ipv4 + - id: gke-ci + operation: create + clusterName: ci + 
provider: gke + kubernetesVersion: 1.33 + machineType: n1-standard-8 + serviceAccount: true + enforceSecurityPolicies: true + # use kustomize in GKE to remove the NVMe provisioning already taken care of by the platform + diskSetup: kubectl apply -k hack/deployer/config/local-disks + gke: + region: us-central1 + localSsdCount: 1 + nodeCountPerZone: 1 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: gke-autopilot-ci + operation: create + clusterName: ci-autopilot + provider: gke + kubernetesVersion: 1.33 + serviceAccount: true + enforceSecurityPolicies: true + # this is disabled in autopilot: container provisioner is privileged; not allowed in Autopilot + # diskSetup: kubectl apply -k hack/deployer/config/local-disks + gke: + autopilot: true + region: us-central1 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: gke-dev + operation: create + clusterName: dev + provider: gke + kubernetesVersion: 1.33 + machineType: n1-standard-8 + serviceAccount: false + enforceSecurityPolicies: true + gke: + region: europe-west1 + localSsdCount: 1 + nodeCountPerZone: 1 + # Uncomment option below to enable network policy enforcement in GKE. + # networkPolicy: true + # Uncomment option below to create a private GKE cluster. + # Note that when a cluster is private you must: + # 1. Create a firewall rule so that the webhook can be accessed from the API server (see https://github.com/elastic/cloud-on-k8s/issues/1673#issuecomment-528449682) + # 2. 
Create a VM to access the subnet and authorize the VM to access the master, see https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#private_master + # private: true + # gke creates a secondary IP range for all Pods of a cluster + # gke defaults to a /14 subnet, which allows 262k Pods per cluster, but only 62 subnets to be created + # /20 allows 4094 subnets, with up to 4094 IPs (Pods) per subnet + # more clusters can therefore be created in the same VPC network. + # we set a default of /20 that can be overridden here + # clusterIpv4Cidr: /20 + # servicesIpv4Cidr: /20 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: gke-autopilot-dev + operation: create + clusterName: dev-autopilot + provider: gke + kubernetesVersion: 1.33 + serviceAccount: false + enforceSecurityPolicies: true + gke: + autopilot: true + region: europe-west1 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: aks-ci + operation: create + clusterName: ci + provider: aks + kubernetesVersion: 1.32.4 + machineType: Standard_D8s_v3 + serviceAccount: true + enforceSecurityPolicies: true + diskSetup: kubectl apply -k hack/deployer/config/local-disks + aks: + nodeCount: 3 + location: westeurope + zones: "1 2 3" + - id: aks-dev + operation: create + clusterName: dev + provider: aks + kubernetesVersion: 1.32.4 + machineType: Standard_D8s_v3 + serviceAccount: false + enforceSecurityPolicies: true + aks: + nodeCount: 3 + location: northeurope + zones: "1 2 3" + - id: 
ocp-ci + operation: create + clusterName: ci + clientVersion: 4.19.2 + provider: ocp + machineType: n1-standard-8 + serviceAccount: true + ocp: + region: europe-west6 + nodeCount: 3 + - id: ocp-dev + operation: create + clusterName: dev + clientVersion: 4.19.2 + provider: ocp + machineType: n1-standard-8 + serviceAccount: true + ocp: + region: europe-west1 + nodeCount: 3 + - id: eks-ci + operation: create + clusterName: ci + provider: eks + machineType: c5d.2xlarge + serviceAccount: false + enforceSecurityPolicies: true + kubernetesVersion: 1.33 + diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml + eks: + region: ap-northeast-3 + nodeCount: 3 + nodeAMI: auto + - id: eks-arm-ci + operation: create + clusterName: arm-ci + provider: eks + machineType: m6gd.2xlarge + serviceAccount: false + enforceSecurityPolicies: true + kubernetesVersion: 1.33 + diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml + eks: + region: eu-west-1 + nodeCount: 3 + nodeAMI: auto + - id: eks-dev + operation: create + clusterName: dev + provider: eks + machineType: c5d.2xlarge + serviceAccount: false + kubernetesVersion: 1.33 + diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml + enforceSecurityPolicies: true + eks: + region: eu-west-2 + nodeCount: 3 + nodeAMI: auto + - id: kind-dev + operation: create + clusterName: eck + clientVersion: 0.29.0 + provider: kind + kubernetesVersion: 1.31.1 + enforceSecurityPolicies: true + kind: + nodeCount: 3 + nodeImage: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f + ipFamily: ipv4 + - id: kind-ci + operation: create + clusterName: kind-ci + clientVersion: 0.29.0 + provider: kind + kubernetesVersion: 1.33.1 + enforceSecurityPolicies: true + kind: + nodeCount: 3 + nodeImage: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f + ipFamily: ipv4 + - id: k3d-dev + operation: create + 
clusterName: eck + clientVersion: v5.8.3 + provider: k3d + kubernetesVersion: 1.33.1 + k3d: + nodeCount: 3 + nodeImage: rancher/k3s:v1.33.3+k3s1 + - id: k3d-ci + operation: create + clusterName: k3d-ci + clientVersion: v5.8.3 + provider: k3d + kubernetesVersion: 1.33.1 + k3d: + nodeCount: 3 + nodeImage: rancher/k3s:v1.33.3+k3s1 diff --git a/hack/deployer/runner/k3d.go b/hack/deployer/runner/k3d.go new file mode 100644 index 0000000000..321411676c --- /dev/null +++ b/hack/deployer/runner/k3d.go @@ -0,0 +1,158 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +package runner + +import ( + "fmt" + "os" + "time" + + "github.com/elastic/cloud-on-k8s/v3/hack/deployer/exec" + "github.com/elastic/cloud-on-k8s/v3/pkg/utils/vault" +) + +const ( + K3dDriverID = "k3d" + + DefaultK3dRunConfigTemplate = `id: k3d-dev +overrides: + clusterName: %s-dev-cluster +` +) + +func init() { + drivers[K3dDriverID] = &K3dDriverFactory{} +} + +type K3dDriverFactory struct{} + +var _ DriverFactory = &K3dDriverFactory{} + +func (k K3dDriverFactory) Create(plan Plan) (Driver, error) { + return &K3dDriver{ + plan: plan, + vaultClient: vault.NewClientProvider(), + }, nil +} + +type K3dDriver struct { + plan Plan + clientImage string + vaultClient vault.ClientProvider +} + +func (k *K3dDriver) Execute() error { + switch k.plan.Operation { + case CreateAction: + return k.create() + case DeleteAction: + return k.delete() + } + return nil +} + +func (k *K3dDriver) create() error { + // Delete any previous e2e k3d cluster with the same name + err := k.delete() + if err != nil { + return err + } + + cmd := k.cmd("cluster", "create", "--image", k.plan.K3d.NodeImage) + if cmd == nil { + return fmt.Errorf("failed to create k3d cluster") + } + err = cmd.Run() + if err != nil { + return err + } + + // Get 
kubeconfig from k3d + kubeCfg, err := k.getKubeConfig() + if err != nil { + return err + } + defer os.Remove(kubeCfg.Name()) + + return nil +} + +func (k *K3dDriver) delete() error { + return k.cmd("cluster", "delete").Run() +} + +func (k *K3dDriver) cmd(args ...string) *exec.Command { + params := map[string]interface{}{ + "ClusterName": k.plan.ClusterName, + "Args": args, + } + + // on macOS, the docker socket is located in $HOME + // dockerSocket := "/var/run/docker.sock" + // if runtime.GOOS == "darwin" { + // dockerSocket = "$HOME/.docker/run/docker.sock" + // } + // We need the docker socket so that kind can bootstrap + // --userns=host to support Docker daemon host configured to run containers only in user namespaces + cmd := exec.NewCommand(`k3d {{Join .Args " "}} {{.ClusterName}}`) + // cmd := exec.NewCommand(`docker run --rm \ + // --userns=host \ + // -v {{.SharedVolume}}:/home \ + // -v /var/run/docker.sock:` + dockerSocket + ` \ + // -e HOME=/home \ + // -e PATH=/ \ + // {{.KindClientImage}} \ + // /kind {{Join .Args " "}} --name {{.ClusterName}}`) + cmd = cmd.AsTemplate(params) + return cmd +} + +func (k *K3dDriver) getKubeConfig() (*os.File, error) { + // Get kubeconfig from kind + output, err := k.cmd("kubeconfig", "get").WithoutStreaming().Output() + if err != nil { + return nil, err + } + + // Persist kubeconfig for reliability in following kubectl commands + kubeCfg, err := os.CreateTemp("", "kubeconfig") + if err != nil { + return nil, err + } + + _, err = kubeCfg.Write([]byte(output)) + if err != nil { + return nil, err + } + return kubeCfg, nil +} + +func (k *K3dDriver) GetCredentials() error { + if err := k.ensureClientImage(); err != nil { + return err + } + + config, err := k.getKubeConfig() + if err != nil { + return err + } + defer os.Remove(config.Name()) + return mergeKubeconfig(config.Name()) +} + +func (k *K3dDriver) ensureClientImage() error { + image, err := ensureClientImage(K3dDriverID, k.vaultClient, k.plan.ClientVersion, 
k.plan.ClientBuildDefDir) + if err != nil { + return err + } + k.clientImage = image + return nil +} + +func (k *K3dDriver) Cleanup(string, time.Duration) error { + return fmt.Errorf("unimplemented") +} + +var _ Driver = &K3dDriver{} diff --git a/hack/deployer/runner/settings.go b/hack/deployer/runner/settings.go index e29f956930..f082f6c172 100644 --- a/hack/deployer/runner/settings.go +++ b/hack/deployer/runner/settings.go @@ -32,6 +32,7 @@ type Plan struct { Ocp *OCPSettings `yaml:"ocp,omitempty"` Eks *EKSSettings `yaml:"eks,omitempty"` Kind *KindSettings `yaml:"kind,omitempty"` + K3d *K3dSettings `yaml:"k3d,omitempty"` ServiceAccount bool `yaml:"serviceAccount"` EnforceSecurityPolicies bool `yaml:"enforceSecurityPolicies"` DiskSetup string `yaml:"diskSetup"` @@ -86,6 +87,11 @@ type KindSettings struct { IPFamily string `yaml:"ipFamily"` } +type K3dSettings struct { + NodeCount int `yaml:"nodeCount"` + NodeImage string `yaml:"nodeImage"` +} + // RunConfig encapsulates Id used to choose a plan and a map of overrides to apply to the plan, expected to map to a file type RunConfig struct { Id string `yaml:"id"` //nolint:revive From 355e24335efb361b1d17e74307643ed5d095523f Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 1 Aug 2025 13:19:46 -0400 Subject: [PATCH 02/31] optionally install k3d Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 0d36b4c874..eadf9ee582 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -19,7 +19,9 @@ steps: commands: - .buildkite/scripts/test/set-deployer-config.sh - + {{- if eq $test.Provider "k3d" }} + - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" + {{- end }} {{- if $test.Dind }} - make -C .buildkite 
TARGET="run-deployer" ci {{- else }} From 43edf03681eaed9038da8a6e7264d98bfc4a852e Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 1 Aug 2025 13:28:31 -0400 Subject: [PATCH 03/31] Use the right machine type on ci. Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index eadf9ee582..c00eb731eb 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -29,10 +29,10 @@ steps: {{- end }} agents: - {{- if $test.Dind }} + {{- if or ($test.Dind) (eq $test.Provider "k3d") }} provider: "gcp" image: "family/core-ubuntu-2004" - {{- if eq $test.Provider "kind" }} + {{- if or (eq $test.Provider "kind") (eq $test.Provider "k3d") }} machineType: "{{ .KindAgentsMachineType }}" {{- end }} {{- else }} From dfb6b337b61fc26ab0a843fb7b35ea82202db22d Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 1 Aug 2025 13:49:32 -0400 Subject: [PATCH 04/31] Fix the pipeline templating. 
Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/main.go | 2 +- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/main.go b/.buildkite/e2e/pipeline-gen/main.go index 79bdb3f02d..4bfd250776 100644 --- a/.buildkite/e2e/pipeline-gen/main.go +++ b/.buildkite/e2e/pipeline-gen/main.go @@ -52,7 +52,7 @@ var ( pipelineTemplate string // providersInDocker are k8s providers that require the deployer to run in Docker - providersInDocker = []string{"kind", "aks", "ocp"} + providersInDocker = []string{"kind", "aks", "ocp", "k3d"} // providersNoCleanup are k8s providers that do not require the cluster to be deleted after use providersNoCleanup = []string{"kind"} // providers are k8s providers for which it is not possible to retrieve the kube config after cluster creation diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index c00eb731eb..ce5c5d2090 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -29,11 +29,11 @@ steps: {{- end }} agents: - {{- if or ($test.Dind) (eq $test.Provider "k3d") }} + {{- if $test.Dind }} provider: "gcp" image: "family/core-ubuntu-2004" {{- if or (eq $test.Provider "kind") (eq $test.Provider "k3d") }} - machineType: "{{ .KindAgentsMachineType }}" + machineType: "{{ $.KindAgentsMachineType }}" {{- end }} {{- else }} image: docker.elastic.co/ci-agent-images/cloud-k8s-operator/buildkite-agent:c39fad65 From 7c620c40b685a0035b74ca8db1218ceb6cd1107e Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 1 Aug 2025 13:58:34 -0400 Subject: [PATCH 05/31] Fix more things. 
Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index ce5c5d2090..5fe7efe367 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -68,6 +68,10 @@ steps: - .buildkite/scripts/test/set-deployer-config.sh + {{- if eq $test.Provider "k3d" }} + - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" + {{- end }} + {{- $deployerCommand := "run-deployer" }} {{- if $test.RemoteKubeconfig }} {{- $deployerCommand = "set-kubeconfig" }} @@ -83,7 +87,7 @@ steps: {{- if $test.Dind }} provider: "gcp" image: "family/core-ubuntu-2004" - {{- if eq $test.Provider "kind" }} + {{- if or (eq $test.Provider "kind") (eq $test.Provider "k3d") }} machineType: "{{ $.KindAgentsMachineType }}" diskSizeGb: 150 {{- end }} @@ -121,6 +125,9 @@ steps: soft_fail: true commands: - .buildkite/scripts/test/set-deployer-config.sh + {{- if eq $test.Provider "k3d" }} + - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" + {{- end }} {{- if not $test.Dind }} - make run-deployer agents: From 71e7d1fcb45599db7451438ba4824ec498a4f7b0 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 8 Aug 2025 13:30:41 -0500 Subject: [PATCH 06/31] test force k3d install Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 5fe7efe367..ae16623d81 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -129,6 +129,7 @@ steps: - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" {{- end }} {{- if not 
$test.Dind }} + - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" - make run-deployer agents: image: docker.elastic.co/ci-agent-images/cloud-k8s-operator/buildkite-agent:c39fad65 From c3a1d2fb3804ad448ecaedb782311b913aa2ba2a Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 8 Aug 2025 13:42:46 -0500 Subject: [PATCH 07/31] don't run k3d DID Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/main.go b/.buildkite/e2e/pipeline-gen/main.go index 4bfd250776..80af6359e3 100644 --- a/.buildkite/e2e/pipeline-gen/main.go +++ b/.buildkite/e2e/pipeline-gen/main.go @@ -52,9 +52,9 @@ var ( pipelineTemplate string // providersInDocker are k8s providers that require the deployer to run in Docker - providersInDocker = []string{"kind", "aks", "ocp", "k3d"} + providersInDocker = []string{"kind", "aks", "ocp"} // providersNoCleanup are k8s providers that do not require the cluster to be deleted after use - providersNoCleanup = []string{"kind"} + providersNoCleanup = []string{"kind", "k3d"} // providers are k8s providers for which it is not possible to retrieve the kube config after cluster creation providersNoRemoteConfig = []string{"kind"} From 42b21aaaa6255ef8c3ed9bcf4816813feddb0e1a Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Fri, 8 Aug 2025 14:45:52 -0500 Subject: [PATCH 08/31] Fix k3d Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/main.go | 2 +- hack/deployer/config/plans.yml | 6 ++-- hack/deployer/runner/k3d.go | 51 +++++++++++++---------------- hack/deployer/runner/settings.go | 5 +-- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/main.go b/.buildkite/e2e/pipeline-gen/main.go index 80af6359e3..7924576e33 100644 --- a/.buildkite/e2e/pipeline-gen/main.go +++ b/.buildkite/e2e/pipeline-gen/main.go @@ -52,7 +52,7 @@ var ( 
pipelineTemplate string // providersInDocker are k8s providers that require the deployer to run in Docker - providersInDocker = []string{"kind", "aks", "ocp"} + providersInDocker = []string{"kind", "aks", "ocp", "k3d"} // providersNoCleanup are k8s providers that do not require the cluster to be deleted after use providersNoCleanup = []string{"kind", "k3d"} // providers are k8s providers for which it is not possible to retrieve the kube config after cluster creation diff --git a/hack/deployer/config/plans.yml b/hack/deployer/config/plans.yml index b3fdc191c5..0e5f686b2e 100644 --- a/hack/deployer/config/plans.yml +++ b/hack/deployer/config/plans.yml @@ -179,7 +179,8 @@ plans: kubernetesVersion: 1.33.1 k3d: nodeCount: 3 - nodeImage: rancher/k3s:v1.33.3+k3s1 + clientImage: ghcr.io/k3d-io/k3d:5.8.3 + nodeImage: rancher/k3s:v1.33.2-k3s1 - id: k3d-ci operation: create clusterName: k3d-ci @@ -188,4 +189,5 @@ plans: kubernetesVersion: 1.33.1 k3d: nodeCount: 3 - nodeImage: rancher/k3s:v1.33.3+k3s1 + clientImage: ghcr.io/k3d-io/k3d:5.8.3 + nodeImage: rancher/k3s:v1.33.2-k3s1 diff --git a/hack/deployer/runner/k3d.go b/hack/deployer/runner/k3d.go index 321411676c..e3a8ba85df 100644 --- a/hack/deployer/runner/k3d.go +++ b/hack/deployer/runner/k3d.go @@ -7,9 +7,11 @@ package runner import ( "fmt" "os" + "runtime" "time" "github.com/elastic/cloud-on-k8s/v3/hack/deployer/exec" + "github.com/elastic/cloud-on-k8s/v3/hack/deployer/runner/env" "github.com/elastic/cloud-on-k8s/v3/pkg/utils/vault" ) @@ -34,6 +36,8 @@ func (k K3dDriverFactory) Create(plan Plan) (Driver, error) { return &K3dDriver{ plan: plan, vaultClient: vault.NewClientProvider(), + clientImage: plan.K3d.ClientImage, + nodeImage: plan.K3d.NodeImage, }, nil } @@ -41,6 +45,7 @@ type K3dDriver struct { plan Plan clientImage string vaultClient vault.ClientProvider + nodeImage string } func (k *K3dDriver) Execute() error { @@ -85,26 +90,29 @@ func (k *K3dDriver) delete() error { func (k *K3dDriver) cmd(args ...string) 
*exec.Command { params := map[string]interface{}{ - "ClusterName": k.plan.ClusterName, - "Args": args, + "ClusterName": k.plan.ClusterName, + "SharedVolume": env.SharedVolumeName(), + "K3dClientImage": k.clientImage, + "K3dNodeImage": k.nodeImage, + "Args": args, } // on macOS, the docker socket is located in $HOME - // dockerSocket := "/var/run/docker.sock" - // if runtime.GOOS == "darwin" { - // dockerSocket = "$HOME/.docker/run/docker.sock" - // } + dockerSocket := "/var/run/docker.sock" + if runtime.GOOS == "darwin" { + dockerSocket = "$HOME/.docker/run/docker.sock" + } // We need the docker socket so that kind can bootstrap // --userns=host to support Docker daemon host configured to run containers only in user namespaces - cmd := exec.NewCommand(`k3d {{Join .Args " "}} {{.ClusterName}}`) - // cmd := exec.NewCommand(`docker run --rm \ - // --userns=host \ - // -v {{.SharedVolume}}:/home \ - // -v /var/run/docker.sock:` + dockerSocket + ` \ - // -e HOME=/home \ - // -e PATH=/ \ - // {{.KindClientImage}} \ - // /kind {{Join .Args " "}} --name {{.ClusterName}}`) + command := `docker run --rm \ + --userns=host \ + -v {{.SharedVolume}}:/home \ + -v /var/run/docker.sock:` + dockerSocket + ` \ + -e HOME=/home \ + -e PATH=/ \ + {{.K3dClientImage}} \ + {{Join .Args " "}} {{.ClusterName}}` + cmd := exec.NewCommand(command) cmd = cmd.AsTemplate(params) return cmd } @@ -130,10 +138,6 @@ func (k *K3dDriver) getKubeConfig() (*os.File, error) { } func (k *K3dDriver) GetCredentials() error { - if err := k.ensureClientImage(); err != nil { - return err - } - config, err := k.getKubeConfig() if err != nil { return err @@ -142,15 +146,6 @@ func (k *K3dDriver) GetCredentials() error { return mergeKubeconfig(config.Name()) } -func (k *K3dDriver) ensureClientImage() error { - image, err := ensureClientImage(K3dDriverID, k.vaultClient, k.plan.ClientVersion, k.plan.ClientBuildDefDir) - if err != nil { - return err - } - k.clientImage = image - return nil -} - func (k *K3dDriver) 
Cleanup(string, time.Duration) error { return fmt.Errorf("unimplemented") } diff --git a/hack/deployer/runner/settings.go b/hack/deployer/runner/settings.go index f082f6c172..76ed6396c4 100644 --- a/hack/deployer/runner/settings.go +++ b/hack/deployer/runner/settings.go @@ -88,8 +88,9 @@ type KindSettings struct { } type K3dSettings struct { - NodeCount int `yaml:"nodeCount"` - NodeImage string `yaml:"nodeImage"` + ClientImage string `yaml:"clientImage"` + NodeCount int `yaml:"nodeCount"` + NodeImage string `yaml:"nodeImage"` } // RunConfig encapsulates Id used to choose a plan and a map of overrides to apply to the plan, expected to map to a file From 5947ebb1b2f50174674dfca2cb72cf732eb3e409 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Mon, 11 Aug 2025 08:33:10 -0500 Subject: [PATCH 09/31] make cluster + e2e run in one step. Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/main.go b/.buildkite/e2e/pipeline-gen/main.go index 7924576e33..830aa4c36c 100644 --- a/.buildkite/e2e/pipeline-gen/main.go +++ b/.buildkite/e2e/pipeline-gen/main.go @@ -56,7 +56,7 @@ var ( // providersNoCleanup are k8s providers that do not require the cluster to be deleted after use providersNoCleanup = []string{"kind", "k3d"} // providers are k8s providers for which it is not possible to retrieve the kube config after cluster creation - providersNoRemoteConfig = []string{"kind"} + providersNoRemoteConfig = []string{"kind", "k3d"} semverRE = regexp.MustCompile(`\d*\.\d*\.\d*(-\w*)?`) chars = []rune("abcdefghijklmnopqrstuvwxyz") From 30af68f561a3a0857bfab991e417d567989ef436 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Mon, 11 Aug 2025 09:09:06 -0500 Subject: [PATCH 10/31] Handle storageclass properly Signed-off-by: Michael Montgomery --- hack/deployer/runner/k3d.go | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git 
a/hack/deployer/runner/k3d.go b/hack/deployer/runner/k3d.go index e3a8ba85df..8bf302dd4e 100644 --- a/hack/deployer/runner/k3d.go +++ b/hack/deployer/runner/k3d.go @@ -6,7 +6,9 @@ package runner import ( "fmt" + "io/fs" "os" + "path/filepath" "runtime" "time" @@ -81,7 +83,17 @@ func (k *K3dDriver) create() error { } defer os.Remove(kubeCfg.Name()) - return nil + // Delete standard storage class but ignore error if not found + if err := kubectl("--kubeconfig", kubeCfg.Name(), "delete", "storageclass", "standard"); err != nil { + return err + } + + tmpStorageClass, err := k.createTmpStorageClass() + if err != nil { + return err + } + + return kubectl("--kubeconfig", kubeCfg.Name(), "apply", "-f", tmpStorageClass) } func (k *K3dDriver) delete() error { @@ -146,6 +158,12 @@ func (k *K3dDriver) GetCredentials() error { return mergeKubeconfig(config.Name()) } +func (k *K3dDriver) createTmpStorageClass() (string, error) { + tmpFile := filepath.Join(os.Getenv("HOME"), storageClassFileName) + err := os.WriteFile(tmpFile, []byte(storageClass), fs.ModePerm) + return tmpFile, err +} + func (k *K3dDriver) Cleanup(string, time.Duration) error { return fmt.Errorf("unimplemented") } From d2f4fc3c84272306c0dda142e1fb331bc12acc0e Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Mon, 18 Aug 2025 15:09:58 -0500 Subject: [PATCH 11/31] Add some debugging Signed-off-by: Michael Montgomery --- test/e2e/test/elasticsearch/checks_es.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/e2e/test/elasticsearch/checks_es.go b/test/e2e/test/elasticsearch/checks_es.go index d6e4bf7dc5..b73f05c4f1 100644 --- a/test/e2e/test/elasticsearch/checks_es.go +++ b/test/e2e/test/elasticsearch/checks_es.go @@ -370,15 +370,18 @@ func compareCgroupMemoryLimit(topologyElement esv1.NodeSet, nodeStats client.Nod memoryLimit = c.Resources.Limits.Memory() } } + fmt.Printf("ES manifest memory limit: %v\n", memoryLimit) if memoryLimit == nil || memoryLimit.IsZero() { // no expected memory, 
consider it's ok return nil } + fmt.Printf("ES returned cgroup memory limit in bytes: %s\n", nodeStats.OS.CGroup.Memory.LimitInBytes) // ES returns a string, parse it as an int64, base10 actualCgroupMemoryLimit, err := strconv.ParseInt( nodeStats.OS.CGroup.Memory.LimitInBytes, 10, 64, ) + fmt.Printf("ES parsed cgroup memory limit in bytes: %d\n", actualCgroupMemoryLimit) if err != nil { return fmt.Errorf("while parsing cgroup memory limit: %w", err) } @@ -397,13 +400,17 @@ func compareCgroupCPULimit(topologyElement esv1.NodeSet, nodeStats client.NodeSt expectedCPULimit = c.Resources.Limits.Cpu() } } + + fmt.Printf("ES manifest cpu limit: %v\n", expectedCPULimit) if expectedCPULimit == nil || expectedCPULimit.IsZero() { // no expected cpu, consider it's ok return nil } cgroupCPU := nodeStats.OS.CGroup.CPU + fmt.Printf("ES returned cgroup cpu limit: %v\n", cgroupCPU) actualCgroupCPULimit := float64(cgroupCPU.CFSQuotaMicros) / float64(cgroupCPU.CFSPeriodMicros) + fmt.Printf("ES calculated cgroup cpu limit: %v\n", actualCgroupCPULimit) if expectedCPULimit.AsApproximateFloat64() != actualCgroupCPULimit { return fmt.Errorf("expected cgroup CPU limit [%f], got [%f]", expectedCPULimit.AsApproximateFloat64(), actualCgroupCPULimit) } From 76fa9c6c92edf4073ca34aa584734091ed7e68a4 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Mon, 18 Aug 2025 15:41:29 -0500 Subject: [PATCH 12/31] more detailed debugging Signed-off-by: Michael Montgomery --- test/e2e/test/elasticsearch/checks_es.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/e2e/test/elasticsearch/checks_es.go b/test/e2e/test/elasticsearch/checks_es.go index b73f05c4f1..099f059e3b 100644 --- a/test/e2e/test/elasticsearch/checks_es.go +++ b/test/e2e/test/elasticsearch/checks_es.go @@ -408,7 +408,8 @@ func compareCgroupCPULimit(topologyElement esv1.NodeSet, nodeStats client.NodeSt } cgroupCPU := nodeStats.OS.CGroup.CPU - fmt.Printf("ES returned cgroup cpu limit: %v\n", cgroupCPU) + 
fmt.Printf("ES returned cgroup cfs_period_micros %d\n", cgroupCPU.CFSPeriodMicros) + fmt.Printf("ES returned cgroup cfs_quota_micros %d\n", cgroupCPU.CFSQuotaMicros) actualCgroupCPULimit := float64(cgroupCPU.CFSQuotaMicros) / float64(cgroupCPU.CFSPeriodMicros) fmt.Printf("ES calculated cgroup cpu limit: %v\n", actualCgroupCPULimit) if expectedCPULimit.AsApproximateFloat64() != actualCgroupCPULimit { From 7645b8ed1dec7dc6d125f485dceb01bdf8dbf1e4 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 08:12:09 -0500 Subject: [PATCH 13/31] Debugging cgroupsv2 Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index ae16623d81..e1ddb57a95 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -19,9 +19,6 @@ steps: commands: - .buildkite/scripts/test/set-deployer-config.sh - {{- if eq $test.Provider "k3d" }} - - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" - {{- end }} {{- if $test.Dind }} - make -C .buildkite TARGET="run-deployer" ci {{- else }} @@ -68,15 +65,14 @@ steps: - .buildkite/scripts/test/set-deployer-config.sh - {{- if eq $test.Provider "k3d" }} - - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" - {{- end }} - {{- $deployerCommand := "run-deployer" }} {{- if $test.RemoteKubeconfig }} {{- $deployerCommand = "set-kubeconfig" }} {{- end }} + # Double check that we are running on a node with cgroup v2 + - ls -l /sys/fs/cgroup/ + {{- if $test.Dind }} - make -C .buildkite TARGET="{{ $deployerCommand }} e2e-run" ci {{- else }} From 6679a2a1b5e320a5493f15897476b4e0c446265f Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 08:21:49 -0500 Subject: [PATCH 14/31] Run k3s config 
checker Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index e1ddb57a95..4e1eaeeeed 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -73,6 +73,10 @@ steps: # Double check that we are running on a node with cgroup v2 - ls -l /sys/fs/cgroup/ + # Check k3s configuration + - wget https://github.com/k3s-io/k3s/releases/download/v1.33.3%2Bk3s1/k3s && chmod +x k3s + - ./k3s check-config + {{- if $test.Dind }} - make -C .buildkite TARGET="{{ $deployerCommand }} e2e-run" ci {{- else }} From 937c3814a695ae44a15d59da9ae2fc1fcea2ba06 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 08:54:15 -0500 Subject: [PATCH 15/31] execute a find on cgroups cpu dir. Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 4e1eaeeeed..7131673bc8 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -72,6 +72,7 @@ steps: # Double check that we are running on a node with cgroup v2 - ls -l /sys/fs/cgroup/ + - find /sys/fs/cgroup/cpu -ls # Check k3s configuration - wget https://github.com/k3s-io/k3s/releases/download/v1.33.3%2Bk3s1/k3s && chmod +x k3s From bb3be2aed7128b687762729347889c6b0f57a86a Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 09:05:24 -0500 Subject: [PATCH 16/31] more debugging Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 7131673bc8..0f8f519746 
100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -72,7 +72,7 @@ steps: # Double check that we are running on a node with cgroup v2 - ls -l /sys/fs/cgroup/ - - find /sys/fs/cgroup/cpu -ls + - ls -l "/sys/fs/cgroup/cpu,cpuacct/*" # Check k3s configuration - wget https://github.com/k3s-io/k3s/releases/download/v1.33.3%2Bk3s1/k3s && chmod +x k3s From 5127bb3482aa45f6124fceacb9603804f71b6c2a Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 09:20:57 -0500 Subject: [PATCH 17/31] One more try Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 0f8f519746..8ce4df6d2d 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -72,7 +72,7 @@ steps: # Double check that we are running on a node with cgroup v2 - ls -l /sys/fs/cgroup/ - - ls -l "/sys/fs/cgroup/cpu,cpuacct/*" + - ls -l "/sys/fs/cgroup/cpu/*" # Check k3s configuration - wget https://github.com/k3s-io/k3s/releases/download/v1.33.3%2Bk3s1/k3s && chmod +x k3s From 010d074f00a3e1595496dd1c440ca865a716ee4b Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 09:32:07 -0500 Subject: [PATCH 18/31] try find on root. 
Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 8ce4df6d2d..6edf14eab2 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -71,8 +71,7 @@ steps: {{- end }} # Double check that we are running on a node with cgroup v2 - - ls -l /sys/fs/cgroup/ - - ls -l "/sys/fs/cgroup/cpu/*" + - find /sys/fs/cgroup/ -ls # Check k3s configuration - wget https://github.com/k3s-io/k3s/releases/download/v1.33.3%2Bk3s1/k3s && chmod +x k3s From 0508b00183041269a9a099d5491210cf986f9379 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 13:26:12 -0500 Subject: [PATCH 19/31] Change family Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 6edf14eab2..a79bd31f1b 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -28,7 +28,7 @@ steps: agents: {{- if $test.Dind }} provider: "gcp" - image: "family/core-ubuntu-2004" + image: "family/core-ubuntu-2204" {{- if or (eq $test.Provider "kind") (eq $test.Provider "k3d") }} machineType: "{{ $.KindAgentsMachineType }}" {{- end }} From 1951bfd2fe8d600f51cb22ae096c11c1f427addb Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Tue, 19 Aug 2025 13:34:32 -0500 Subject: [PATCH 20/31] try different base machine type.
Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/main.go b/.buildkite/e2e/pipeline-gen/main.go index 830aa4c36c..9a209a29a0 100644 --- a/.buildkite/e2e/pipeline-gen/main.go +++ b/.buildkite/e2e/pipeline-gen/main.go @@ -44,7 +44,7 @@ const ( EnvVarOperatorImage = "OPERATOR_IMAGE" EnvVarE2EImage = "E2E_IMG" - KindAgentsMachineType = "n1-standard-16" + KindAgentsMachineType = "n4-standard-16" ) var ( From 34703ae58792852413ea03cc347ec455cbdf587c Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 07:49:18 -0500 Subject: [PATCH 21/31] diff machine that supports ssd disk type. Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/e2e/pipeline-gen/main.go b/.buildkite/e2e/pipeline-gen/main.go index 9a209a29a0..c82e2a3c32 100644 --- a/.buildkite/e2e/pipeline-gen/main.go +++ b/.buildkite/e2e/pipeline-gen/main.go @@ -44,7 +44,7 @@ const ( EnvVarOperatorImage = "OPERATOR_IMAGE" EnvVarE2EImage = "E2E_IMG" - KindAgentsMachineType = "n4-standard-16" + KindAgentsMachineType = "n2d-standard-16" ) var ( From e5a6c1c5adaaa523a4bdeec8caa7339704cb24f9 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 08:49:09 -0500 Subject: [PATCH 22/31] add step to get cgroup info from ES Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index 6899517224..26155f5305 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -5,7 +5,17 @@ package test import ( + "errors" + "fmt" "testing" + + "k8s.io/apimachinery/pkg/labels" + k8sclient "sigs.k8s.io/controller-runtime/pkg/client" + + commonv1 "github.com/elastic/cloud-on-k8s/v3/pkg/apis/common/v1" +
"github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/label" + "github.com/elastic/cloud-on-k8s/v3/pkg/utils/k8s" + // testes "github.com/elastic/cloud-on-k8s/v3/test/e2e/test/elasticsearch" ) // RunMutations tests resources changes on given resources. @@ -36,6 +46,37 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B steps = steps.WithSteps(toCreate.DeletionTestSteps(k)) } + steps = steps.WithSteps(StepList{ + { + Name: "get cgroup information from elasticsearch", + Test: Eventually(func() error { + ctx := Ctx() + namespace := fmt.Sprintf("%s-%s", ctx.TestRun, "mercury") + listOptions := k8sclient.ListOptions{ + Namespace: namespace, + LabelSelector: labels.SelectorFromSet(labels.Set{ + commonv1.TypeLabelName: label.Type, + }), + } + pods, err := k.GetPods(&listOptions) + if err != nil { + return err + } + if len(pods) == 0 { + return errors.New("no pods found") + } + + // exec into the pod to read the cgroup membership of PID 1 + stdout, _, err := k.Exec(k8s.ExtractNamespacedName(&pods[0]), + []string{"cat", "/proc/1/cgroup"}) + if err != nil { + return err + } + fmt.Printf("cgroup data: %s", stdout) + return nil + }), + }}) + steps.RunSequential(t) } From 96a3f6385914bbfb6871e26188e5bd1255e0fd21 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 09:05:55 -0500 Subject: [PATCH 23/31] change order of steps Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index 26155f5305..a261e5575a 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -32,20 +32,6 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B for _, toCreate := range creationBuilders { steps = steps.WithSteps(toCreate.CreationTestSteps(k)) } - for _, toCreate := range creationBuilders { - steps = 
steps.WithSteps(CheckTestSteps(toCreate, k)) - } - - // Trigger some mutations - for _, mutateTo := range mutationBuilders { - steps = steps.WithSteps(mutateTo.MutationTestSteps(k)) - } - - // Delete using the original builder (so that we can use it as a mutation builder as well) - for _, toCreate := range creationBuilders { - steps = steps.WithSteps(toCreate.DeletionTestSteps(k)) - } - steps = steps.WithSteps(StepList{ { Name: "get cgroup information from elasticsearch", @@ -76,6 +62,19 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B return nil }), }}) + for _, toCreate := range creationBuilders { + steps = steps.WithSteps(CheckTestSteps(toCreate, k)) + } + + // Trigger some mutations + for _, mutateTo := range mutationBuilders { + steps = steps.WithSteps(mutateTo.MutationTestSteps(k)) + } + + // Delete using the original builder (so that we can use it as a mutation builder as well) + for _, toCreate := range creationBuilders { + steps = steps.WithSteps(toCreate.DeletionTestSteps(k)) + } steps.RunSequential(t) } From d7000d17f2c40bc257db3803d386ab51eef4b261 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 09:55:56 -0500 Subject: [PATCH 24/31] remove debugging. Add check for full path to cgroup accounting file.
Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 3 --- test/e2e/test/elasticsearch/checks_es.go | 3 --- test/e2e/test/run_mutation.go | 18 ++++++++++++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index a79bd31f1b..0190404133 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -70,9 +70,6 @@ steps: {{- $deployerCommand = "set-kubeconfig" }} {{- end }} - # Double check that we are running on a node with cgroup v2 - - find /sys/fs/cgroup/ -ls - # Check k3s configuration - wget https://github.com/k3s-io/k3s/releases/download/v1.33.3%2Bk3s1/k3s && chmod +x k3s - ./k3s check-config diff --git a/test/e2e/test/elasticsearch/checks_es.go b/test/e2e/test/elasticsearch/checks_es.go index 099f059e3b..d0966f3542 100644 --- a/test/e2e/test/elasticsearch/checks_es.go +++ b/test/e2e/test/elasticsearch/checks_es.go @@ -370,18 +370,15 @@ func compareCgroupMemoryLimit(topologyElement esv1.NodeSet, nodeStats client.Nod memoryLimit = c.Resources.Limits.Memory() } } - fmt.Printf("ES manifest memory limit: %v\n", memoryLimit) if memoryLimit == nil || memoryLimit.IsZero() { // no expected memory, consider it's ok return nil } - fmt.Printf("ES returned cgroup memory limit in bytes: %s\n", nodeStats.OS.CGroup.Memory.LimitInBytes) // ES returns a string, parse it as an int64, base10 actualCgroupMemoryLimit, err := strconv.ParseInt( nodeStats.OS.CGroup.Memory.LimitInBytes, 10, 64, ) - fmt.Printf("ES parsed cgroup memory limit in bytes: %d\n", actualCgroupMemoryLimit) if err != nil { return fmt.Errorf("while parsing cgroup memory limit: %w", err) } diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index a261e5575a..77bda93d30 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -7,6 +7,9 @@ package test import ( "errors" "fmt" 
+ "os" + "path" + "strings" "testing" "k8s.io/apimachinery/pkg/labels" @@ -58,7 +61,22 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B if err != nil { return err } + var cpuAcctData string fmt.Printf("cgroup data: %s", stdout) + for _, line := range strings.Split(stdout, "\n") { + for _, controlGroup := range strings.Split(line, ":") { + if strings.Contains(controlGroup, "cpuacct") { + cpuAcctData = strings.Split(line, ":")[2] + } + } + } + fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") + if _, err := os.Stat(fullPath); err != nil { + return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) + } else { + fmt.Printf("cpuacct.usage file exists") + } + return nil }), }}) From 6de0902d4a3f60d37d6c8d0188501015429e1074 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 10:03:20 -0500 Subject: [PATCH 25/31] fix linter Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index 77bda93d30..18eb1ead52 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -73,10 +73,10 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") if _, err := os.Stat(fullPath); err != nil { return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) - } else { - fmt.Printf("cpuacct.usage file exists") } + fmt.Printf("cpuacct.usage file exists") + return nil }), }}) From b2e0c99662d0d7284d5f150ca0867050de073ddd Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 10:26:49 -0500 Subject: [PATCH 26/31] only print cgroup data once Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git 
a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index 18eb1ead52..e4145a859e 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -21,6 +21,10 @@ import ( // testes "github.com/elastic/cloud-on-k8s/v3/test/e2e/test/elasticsearch" ) +var ( + printed bool +) + // RunMutations tests resources changes on given resources. // If the resource to mutate to is the same as the original resource, then all tests should still pass. // //nolint:thelper @@ -62,7 +66,10 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B return err } var cpuAcctData string - fmt.Printf("cgroup data: %s", stdout) + if !printed { + fmt.Printf("cgroup data: %s", stdout) + } + printed = true for _, line := range strings.Split(stdout, "\n") { for _, controlGroup := range strings.Split(line, ":") { if strings.Contains(controlGroup, "cpuacct") { @@ -71,6 +78,7 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B } } fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") + fmt.Printf("cpuacct data full path: %s", fullPath) if _, err := os.Stat(fullPath); err != nil { return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) } From 273fa06a4f385877cfd594a068070827cb505555 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 11:08:42 -0500 Subject: [PATCH 27/31] clearly show when file doesn't exist Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 1 + 1 file changed, 1 insertion(+) diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index e4145a859e..c32658f1b2 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -80,6 +80,7 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") fmt.Printf("cpuacct data full path: %s", fullPath) if _, err := os.Stat(fullPath); err 
!= nil { + fmt.Printf("cpuacct.usage file does not exist") return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) } From 8e118135fecd3662e5ada5d24dbb2b20123117ba Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 11:09:15 -0500 Subject: [PATCH 28/31] newlines Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index c32658f1b2..48aec57a29 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -78,13 +78,13 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B } } fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") - fmt.Printf("cpuacct data full path: %s", fullPath) + fmt.Printf("cpuacct data full path: %s\n", fullPath) if _, err := os.Stat(fullPath); err != nil { - fmt.Printf("cpuacct.usage file does not exist") + fmt.Printf("cpuacct.usage file does not exist\n") return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) } - fmt.Printf("cpuacct.usage file exists") + fmt.Printf("cpuacct.usage file exists\n") return nil }), From 45ec958cbd3d42fe235181b8f0732872f38f22f8 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 11:42:12 -0500 Subject: [PATCH 29/31] More debugging data. 
Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index 48aec57a29..3cd026ef7f 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -69,7 +69,6 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B if !printed { fmt.Printf("cgroup data: %s", stdout) } - printed = true for _, line := range strings.Split(stdout, "\n") { for _, controlGroup := range strings.Split(line, ":") { if strings.Contains(controlGroup, "cpuacct") { @@ -77,15 +76,35 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B } } } - fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") - fmt.Printf("cpuacct data full path: %s\n", fullPath) - if _, err := os.Stat(fullPath); err != nil { - fmt.Printf("cpuacct.usage file does not exist\n") - return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) - } + if !printed { + fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") + fmt.Printf("cpuacct data full path: %s\n", fullPath) + if _, err := os.Stat(fullPath); err != nil { + fmt.Printf("cpuacct.usage file does not exist\n") + printed = true + return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) + } + + fmt.Printf("cpuacct.usage file exists\n") + + stdout, _, err = k.Exec(k8s.ExtractNamespacedName(&pods[0]), + []string{"find", "/sys/fs/cgroup", "-ls"}) + if err != nil { + return err + } + + fmt.Printf("full /sys/fs/cgroup output: %s\n", stdout) + + stdout, _, err = k.Exec(k8s.ExtractNamespacedName(&pods[0]), + []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us"}) + if err != nil { + return err + } - fmt.Printf("cpuacct.usage file exists\n") + fmt.Printf("cpu.cfs_quota_us: %s\n", stdout) + } + printed = true return nil }), }}) From 
3301f8dbd74a5031ec519ff6cdfb066c1072c468 Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 13:31:48 -0500 Subject: [PATCH 30/31] don't return on err Signed-off-by: Michael Montgomery --- test/e2e/test/run_mutation.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go index 3cd026ef7f..3c8a035f00 100644 --- a/test/e2e/test/run_mutation.go +++ b/test/e2e/test/run_mutation.go @@ -80,9 +80,8 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage") fmt.Printf("cpuacct data full path: %s\n", fullPath) if _, err := os.Stat(fullPath); err != nil { - fmt.Printf("cpuacct.usage file does not exist\n") + fmt.Printf("cpuacct.usage file err: %s\n", err) printed = true - return fmt.Errorf("while attempting to stat %s: %w", fullPath, err) } fmt.Printf("cpuacct.usage file exists\n") @@ -90,7 +89,7 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B stdout, _, err = k.Exec(k8s.ExtractNamespacedName(&pods[0]), []string{"find", "/sys/fs/cgroup", "-ls"}) if err != nil { - return err + fmt.Printf("find /sys/fs/cgroup err: %s\n", err) } fmt.Printf("full /sys/fs/cgroup output: %s\n", stdout) @@ -98,7 +97,7 @@ func RunMutations(t *testing.T, creationBuilders []Builder, mutationBuilders []B stdout, _, err = k.Exec(k8s.ExtractNamespacedName(&pods[0]), []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us"}) if err != nil { - return err + fmt.Printf("cpu.cfs_quota_us file err: %s\n", err) } fmt.Printf("cpu.cfs_quota_us: %s\n", stdout) From f4aecafd637ce8dd16170bd262741baa4d5560eb Mon Sep 17 00:00:00 2001 From: Michael Montgomery Date: Wed, 20 Aug 2025 16:38:24 -0500 Subject: [PATCH 31/31] Move all to 2204 Signed-off-by: Michael Montgomery --- .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 0190404133..d5dbf0befb 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -83,7 +83,7 @@ steps: agents: {{- if $test.Dind }} provider: "gcp" - image: "family/core-ubuntu-2004" + image: "family/core-ubuntu-2204" {{- if or (eq $test.Provider "kind") (eq $test.Provider "k3d") }} machineType: "{{ $.KindAgentsMachineType }}" diskSizeGb: 150 @@ -135,7 +135,7 @@ steps: - make -C .buildkite TARGET="run-deployer" ci agents: provider: "gcp" - image: "family/core-ubuntu-2004" + image: "family/core-ubuntu-2204" {{- end }} {{- end }}