diff --git a/.buildkite/e2e/pipeline-gen/main.go b/.buildkite/e2e/pipeline-gen/main.go index 79bdb3f02d..c82e2a3c32 100644 --- a/.buildkite/e2e/pipeline-gen/main.go +++ b/.buildkite/e2e/pipeline-gen/main.go @@ -44,7 +44,7 @@ const ( EnvVarOperatorImage = "OPERATOR_IMAGE" EnvVarE2EImage = "E2E_IMG" - KindAgentsMachineType = "n1-standard-16" + KindAgentsMachineType = "n2d-standard-16" ) var ( @@ -52,11 +52,11 @@ var ( pipelineTemplate string // providersInDocker are k8s providers that require the deployer to run in Docker - providersInDocker = []string{"kind", "aks", "ocp"} + providersInDocker = []string{"kind", "aks", "ocp", "k3d"} // providersNoCleanup are k8s providers that do not require the cluster to be deleted after use - providersNoCleanup = []string{"kind"} + providersNoCleanup = []string{"kind", "k3d"} // providers are k8s providers for which it is not possible to retrieve the kube config after cluster creation - providersNoRemoteConfig = []string{"kind"} + providersNoRemoteConfig = []string{"kind", "k3d"} semverRE = regexp.MustCompile(`\d*\.\d*\.\d*(-\w*)?`) chars = []rune("abcdefghijklmnopqrstuvwxyz") diff --git a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml index 0d36b4c874..d5dbf0befb 100644 --- a/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml +++ b/.buildkite/e2e/pipeline-gen/pipeline.tpl.yaml @@ -19,7 +19,6 @@ steps: commands: - .buildkite/scripts/test/set-deployer-config.sh - {{- if $test.Dind }} - make -C .buildkite TARGET="run-deployer" ci {{- else }} @@ -29,9 +28,9 @@ steps: agents: {{- if $test.Dind }} provider: "gcp" - image: "family/core-ubuntu-2004" - {{- if eq $test.Provider "kind" }} - machineType: "{{ .KindAgentsMachineType }}" + image: "family/core-ubuntu-2204" + {{- if or (eq $test.Provider "kind") (eq $test.Provider "k3d") }} + machineType: "{{ $.KindAgentsMachineType }}" {{- end }} {{- else }} image: docker.elastic.co/ci-agent-images/cloud-k8s-operator/buildkite-agent:c39fad65 @@ -71,6 +70,10 @@ steps: {{- $deployerCommand = "set-kubeconfig" }} {{- end }} + # Check k3s configuration + - wget https://github.com/k3s-io/k3s/releases/download/v1.33.3%2Bk3s1/k3s && chmod +x k3s + - ./k3s check-config + {{- if $test.Dind }} - make -C .buildkite TARGET="{{ $deployerCommand }} e2e-run" ci {{- else }} @@ -80,8 +83,8 @@ steps: agents: {{- if $test.Dind }} provider: "gcp" - image: "family/core-ubuntu-2004" - {{- if eq $test.Provider "kind" }} + image: "family/core-ubuntu-2204" + {{- if or (eq $test.Provider "kind") (eq $test.Provider "k3d") }} machineType: "{{ $.KindAgentsMachineType }}" diskSizeGb: 150 {{- end }} @@ -119,7 +122,11 @@ steps: soft_fail: true commands: - .buildkite/scripts/test/set-deployer-config.sh + {{- if eq $test.Provider "k3d" }} + - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" + {{- end }} {{- if not $test.Dind }} + - "wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.8.3 bash" - make run-deployer agents: image: docker.elastic.co/ci-agent-images/cloud-k8s-operator/buildkite-agent:c39fad65 @@ -128,7 +135,7 @@ steps: - make -C .buildkite TARGET="run-deployer" ci agents: provider: "gcp" - image: "family/core-ubuntu-2004" + image: "family/core-ubuntu-2204" {{- end }} {{- end }} diff --git a/.buildkite/e2e/release-branch-matrix.yaml b/.buildkite/e2e/release-branch-matrix.yaml index ddac752c55..eae69fdd2e 100644 --- a/.buildkite/e2e/release-branch-matrix.yaml +++ b/.buildkite/e2e/release-branch-matrix.yaml @@ -1,4 +1,3 @@ - - 
label: stack fixed: E2E_PROVIDER: gke @@ -41,6 +40,12 @@ - DEPLOYER_KIND_NODE_IMAGE: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f DEPLOYER_KIND_IP_FAMILY: ipv6 +- label: k3d + fixed: + E2E_PROVIDER: k3d + mixed: + - DEPLOYER_K3D_NODE_IMAGE: "rancher/k3s:v1.33.3+k3s1" + - label: gke fixed: E2E_PROVIDER: gke diff --git a/.buildkite/scripts/test/set-deployer-config.sh b/.buildkite/scripts/test/set-deployer-config.sh index 4cbd55e14b..07c7d64b56 100755 --- a/.buildkite/scripts/test/set-deployer-config.sh +++ b/.buildkite/scripts/test/set-deployer-config.sh @@ -25,7 +25,7 @@ ROOT="$WD/../../.." w() { echo "$@" >> "$ROOT/deployer-config.yml"; } -write_deployer_config() { +write_deployer_config() { :> "$ROOT/deployer-config.yml" w "id: ${E2E_PROVIDER}-ci" @@ -36,7 +36,7 @@ write_deployer_config() { w " operation: ${DEPLOYER_OPERATION:-create}" w " clusterName: ${CLUSTER_NAME}" - # k8s version for ocp, kind + # k8s version for ocp, kind if [[ "${DEPLOYER_CLIENT_VERSION:-}" != "" ]]; then w ' clientVersion: "'"${DEPLOYER_CLIENT_VERSION}"'"' fi @@ -58,6 +58,11 @@ write_deployer_config() { w " nodeImage: ${DEPLOYER_KIND_NODE_IMAGE}" w " ipFamily: ${DEPLOYER_KIND_IP_FAMILY:-ipv4}" fi + + if [[ "${DEPLOYER_K3D_NODE_IMAGE:-}" ]]; then + w " k3d:" + w " nodeImage: ${DEPLOYER_K3D_NODE_IMAGE}" + fi } write_deployer_config diff --git a/hack/deployer/cmd/create.go b/hack/deployer/cmd/create.go index 8babc6cfa5..ac6c03ad68 100644 --- a/hack/deployer/cmd/create.go +++ b/hack/deployer/cmd/create.go @@ -61,6 +61,8 @@ func CreateCommand() *cobra.Command { cfgData = fmt.Sprintf(runner.DefaultEKSRunConfigTemplate, user, vaultAddr, token) case runner.KindDriverID: cfgData = fmt.Sprintf(runner.DefaultKindRunConfigTemplate, user) + case runner.K3dDriverID: + cfgData = fmt.Sprintf(runner.DefaultK3dRunConfigTemplate, user) default: return fmt.Errorf("unknown provider %s", provider) } diff --git a/hack/deployer/config/plans.yml b/hack/deployer/config/plans.yml index 931c688fa9..0e5f686b2e 100644 --- a/hack/deployer/config/plans.yml +++ b/hack/deployer/config/plans.yml @@ -1,173 +1,193 @@ plans: -- id: gke-ci - operation: create - clusterName: ci - provider: gke - kubernetesVersion: 1.33 - machineType: n1-standard-8 - serviceAccount: true - enforceSecurityPolicies: true - # use kustomize in GKE to remove the NVMe provisioning already taken care of by the platform - diskSetup: kubectl apply -k hack/deployer/config/local-disks - gke: - region: us-central1 - localSsdCount: 1 - nodeCountPerZone: 1 - gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: gke-autopilot-ci - operation: create - clusterName: ci-autopilot - provider: gke - kubernetesVersion: 1.33 - serviceAccount: true - enforceSecurityPolicies: true - # this is disabled in autopilot: container provisioner is privileged; not allowed in Autopilot - # diskSetup: kubectl apply -k hack/deployer/config/local-disks - gke: - autopilot: true - region: us-central1 - gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: 
gke-dev - operation: create - clusterName: dev - provider: gke - kubernetesVersion: 1.33 - machineType: n1-standard-8 - serviceAccount: false - enforceSecurityPolicies: true - gke: - region: europe-west1 - localSsdCount: 1 - nodeCountPerZone: 1 - # Uncomment option below to enable network policy enforcement in GKE. - # networkPolicy: true - # Uncomment option below to create a private GKE cluster. - # Note that when a cluster is private you must: - # 1. Create a firewall rule so that the webhook can be accessed from the API server (see https://github.com/elastic/cloud-on-k8s/issues/1673#issuecomment-528449682) - # 2. Create a VM to access the subnet and authorize the VM to access the master, see https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#private_master - # private: true - # gke creates a secondary IP range for all Pods of a cluster - # gke defaults to a /14 subnet, which allows 262k Pods per cluster, but only 62 subnets to be created - # /20 allows 4094 subnets, with up to 4094 IPs (Pods) per subnet - # more clusters can therefore be created in the same VPC network. - # we set a default of /20 that can be overridden here - # clusterIpv4Cidr: /20 - # servicesIpv4Cidr: /20 - gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: gke-autopilot-dev - operation: create - clusterName: dev-autopilot - provider: gke - kubernetesVersion: 1.33 - serviceAccount: false - enforceSecurityPolicies: true - gke: - autopilot: true - region: europe-west1 - gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append -- id: aks-ci - operation: create - clusterName: ci - provider: aks - kubernetesVersion: 1.32.4 - machineType: Standard_D8s_v3 - serviceAccount: true - enforceSecurityPolicies: true - diskSetup: kubectl apply -k hack/deployer/config/local-disks - aks: - nodeCount: 3 - location: westeurope - zones: "1 2 3" -- id: aks-dev - operation: create - clusterName: dev - provider: aks - kubernetesVersion: 1.32.4 - machineType: Standard_D8s_v3 - serviceAccount: false - enforceSecurityPolicies: true - aks: - nodeCount: 3 - location: northeurope - zones: "1 2 3" -- id: ocp-ci - operation: create - clusterName: ci - clientVersion: 4.19.2 - provider: ocp - machineType: n1-standard-8 - serviceAccount: true - ocp: - region: europe-west6 - nodeCount: 3 -- id: ocp-dev - operation: create - clusterName: dev - clientVersion: 4.19.2 - provider: ocp - machineType: n1-standard-8 - serviceAccount: true - ocp: - region: europe-west1 - nodeCount: 3 -- id: eks-ci - operation: create - clusterName: ci - provider: eks - machineType: c5d.2xlarge - serviceAccount: false - enforceSecurityPolicies: true - kubernetesVersion: 1.33 - diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml - eks: - region: ap-northeast-3 - nodeCount: 3 - nodeAMI: auto -- id: eks-arm-ci - operation: create - clusterName: arm-ci - provider: eks - machineType: m6gd.2xlarge - serviceAccount: false - enforceSecurityPolicies: true - kubernetesVersion: 1.33 - diskSetup: kubectl apply -f 
hack/deployer/config/local-disks/ssd-provisioner.yaml - eks: - region: eu-west-1 - nodeCount: 3 - nodeAMI: auto -- id: eks-dev - operation: create - clusterName: dev - provider: eks - machineType: c5d.2xlarge - serviceAccount: false - kubernetesVersion: 1.33 - diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml - enforceSecurityPolicies: true - eks: - region: eu-west-2 - nodeCount: 3 - nodeAMI: auto -- id: kind-dev - operation: create - clusterName: eck - clientVersion: 0.29.0 - provider: kind - kubernetesVersion: 1.31.1 - enforceSecurityPolicies: true - kind: - nodeCount: 3 - nodeImage: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f - ipFamily: ipv4 -- id: kind-ci - operation: create - clusterName: kind-ci - clientVersion: 0.29.0 - provider: kind - kubernetesVersion: 1.33.1 - enforceSecurityPolicies: true - kind: - nodeCount: 3 - nodeImage: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f - ipFamily: ipv4 + - id: gke-ci + operation: create + clusterName: ci + provider: gke + kubernetesVersion: 1.33 + machineType: n1-standard-8 + serviceAccount: true + enforceSecurityPolicies: true + # use kustomize in GKE to remove the NVMe provisioning already taken care of by the platform + diskSetup: kubectl apply -k hack/deployer/config/local-disks + gke: + region: us-central1 + localSsdCount: 1 + nodeCountPerZone: 1 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: gke-autopilot-ci + operation: create + clusterName: ci-autopilot + provider: gke + kubernetesVersion: 1.33 + serviceAccount: true + enforceSecurityPolicies: true + # this is disabled in autopilot: container provisioner is privileged; not allowed in Autopilot + # diskSetup: kubectl apply -k hack/deployer/config/local-disks + gke: + autopilot: true + region: us-central1 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: gke-dev + operation: create + clusterName: dev + provider: gke + kubernetesVersion: 1.33 + machineType: n1-standard-8 + serviceAccount: false + enforceSecurityPolicies: true + gke: + region: europe-west1 + localSsdCount: 1 + nodeCountPerZone: 1 + # Uncomment option below to enable network policy enforcement in GKE. + # networkPolicy: true + # Uncomment option below to create a private GKE cluster. + # Note that when a cluster is private you must: + # 1. Create a firewall rule so that the webhook can be accessed from the API server (see https://github.com/elastic/cloud-on-k8s/issues/1673#issuecomment-528449682) + # 2. 
Create a VM to access the subnet and authorize the VM to access the master, see https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#private_master + # private: true + # gke creates a secondary IP range for all Pods of a cluster + # gke defaults to a /14 subnet, which allows 262k Pods per cluster, but only 62 subnets to be created + # /20 allows 4094 subnets, with up to 4094 IPs (Pods) per subnet + # more clusters can therefore be created in the same VPC network. + # we set a default of /20 that can be overridden here + # clusterIpv4Cidr: /20 + # servicesIpv4Cidr: /20 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: gke-autopilot-dev + operation: create + clusterName: dev-autopilot + provider: gke + kubernetesVersion: 1.33 + serviceAccount: false + enforceSecurityPolicies: true + gke: + autopilot: true + region: europe-west1 + gcpScopes: https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append + - id: aks-ci + operation: create + clusterName: ci + provider: aks + kubernetesVersion: 1.32.4 + machineType: Standard_D8s_v3 + serviceAccount: true + enforceSecurityPolicies: true + diskSetup: kubectl apply -k hack/deployer/config/local-disks + aks: + nodeCount: 3 + location: westeurope + zones: "1 2 3" + - id: aks-dev + operation: create + clusterName: dev + provider: aks + kubernetesVersion: 1.32.4 + machineType: Standard_D8s_v3 + serviceAccount: false + enforceSecurityPolicies: true + aks: + nodeCount: 3 + location: northeurope + zones: "1 2 3" + - id: ocp-ci + operation: create + clusterName: ci + clientVersion: 4.19.2 + provider: ocp + machineType: n1-standard-8 + serviceAccount: true + ocp: + region: europe-west6 + nodeCount: 3 + - id: ocp-dev + operation: create + clusterName: dev + clientVersion: 4.19.2 + provider: ocp + machineType: n1-standard-8 + serviceAccount: true + ocp: + region: europe-west1 + nodeCount: 3 + - id: eks-ci + operation: create + clusterName: ci + provider: eks + machineType: c5d.2xlarge + serviceAccount: false + enforceSecurityPolicies: true + kubernetesVersion: 1.33 + diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml + eks: + region: ap-northeast-3 + nodeCount: 3 + nodeAMI: auto + - id: eks-arm-ci + operation: create + clusterName: arm-ci + provider: eks + machineType: m6gd.2xlarge + serviceAccount: false + enforceSecurityPolicies: true + kubernetesVersion: 1.33 + diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml + eks: + region: eu-west-1 + nodeCount: 3 + nodeAMI: auto + - id: eks-dev + operation: create + clusterName: dev + provider: eks + machineType: c5d.2xlarge + serviceAccount: false + kubernetesVersion: 1.33 + diskSetup: kubectl apply -f hack/deployer/config/local-disks/ssd-provisioner.yaml + enforceSecurityPolicies: true + eks: + region: eu-west-2 + nodeCount: 3 + nodeAMI: auto + - id: kind-dev + operation: create + clusterName: eck + clientVersion: 0.29.0 + provider: kind + kubernetesVersion: 1.31.1 + enforceSecurityPolicies: true + kind: + nodeCount: 3 + nodeImage: 
kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f + ipFamily: ipv4 + - id: kind-ci + operation: create + clusterName: kind-ci + clientVersion: 0.29.0 + provider: kind + kubernetesVersion: 1.33.1 + enforceSecurityPolicies: true + kind: + nodeCount: 3 + nodeImage: kindest/node:v1.33.1@sha256:050072256b9a903bd914c0b2866828150cb229cea0efe5892e2b644d5dd3b34f + ipFamily: ipv4 + - id: k3d-dev + operation: create + clusterName: eck + clientVersion: v5.8.3 + provider: k3d + kubernetesVersion: 1.33.1 + k3d: + nodeCount: 3 + clientImage: ghcr.io/k3d-io/k3d:5.8.3 + nodeImage: rancher/k3s:v1.33.2-k3s1 + - id: k3d-ci + operation: create + clusterName: k3d-ci + clientVersion: v5.8.3 + provider: k3d + kubernetesVersion: 1.33.1 + k3d: + nodeCount: 3 + clientImage: ghcr.io/k3d-io/k3d:5.8.3 + nodeImage: rancher/k3s:v1.33.2-k3s1 diff --git a/hack/deployer/runner/k3d.go b/hack/deployer/runner/k3d.go new file mode 100644 index 0000000000..8bf302dd4e --- /dev/null +++ b/hack/deployer/runner/k3d.go @@ -0,0 +1,171 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +package runner + +import ( + "fmt" + "io/fs" + "os" + "path/filepath" + "runtime" + "time" + + "github.com/elastic/cloud-on-k8s/v3/hack/deployer/exec" + "github.com/elastic/cloud-on-k8s/v3/hack/deployer/runner/env" + "github.com/elastic/cloud-on-k8s/v3/pkg/utils/vault" +) + +const ( + K3dDriverID = "k3d" + + DefaultK3dRunConfigTemplate = `id: k3d-dev +overrides: + clusterName: %s-dev-cluster +` +) + +func init() { + drivers[K3dDriverID] = &K3dDriverFactory{} +} + +type K3dDriverFactory struct{} + +var _ DriverFactory = &K3dDriverFactory{} + +func (k K3dDriverFactory) Create(plan Plan) (Driver, error) { + return &K3dDriver{ + plan: plan, + vaultClient: vault.NewClientProvider(), + clientImage: plan.K3d.ClientImage, + nodeImage: plan.K3d.NodeImage, + }, nil +} + +type K3dDriver struct { + plan Plan + clientImage string + vaultClient vault.ClientProvider + nodeImage string +} + +func (k *K3dDriver) Execute() error { + switch k.plan.Operation { + case CreateAction: + return k.create() + case DeleteAction: + return k.delete() + } + return nil +} + +func (k *K3dDriver) create() error { + // Delete any previous e2e k3d cluster with the same name + err := k.delete() + if err != nil { + return err + } + + cmd := k.cmd("cluster", "create", "--image", k.plan.K3d.NodeImage) + if cmd == nil { + return fmt.Errorf("failed to create k3d cluster") + } + err = cmd.Run() + if err != nil { + return err + } + + // Get kubeconfig from k3d + kubeCfg, err := k.getKubeConfig() + if err != nil { + return err + } + defer os.Remove(kubeCfg.Name()) + + // Delete standard storage class but ignore error if not found + if err := kubectl("--kubeconfig", kubeCfg.Name(), "delete", "storageclass", "standard"); err != nil { + return err + } + + tmpStorageClass, err := k.createTmpStorageClass() + if err != nil { + return err + } + + return kubectl("--kubeconfig", kubeCfg.Name(), "apply", "-f", tmpStorageClass) +} + +func (k *K3dDriver) delete() error { + return k.cmd("cluster", "delete").Run() +} + +func (k *K3dDriver) cmd(args ...string) *exec.Command { + params := map[string]interface{}{ + "ClusterName": k.plan.ClusterName, + "SharedVolume": env.SharedVolumeName(), + "K3dClientImage": k.clientImage, + "K3dNodeImage": k.nodeImage, + "Args": args, + } 
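+    // The parameters above are interpolated into the docker run command template
+    // below: the k3d CLI image is executed as a throwaway container against the
+    // host Docker daemon, and {{.ClusterName}} is appended as the last argument of
+    // every k3d subcommand (e.g. "cluster create <name>", "kubeconfig get <name>").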
+
+    // on macOS, the docker socket is located in $HOME
+    dockerSocket := "/var/run/docker.sock"
+    if runtime.GOOS == "darwin" {
+        dockerSocket = "$HOME/.docker/run/docker.sock"
+    }
+    // We need the docker socket so that k3d can bootstrap the cluster node containers
+    // --userns=host to support Docker daemon host configured to run containers only in user namespaces
+    command := `docker run --rm \
+        --userns=host \
+        -v {{.SharedVolume}}:/home \
+        -v /var/run/docker.sock:` + dockerSocket + ` \
+        -e HOME=/home \
+        -e PATH=/ \
+        {{.K3dClientImage}} \
+        {{Join .Args " "}} {{.ClusterName}}`
+    cmd := exec.NewCommand(command)
+    cmd = cmd.AsTemplate(params)
+    return cmd
+}
+
+func (k *K3dDriver) getKubeConfig() (*os.File, error) {
+    // Get kubeconfig from k3d
+    output, err := k.cmd("kubeconfig", "get").WithoutStreaming().Output()
+    if err != nil {
+        return nil, err
+    }
+
+    // Persist kubeconfig for reliability in following kubectl commands
+    kubeCfg, err := os.CreateTemp("", "kubeconfig")
+    if err != nil {
+        return nil, err
+    }
+
+    _, err = kubeCfg.Write([]byte(output))
+    if err != nil {
+        return nil, err
+    }
+    return kubeCfg, nil
+}
+
+func (k *K3dDriver) GetCredentials() error {
+    config, err := k.getKubeConfig()
+    if err != nil {
+        return err
+    }
+    defer os.Remove(config.Name())
+    return mergeKubeconfig(config.Name())
+}
+
+func (k *K3dDriver) createTmpStorageClass() (string, error) {
+    tmpFile := filepath.Join(os.Getenv("HOME"), storageClassFileName)
+    err := os.WriteFile(tmpFile, []byte(storageClass), fs.ModePerm)
+    return tmpFile, err
+}
+
+func (k *K3dDriver) Cleanup(string, time.Duration) error {
+    return fmt.Errorf("unimplemented")
+}
+
+var _ Driver = &K3dDriver{}
diff --git a/hack/deployer/runner/settings.go b/hack/deployer/runner/settings.go
index e29f956930..76ed6396c4 100644
--- a/hack/deployer/runner/settings.go
+++ b/hack/deployer/runner/settings.go
@@ -32,6 +32,7 @@ type Plan struct {
     Ocp                     *OCPSettings  `yaml:"ocp,omitempty"`
     Eks                     *EKSSettings  `yaml:"eks,omitempty"`
     Kind                    *KindSettings `yaml:"kind,omitempty"`
+    K3d                     *K3dSettings  `yaml:"k3d,omitempty"`
     ServiceAccount          bool          `yaml:"serviceAccount"`
     EnforceSecurityPolicies bool          `yaml:"enforceSecurityPolicies"`
     DiskSetup               string        `yaml:"diskSetup"`
@@ -86,6 +87,12 @@ type KindSettings struct {
     IPFamily  string `yaml:"ipFamily"`
 }
 
+type K3dSettings struct {
+    ClientImage string `yaml:"clientImage"`
+    NodeCount   int    `yaml:"nodeCount"`
+    NodeImage   string `yaml:"nodeImage"`
+}
+
 // RunConfig encapsulates Id used to choose a plan and a map of overrides to apply to the plan, expected to map to a file
 type RunConfig struct {
     Id        string                 `yaml:"id"` //nolint:revive
diff --git a/test/e2e/test/elasticsearch/checks_es.go b/test/e2e/test/elasticsearch/checks_es.go
index d6e4bf7dc5..d0966f3542 100644
--- a/test/e2e/test/elasticsearch/checks_es.go
+++ b/test/e2e/test/elasticsearch/checks_es.go
@@ -397,13 +397,18 @@ func compareCgroupCPULimit(topologyElement esv1.NodeSet, nodeStats client.NodeStats) error {
             expectedCPULimit = c.Resources.Limits.Cpu()
         }
     }
+
+    fmt.Printf("ES manifest cpu limit: %v\n", expectedCPULimit)
     if expectedCPULimit == nil || expectedCPULimit.IsZero() {
         // no expected cpu, consider it's ok
         return nil
     }
     cgroupCPU := nodeStats.OS.CGroup.CPU
+    fmt.Printf("ES returned cgroup cfs_period_micros %d\n", cgroupCPU.CFSPeriodMicros)
+    fmt.Printf("ES returned cgroup cfs_quota_micros %d\n", cgroupCPU.CFSQuotaMicros)
     actualCgroupCPULimit := float64(cgroupCPU.CFSQuotaMicros) / float64(cgroupCPU.CFSPeriodMicros)
+    fmt.Printf("ES calculated cgroup cpu limit: %v\n", actualCgroupCPULimit)
     if expectedCPULimit.AsApproximateFloat64() != actualCgroupCPULimit {
         return fmt.Errorf("expected cgroup CPU limit [%f], got [%f]", expectedCPULimit.AsApproximateFloat64(), actualCgroupCPULimit)
     }
 
diff --git a/test/e2e/test/run_mutation.go b/test/e2e/test/run_mutation.go
index 6899517224..3c8a035f00 100644
--- a/test/e2e/test/run_mutation.go
+++ b/test/e2e/test/run_mutation.go
@@ -5,7 +5,24 @@
 package test
 
 import (
+    "errors"
+    "fmt"
+    "os"
+    "path"
+    "strings"
     "testing"
+
+    "k8s.io/apimachinery/pkg/labels"
+    k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
+
+    commonv1 "github.com/elastic/cloud-on-k8s/v3/pkg/apis/common/v1"
+    "github.com/elastic/cloud-on-k8s/v3/pkg/controller/elasticsearch/label"
+    "github.com/elastic/cloud-on-k8s/v3/pkg/utils/k8s"
+    // testes "github.com/elastic/cloud-on-k8s/v3/test/e2e/test/elasticsearch"
+)
+
+var (
+    printed bool
 )
 
 // RunMutations tests resources changes on given resources.
@@ -22,6 +39,74 @@
     for _, toCreate := range creationBuilders {
         steps = steps.WithSteps(toCreate.CreationTestSteps(k))
     }
+    steps = steps.WithSteps(StepList{
+        {
+            Name: "get cgroup information from elasticsearch",
+            Test: Eventually(func() error {
+                ctx := Ctx()
+                namespace := fmt.Sprintf("%s-%s", ctx.TestRun, "mercury")
+                listOptions := k8sclient.ListOptions{
+                    Namespace: namespace,
+                    LabelSelector: labels.SelectorFromSet(labels.Set{
+                        commonv1.TypeLabelName: label.Type,
+                    }),
+                }
+                pods, err := k.GetPods(&listOptions)
+                if err != nil {
+                    return err
+                }
+                if len(pods) == 0 {
+                    return errors.New("no pods found")
+                }
+
+                // exec into the pod to read the cgroup hierarchy of the Elasticsearch process
+                stdout, _, err := k.Exec(k8s.ExtractNamespacedName(&pods[0]),
+                    []string{"cat", "/proc/1/cgroup"})
+                if err != nil {
+                    return err
+                }
+                var cpuAcctData string
+                if !printed {
+                    fmt.Printf("cgroup data: %s", stdout)
+                }
+                for _, line := range strings.Split(stdout, "\n") {
+                    for _, controlGroup := range strings.Split(line, ":") {
+                        if strings.Contains(controlGroup, "cpuacct") {
+                            cpuAcctData = strings.Split(line, ":")[2]
+                        }
+                    }
+                }
+                if !printed {
+                    fullPath := path.Join("/sys/fs/cgroup/cpu,cpuacct", cpuAcctData, "cpuacct.usage")
+                    fmt.Printf("cpuacct data full path: %s\n", fullPath)
+                    if _, err := os.Stat(fullPath); err != nil {
+                        fmt.Printf("cpuacct.usage file err: %s\n", err)
+                        printed = true
+                    } else {
+                        fmt.Printf("cpuacct.usage file exists\n")
+                    }
+
+                    stdout, _, err = k.Exec(k8s.ExtractNamespacedName(&pods[0]),
+                        []string{"find", "/sys/fs/cgroup", "-ls"})
+                    if err != nil {
+                        fmt.Printf("find /sys/fs/cgroup err: %s\n", err)
+                    }
+
+                    fmt.Printf("full /sys/fs/cgroup output: %s\n", stdout)
+
+                    stdout, _, err = k.Exec(k8s.ExtractNamespacedName(&pods[0]),
+                        []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_quota_us"})
+                    if err != nil {
+                        fmt.Printf("cpu.cfs_quota_us file err: %s\n", err)
+                    }
+
+                    fmt.Printf("cpu.cfs_quota_us: %s\n", stdout)
+                }
+
+                printed = true
+                return nil
+            }),
+        }})
     for _, toCreate := range creationBuilders {
         steps = steps.WithSteps(CheckTestSteps(toCreate, k))
     }
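For reviewers, a minimal standalone sketch (not part of the patch) of how a k3d plan entry from plans.yml is expected to unmarshal into the new K3dSettings struct added in settings.go. The Plan and K3dSettings types below are trimmed illustrative copies, and gopkg.in/yaml.v3 is assumed as the YAML parser; only the yaml tags mirror the patch.

// k3d_plan_sketch.go — illustration only, not part of this change.
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Trimmed copy of the K3dSettings struct introduced in settings.go.
type K3dSettings struct {
	ClientImage string `yaml:"clientImage"`
	NodeCount   int    `yaml:"nodeCount"`
	NodeImage   string `yaml:"nodeImage"`
}

// Trimmed copy of the Plan fields relevant to a k3d run.
type Plan struct {
	ID                string       `yaml:"id"`
	Operation         string       `yaml:"operation"`
	ClusterName       string       `yaml:"clusterName"`
	ClientVersion     string       `yaml:"clientVersion"`
	Provider          string       `yaml:"provider"`
	KubernetesVersion string       `yaml:"kubernetesVersion"`
	K3d               *K3dSettings `yaml:"k3d,omitempty"`
}

// Mirrors the k3d-ci entry added to hack/deployer/config/plans.yml.
const planYAML = `
id: k3d-ci
operation: create
clusterName: k3d-ci
clientVersion: v5.8.3
provider: k3d
kubernetesVersion: "1.33.1"
k3d:
  nodeCount: 3
  clientImage: ghcr.io/k3d-io/k3d:5.8.3
  nodeImage: rancher/k3s:v1.33.2-k3s1
`

func main() {
	var p Plan
	if err := yaml.Unmarshal([]byte(planYAML), &p); err != nil {
		panic(err)
	}
	// The k3d section carries what the driver needs to run the k3d CLI in Docker:
	// the client image to execute and the k3s node image to boot.
	fmt.Printf("provider=%s client=%s node=%s nodes=%d\n",
		p.Provider, p.K3d.ClientImage, p.K3d.NodeImage, p.K3d.NodeCount)
}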