# Page-capture residue, not part of the workflow: the navigation text
# "Skip to content" and the repeated title
# "feat: refactor AWX CronJobs as StatefulSets #195".

# Workflow header: display name, trigger filter, and run-concurrency policy.
name: Perform integration smoke tests for SRE incidents
# Trigger: pull requests targeting `main`, and only when at least one changed
# file matches one of the path patterns listed under `paths`.
on:
pull_request:
branches:
- main
paths:
- sre/dev/local_cluster/go.*
- sre/playbooks/**
- sre/roles/applications/**
- sre/roles/faults/**
- sre/roles/incidents/**
- sre/roles/tools/**
- sre/tools/kubernetes-topology-monitor/charts/kubernetes-topology-monitor/**
# One concurrency group per ref; a newly started run cancels the run that is
# still in progress in the same group.
concurrency:
group: ci-integration-${{ github.ref }}
cancel-in-progress: true
# Job definitions. A job that lists `needs:` starts only after every listed
# job has completed successfully.
jobs:
chaos-mesh:
# Tool smoke test with no `needs:` prerequisites. After creating a Kind cluster
# it moves sre/tests/files/chaos-mesh.yaml to sre/group_vars/environment/tools.yaml,
# then runs the deploy_tools, reinit_tools and undeploy_tools make targets in order.
name: Chaos Mesh Smoke Tests
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/chaos-mesh.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
clickhouse:
# Tool smoke test gated on the `ingress` job. After creating a Kind cluster it
# moves sre/tests/files/clickhouse.yaml to sre/group_vars/environment/tools.yaml,
# then runs the deploy_tools, reinit_tools and undeploy_tools make targets in order.
# NOTE(review): "Alinity" in the name below is probably a typo for "Altinity".
# The name is also the check name shown on pull requests, so confirm that no
# required-check rule references it before renaming.
name: Alinity Clickhouse Smoke Tests
needs:
- ingress
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/clickhouse.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
data-recorder-jaeger:
# Data-recorder smoke test gated on the `jaeger` job. After creating a Kind
# cluster it moves sre/tests/files/jaeger.yaml to
# sre/group_vars/environment/tools.yaml and runs deploy_tools as setup, then
# exercises the deploy_recorders and undeploy_recorders make targets in order.
# The checkout action is pinned to v5.0.0, the same version used by the
# tool and incident jobs in this workflow (this job previously used v4.2.2).
name: Data Recorder (Jaeger) Smoke Tests
needs:
- jaeger
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/jaeger.yaml sre/group_vars/environment/tools.yaml
- name: Install tools
run: |
make -C sre deploy_tools
- name: Run installation smoke test
run: |
make -C sre deploy_recorders
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_recorders
data-recorder-kubernetes-topology-monitor:
# Data-recorder smoke test gated on the `kubernetes-topology-monitor` job.
# After creating a Kind cluster it moves
# sre/tests/files/kubernetes-topology-monitor.yaml to
# sre/group_vars/environment/tools.yaml and runs deploy_tools as setup, then
# exercises the deploy_recorders and undeploy_recorders make targets in order.
# The checkout action is pinned to v5.0.0, the same version used by the
# tool and incident jobs in this workflow (this job previously used v4.2.2).
name: Data Recorder (Kubernetes Topology Monitor) Smoke Tests
needs:
- kubernetes-topology-monitor
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/kubernetes-topology-monitor.yaml sre/group_vars/environment/tools.yaml
- name: Install tools
run: |
make -C sre deploy_tools
- name: Run installation smoke test
run: |
make -C sre deploy_recorders
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_recorders
data-recorder-prometheus:
# Data-recorder smoke test gated on the `prometheus` job. After creating a Kind
# cluster it moves sre/tests/files/prometheus.yaml to
# sre/group_vars/environment/tools.yaml and runs deploy_tools as setup, then
# exercises the deploy_recorders and undeploy_recorders make targets in order.
# The checkout action is pinned to v5.0.0, the same version used by the
# tool and incident jobs in this workflow (this job previously used v4.2.2).
name: Data Recorder (Prometheus) Smoke Tests
needs:
- prometheus
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/prometheus.yaml sre/group_vars/environment/tools.yaml
- name: Install tools
run: |
make -C sre deploy_tools
- name: Run installation smoke test
run: |
make -C sre deploy_recorders
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_recorders
ingress:
# Tool smoke test with no `needs:` prerequisites; several other jobs in this
# workflow list it under their own `needs:`. After creating a Kind cluster it
# moves sre/tests/files/ingress.yaml to sre/group_vars/environment/tools.yaml,
# then runs the deploy_tools, reinit_tools and undeploy_tools make targets in order.
name: Ingress Smoke Tests
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/ingress.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
jaeger:
# Tool smoke test gated on the `ingress` and `opensearch` jobs. After creating
# a Kind cluster it moves sre/tests/files/jaeger.yaml to
# sre/group_vars/environment/tools.yaml, then runs the deploy_tools,
# reinit_tools and undeploy_tools make targets in order.
name: Jaeger Smoke Tests
needs:
- ingress
- opensearch
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/jaeger.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
kubernetes-metrics-server:
# Tool smoke test with no `needs:` prerequisites. After creating a Kind cluster
# it moves sre/tests/files/kubernetes-metrics-server.yaml to
# sre/group_vars/environment/tools.yaml, then runs the deploy_tools,
# reinit_tools and undeploy_tools make targets in order.
name: Kubernetes Metrics Server Smoke Tests
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/kubernetes-metrics-server.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
kubernetes-topology-monitor:
# Tool smoke test gated on the `ingress` job. After creating a Kind cluster it
# moves sre/tests/files/kubernetes-topology-monitor.yaml to
# sre/group_vars/environment/tools.yaml, then runs the deploy_tools,
# reinit_tools and undeploy_tools make targets in order.
name: Kubernetes Topology Monitor Smoke Tests
needs:
- ingress
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/kubernetes-topology-monitor.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
opencost:
# Tool smoke test gated on the `ingress` and `prometheus` jobs. After creating
# a Kind cluster it moves sre/tests/files/opencost.yaml to
# sre/group_vars/environment/tools.yaml, then runs the deploy_tools and
# undeploy_tools make targets in order.
# NOTE(review): unlike the other tool jobs in this workflow, there is no
# reinit_tools step here — confirm that the omission is intentional.
name: OpenCost Smoke Tests
needs:
- ingress
- prometheus
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/opencost.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
opensearch:
# Tool smoke test with no `needs:` prerequisites. After creating a Kind cluster
# it moves sre/tests/files/opensearch.yaml to
# sre/group_vars/environment/tools.yaml, then runs the deploy_tools,
# reinit_tools and undeploy_tools make targets in order.
name: OpenSearch Smoke Tests
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/opensearch.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
opentelemetry:
# Tool smoke test gated on the `clickhouse` job. After creating a Kind cluster
# it moves sre/tests/files/opentelemetry.yaml to
# sre/group_vars/environment/tools.yaml, then runs the deploy_tools,
# reinit_tools and undeploy_tools make targets in order.
name: OpenTelemetry Collector Smoke Tests
needs:
- clickhouse
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/opentelemetry.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
otel-demo:
# Application smoke test gated on the `clickhouse`, `jaeger` and `prometheus`
# jobs. After creating a Kind cluster it moves sre/tests/files/otel-demo.yaml to
# sre/group_vars/environment/tools.yaml and runs deploy_tools as setup, then
# exercises the deploy_applications and undeploy_applications make targets.
# NOTE(review): INCIDENT_NUMBER=1 is set only for the deploy_tools command and
# not for the two application targets — confirm that this is intended.
name: OpenTelemetry Demo (Astronomy Shop) Smoke Tests
needs:
- clickhouse
- jaeger
- prometheus
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/otel-demo.yaml sre/group_vars/environment/tools.yaml
- name: Install tools
run: |
INCIDENT_NUMBER=1 make -C sre deploy_tools
- name: Run installation smoke test
run: |
make -C sre deploy_applications
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_applications
prometheus:
# Tool smoke test gated on the `ingress` job. After creating a Kind cluster it
# moves sre/tests/files/prometheus.yaml to
# sre/group_vars/environment/tools.yaml, then runs the deploy_tools,
# reinit_tools and undeploy_tools make targets in order.
name: Prometheus Smoke Tests
needs:
- ingress
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
mv sre/tests/files/prometheus.yaml sre/group_vars/environment/tools.yaml
- name: Run installation smoke test
run: |
make -C sre deploy_tools
- name: Run re-initialization smoke test
run: |
make -C sre reinit_tools
- name: Run uninstallation smoke test
run: |
make -C sre undeploy_tools
sre-incident-3:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=3.
name: SRE Incident 3 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=3 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=3 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=3 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=3 make -C sre destroy_environment
sre-incident-16:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=16.
name: SRE Incident 16 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=16 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=16 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=16 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=16 make -C sre destroy_environment
sre-incident-20:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=20.
name: SRE Incident 20 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=20 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=20 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=20 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=20 make -C sre destroy_environment
sre-incident-23:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=23.
name: SRE Incident 23 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=23 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=23 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=23 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=23 make -C sre destroy_environment
sre-incident-26:
# Incident smoke test gated on the `chaos-mesh`, `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs (the only SRE incident job in this
# workflow that also waits for `chaos-mesh`). After creating a Kind cluster and
# the group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=26.
name: SRE Incident 26 Smoke Test
needs:
- chaos-mesh
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=26 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=26 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=26 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=26 make -C sre destroy_environment
sre-incident-30:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=30.
name: SRE Incident 30 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=30 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=30 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=30 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=30 make -C sre destroy_environment
sre-incident-31:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=31.
name: SRE Incident 31 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=31 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=31 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=31 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=31 make -C sre destroy_environment
sre-incident-33:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=33.
name: SRE Incident 33 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=33 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=33 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=33 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=33 make -C sre destroy_environment
sre-incident-34:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=34.
name: SRE Incident 34 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=34 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=34 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=34 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=34 make -C sre destroy_environment
sre-incident-102:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=102.
name: SRE Incident 102 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=102 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=102 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=102 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=102 make -C sre destroy_environment
sre-incident-105:
# Incident smoke test gated on the `kubernetes-topology-monitor`,
# `opentelemetry` and `otel-demo` jobs. After creating a Kind cluster and the
# group vars it runs create_environment, inject_incident_fault,
# remove_incident_fault and destroy_environment, each with INCIDENT_NUMBER=105.
name: SRE Incident 105 Smoke Test
needs:
- kubernetes-topology-monitor
- opentelemetry
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=105 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=105 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=105 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=105 make -C sre destroy_environment
finops-incident-37:
# Incident smoke test gated on the `kubernetes-metrics-server`,
# `kubernetes-topology-monitor`, `opentelemetry`, `opencost` and `otel-demo`
# jobs. After creating a Kind cluster and the group vars it runs
# create_environment, inject_incident_fault, remove_incident_fault and
# destroy_environment, each with INCIDENT_NUMBER=37.
name: FinOps Incident 37 Smoke Test
needs:
- kubernetes-metrics-server
- kubernetes-topology-monitor
- opentelemetry
- opencost
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=37 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=37 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=37 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=37 make -C sre destroy_environment
finops-incident-38:
# Incident smoke test gated on the `kubernetes-metrics-server`,
# `kubernetes-topology-monitor`, `opentelemetry`, `opencost` and `otel-demo`
# jobs. After creating a Kind cluster and the group vars it runs
# create_environment, inject_incident_fault, remove_incident_fault and
# destroy_environment, each with INCIDENT_NUMBER=38.
name: FinOps Incident 38 Smoke Test
needs:
- kubernetes-metrics-server
- kubernetes-topology-monitor
- opentelemetry
- opencost
- otel-demo
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v5.0.0
- uses: actions/setup-python@v5.6.0
with:
python-version: '3.12'
- uses: actions/setup-go@v5.5.0
with:
go-version-file: sre/dev/local_cluster/go.mod
cache-dependency-path: sre/dev/local_cluster/go.sum
- uses: azure/setup-helm@v4.3.0
with:
version: v3.18.3
- name: Install Python and Ansible dependencies
run: |
pip install -r sre/requirements.txt
ansible-galaxy install -r sre/requirements.yaml
- name: Create Kind cluster
run: |
make -C sre/dev/local_cluster create_cluster
- name: Create group vars
run: |
make -C sre group_vars
- name: Create environment
run: |
INCIDENT_NUMBER=38 make -C sre create_environment
- name: Test fault injection
run: |
INCIDENT_NUMBER=38 make -C sre inject_incident_fault
- name: Test fault removal
run: |
INCIDENT_NUMBER=38 make -C sre remove_incident_fault
- name: Destroy environment
run: |
INCIDENT_NUMBER=38 make -C sre destroy_environment