From 6783a9f9acace6c0ce48c093186f0a4840b908c6 Mon Sep 17 00:00:00 2001 From: Matthew Christopher Date: Thu, 18 Jul 2024 11:06:40 -0700 Subject: [PATCH] Change how we read configuration Moving from a model where we have to explicitly set each secret in the pod env to a model where we just mount the whole secret into the pod. This has the added benefit of allowing us to autodetect secret changes and restart the pod AND means that users can totally omit the secret now as opposed to having to create the secret with empty values. --- ...ureserviceoperator-controller-manager.yaml | 111 +--------------- .../templates/secret.yaml | 6 + v2/cmd/controller/app/setup.go | 25 +++- v2/config/manager/manager_image_patch.yaml | 113 ++--------------- v2/internal/config/vars.go | 118 ++++++++++++------ v2/internal/config/vars_internal_test.go | 6 +- v2/internal/config/watcher.go | 81 ++++++++++++ 7 files changed, 206 insertions(+), 254 deletions(-) create mode 100644 v2/internal/config/watcher.go diff --git a/v2/charts/azure-service-operator/templates/apps_v1_deployment_azureserviceoperator-controller-manager.yaml b/v2/charts/azure-service-operator/templates/apps_v1_deployment_azureserviceoperator-controller-manager.yaml index 974b73f641d..42df349e5d5 100644 --- a/v2/charts/azure-service-operator/templates/apps_v1_deployment_azureserviceoperator-controller-manager.yaml +++ b/v2/charts/azure-service-operator/templates/apps_v1_deployment_azureserviceoperator-controller-manager.yaml @@ -67,111 +67,6 @@ spec: - --webhook-cert-dir={{ .Values.webhook.certDir }} {{- end }} env: - - name: AZURE_CLIENT_ID - valueFrom: - secretKeyRef: - key: AZURE_CLIENT_ID - name: aso-controller-settings - - name: AZURE_CLIENT_SECRET - valueFrom: - secretKeyRef: - key: AZURE_CLIENT_SECRET - name: aso-controller-settings - optional: true - - name: AZURE_TENANT_ID - valueFrom: - secretKeyRef: - key: AZURE_TENANT_ID - name: aso-controller-settings - - name: AZURE_SUBSCRIPTION_ID - valueFrom: - secretKeyRef: - key: AZURE_SUBSCRIPTION_ID - name: aso-controller-settings - - name: AZURE_CLIENT_CERTIFICATE - valueFrom: - secretKeyRef: - key: AZURE_CLIENT_CERTIFICATE - name: aso-controller-settings - optional: true - - name: AZURE_CLIENT_CERTIFICATE_PASSWORD - valueFrom: - secretKeyRef: - key: AZURE_CLIENT_CERTIFICATE_PASSWORD - name: aso-controller-settings - optional: true - - name: AZURE_AUTHORITY_HOST - valueFrom: - secretKeyRef: - key: AZURE_AUTHORITY_HOST - name: aso-controller-settings - optional: true - - name: AZURE_RESOURCE_MANAGER_ENDPOINT - valueFrom: - secretKeyRef: - key: AZURE_RESOURCE_MANAGER_ENDPOINT - name: aso-controller-settings - optional: true - - name: AZURE_RESOURCE_MANAGER_AUDIENCE - valueFrom: - secretKeyRef: - key: AZURE_RESOURCE_MANAGER_AUDIENCE - name: aso-controller-settings - optional: true - - name: AZURE_TARGET_NAMESPACES - valueFrom: - secretKeyRef: - key: AZURE_TARGET_NAMESPACES - name: aso-controller-settings - optional: true - - name: AZURE_OPERATOR_MODE - valueFrom: - secretKeyRef: - key: AZURE_OPERATOR_MODE - name: aso-controller-settings - optional: true - - name: AZURE_SYNC_PERIOD - valueFrom: - secretKeyRef: - key: AZURE_SYNC_PERIOD - name: aso-controller-settings - optional: true - - name: USE_WORKLOAD_IDENTITY_AUTH - valueFrom: - secretKeyRef: - key: USE_WORKLOAD_IDENTITY_AUTH - name: aso-controller-settings - optional: true - - name: AZURE_USER_AGENT_SUFFIX - valueFrom: - secretKeyRef: - key: AZURE_USER_AGENT_SUFFIX - name: aso-controller-settings - optional: true - - name: MAX_CONCURRENT_RECONCILES - valueFrom: - secretKeyRef: - key: MAX_CONCURRENT_RECONCILES - name: aso-controller-settings - optional: true - - name: RATE_LIMIT_MODE - valueFrom: - secretKeyRef: - key: RATE_LIMIT_MODE - name: aso-controller-settings - optional: true - - name: RATE_LIMIT_QPS - valueFrom: - secretKeyRef: - key: RATE_LIMIT_QPS - name: aso-controller-settings - optional: true - - name: RATE_LIMIT_BUCKET_SIZE - valueFrom: - secretKeyRef: - key: RATE_LIMIT_BUCKET_SIZE - name: aso-controller-settings - optional: true - name: POD_NAMESPACE valueFrom: fieldRef: @@ -216,6 +111,9 @@ spec: name: cert readOnly: true {{- end }} + - name: settings-volume + readOnly: true + mountPath: "/etc/aso-controller-settings" {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} @@ -244,3 +142,6 @@ spec: audience: api://AzureADTokenExchange expirationSeconds: 3600 path: azure-identity + - name: settings-volume + secret: + secretName: aso-controller-settings diff --git a/v2/charts/azure-service-operator/templates/secret.yaml b/v2/charts/azure-service-operator/templates/secret.yaml index 413f5bcc04d..9b00db79d97 100644 --- a/v2/charts/azure-service-operator/templates/secret.yaml +++ b/v2/charts/azure-service-operator/templates/secret.yaml @@ -6,9 +6,15 @@ metadata: namespace: {{.Release.Namespace}} type: Opaque data: + {{- if .Values.azureSubscriptionID }} AZURE_SUBSCRIPTION_ID: {{ .Values.azureSubscriptionID | b64enc | quote }} + {{- end }} + {{- if .Values.azureTenantID }} AZURE_TENANT_ID: {{ .Values.azureTenantID | b64enc | quote }} + {{- end }} + {{- if .Values.azureClientID }} AZURE_CLIENT_ID: {{ .Values.azureClientID | b64enc | quote }} + {{- end }} {{- if .Values.azureClientSecret }} AZURE_CLIENT_SECRET: {{ .Values.azureClientSecret | b64enc | quote }} {{- end }} diff --git a/v2/cmd/controller/app/setup.go b/v2/cmd/controller/app/setup.go index d93aab0016a..d79117844c0 100644 --- a/v2/cmd/controller/app/setup.go +++ b/v2/cmd/controller/app/setup.go @@ -52,7 +52,21 @@ import ( "github.com/Azure/azure-service-operator/v2/pkg/genruntime/conditions" ) -func SetupControllerManager(ctx context.Context, setupLog logr.Logger, flgs *Flags) manager.Manager { +type ManagerWrapper struct { + mgr manager.Manager + watcher *config.Watcher +} + +func (w *ManagerWrapper) Start(ctx context.Context) error { + err := w.watcher.Start(ctx) + if err != nil { + return errors.Wrap(err, "failed to start config watcher") + } + + return w.mgr.Start(ctx) // This blocks +} + +func SetupControllerManager(ctx context.Context, setupLog logr.Logger, flgs *Flags) ManagerWrapper { scheme := controllers.CreateScheme() _ = apiextensions.AddToScheme(scheme) // Used for managing CRDs @@ -212,7 +226,14 @@ func SetupControllerManager(ctx context.Context, setupLog logr.Logger, flgs *Fla setupLog.Error(err, "Failed setting up readyz check") os.Exit(1) } - return mgr + + // This watches the mounted secret and restart the pod if it changes + configWatcher := config.NewWatcher(setupLog) + + return ManagerWrapper{ + mgr: mgr, + watcher: configWatcher, + } } func getMetricsOpts(flags *Flags) server.Options { diff --git a/v2/config/manager/manager_image_patch.yaml b/v2/config/manager/manager_image_patch.yaml index 983ab04980a..641a5df40c7 100644 --- a/v2/config/manager/manager_image_patch.yaml +++ b/v2/config/manager/manager_image_patch.yaml @@ -8,119 +8,22 @@ spec: spec: nodeSelector: "kubernetes.io/os": linux + volumes: + - name: settings-volume + secret: + secretName: aso-controller-settings containers: # Change the value of image field below to your controller image URL - image: localhost:5000/azureserviceoperator:latest name: manager env: - - name: AZURE_CLIENT_ID - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_CLIENT_ID - - name: AZURE_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_CLIENT_SECRET - optional: true - - name: AZURE_TENANT_ID - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_TENANT_ID - - name: AZURE_SUBSCRIPTION_ID - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_SUBSCRIPTION_ID - - name: AZURE_CLIENT_CERTIFICATE - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_CLIENT_CERTIFICATE - optional: true - - name: AZURE_CLIENT_CERTIFICATE_PASSWORD - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_CLIENT_CERTIFICATE_PASSWORD - optional: true - - name: AZURE_AUTHORITY_HOST - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_AUTHORITY_HOST - optional: true - - name: AZURE_RESOURCE_MANAGER_ENDPOINT - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_RESOURCE_MANAGER_ENDPOINT - optional: true - - name: AZURE_RESOURCE_MANAGER_AUDIENCE - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_RESOURCE_MANAGER_AUDIENCE - optional: true - - name: AZURE_TARGET_NAMESPACES - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_TARGET_NAMESPACES - optional: true - - name: AZURE_OPERATOR_MODE - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_OPERATOR_MODE - optional: true - - name: AZURE_SYNC_PERIOD - valueFrom: - secretKeyRef: - name: aso-controller-settings - key: AZURE_SYNC_PERIOD - optional: true - - name: USE_WORKLOAD_IDENTITY_AUTH - valueFrom: - secretKeyRef: - key: USE_WORKLOAD_IDENTITY_AUTH - name: aso-controller-settings - optional: true - - name: AZURE_USER_AGENT_SUFFIX - valueFrom: - secretKeyRef: - key: AZURE_USER_AGENT_SUFFIX - name: aso-controller-settings - optional: true - - name: MAX_CONCURRENT_RECONCILES - valueFrom: - secretKeyRef: - key: MAX_CONCURRENT_RECONCILES - name: aso-controller-settings - optional: true - - name: RATE_LIMIT_MODE - valueFrom: - secretKeyRef: - key: RATE_LIMIT_MODE - name: aso-controller-settings - optional: true - - name: RATE_LIMIT_QPS - valueFrom: - secretKeyRef: - key: RATE_LIMIT_QPS - name: aso-controller-settings - optional: true - - name: RATE_LIMIT_BUCKET_SIZE - valueFrom: - secretKeyRef: - key: RATE_LIMIT_BUCKET_SIZE - name: aso-controller-settings - optional: true # Used for setting the operator-namespace annotation (and # for aad-pod-identity once we support it). - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace + volumeMounts: + - name: settings-volume + readOnly: true + mountPath: "/etc/aso-controller-settings" diff --git a/v2/internal/config/vars.go b/v2/internal/config/vars.go index 74b6357f98b..0dce55b8494 100644 --- a/v2/internal/config/vars.go +++ b/v2/internal/config/vars.go @@ -6,6 +6,7 @@ package config import ( "fmt" "os" + "path/filepath" "strconv" "strings" "time" @@ -16,9 +17,10 @@ import ( "github.com/Azure/azure-service-operator/v2/pkg/common/config" ) +const MountPath = "/etc/aso-controller-settings" + // These are hardcoded because the init function that initializes them in azcore isn't in /cloud it's in /arm which // we don't import. - var ( DefaultEndpoint = "https://management.azure.com" DefaultAudience = "https://management.core.windows.net/" @@ -226,8 +228,48 @@ func (v Values) Cloud() cloud.Configuration { // ReadFromEnvironment loads configuration values from the AZURE_* // environment variables. func ReadFromEnvironment() (Values, error) { + r := newEnvReader() + return read(r) +} + +func ReadFromFile(path string) (Values, error) { + r := newMountedSecretReader(path) + return read(r) +} + +func newEnvReader() *reader { + return &reader{ + lookup: func(s string) (string, bool) { + return os.LookupEnv(s) + }, + } +} + +func newMountedSecretReader(path string) *reader { + return &reader{ + lookup: func(s string) (string, bool) { + fullPath := filepath.Join(path, s) + bytes, err := os.ReadFile(fullPath) + ok := err == nil + + return string(bytes), ok + }, + } +} + +type reader struct { // TODO: SHould this be an interface instead? + // get func(string) string + lookup func(string) (string, bool) +} + +func (r *reader) get(s string) string { + result, _ := r.lookup(s) + return result +} + +func read(r *reader) (Values, error) { var result Values - modeValue := os.Getenv(config.OperatorMode) + modeValue := r.get(config.OperatorMode) if modeValue == "" { result.OperatorMode = OperatorModeBoth } else { @@ -240,36 +282,36 @@ func ReadFromEnvironment() (Values, error) { var err error - result.SubscriptionID = os.Getenv(config.AzureSubscriptionID) - result.PodNamespace = os.Getenv(config.PodNamespace) - result.TargetNamespaces = parseTargetNamespaces(os.Getenv(config.TargetNamespaces)) - result.SyncPeriod, err = parseSyncPeriod() + result.SubscriptionID = r.get(config.AzureSubscriptionID) + result.PodNamespace = os.Getenv(config.PodNamespace) // This is always from env + result.TargetNamespaces = parseTargetNamespaces(r.get(config.TargetNamespaces)) + result.SyncPeriod, err = parseSyncPeriod(r) if err != nil { return result, errors.Wrapf(err, "parsing %q", config.SyncPeriod) } - result.ResourceManagerEndpoint = envOrDefault(config.ResourceManagerEndpoint, DefaultEndpoint) - result.ResourceManagerAudience = envOrDefault(config.ResourceManagerAudience, DefaultAudience) - result.AzureAuthorityHost = envOrDefault(config.AzureAuthorityHost, DefaultAADAuthorityHost) - result.ClientID = os.Getenv(config.AzureClientID) - result.TenantID = os.Getenv(config.AzureTenantID) - result.MaxConcurrentReconciles, err = envParseOrDefault(config.MaxConcurrentReconciles, DefaultMaxConcurrentReconciles) + result.ResourceManagerEndpoint = readOrDefault(r, config.ResourceManagerEndpoint, DefaultEndpoint) + result.ResourceManagerAudience = readOrDefault(r, config.ResourceManagerAudience, DefaultAudience) + result.AzureAuthorityHost = readOrDefault(r, config.AzureAuthorityHost, DefaultAADAuthorityHost) + result.ClientID = r.get(config.AzureClientID) + result.TenantID = r.get(config.AzureTenantID) + result.MaxConcurrentReconciles, err = readAndParseOrDefault(r, config.MaxConcurrentReconciles, DefaultMaxConcurrentReconciles) if err != nil { return result, err } // Ignoring error here, as any other value or empty value means we should default to false - result.UseWorkloadIdentityAuth, _ = strconv.ParseBool(os.Getenv(config.UseWorkloadIdentityAuth)) - result.UserAgentSuffix = os.Getenv(config.UserAgentSuffix) - result.RateLimit.Mode, err = ParseRateLimitMode(envOrDefault(config.RateLimitMode, string(RateLimitModeDisabled))) + result.UseWorkloadIdentityAuth, _ = strconv.ParseBool(r.get(config.UseWorkloadIdentityAuth)) + result.UserAgentSuffix = r.get(config.UserAgentSuffix) + result.RateLimit.Mode, err = ParseRateLimitMode(readOrDefault(r, config.RateLimitMode, string(RateLimitModeDisabled))) if err != nil { return result, err } - result.RateLimit.QPS, err = envParseOrDefault(config.RateLimitQPS, 5.0) + result.RateLimit.QPS, err = readAndParseOrDefault(r, config.RateLimitQPS, 5.0) if err != nil { return result, err } - result.RateLimit.BucketSize, err = envParseOrDefault(config.RateLimitBucketSize, 100) + result.RateLimit.BucketSize, err = readAndParseOrDefault(r, config.RateLimitBucketSize, 100) if err != nil { return result, err } @@ -282,7 +324,7 @@ func ReadFromEnvironment() (Values, error) { // ReadAndValidate loads the configuration values and checks that // they're consistent. func ReadAndValidate() (Values, error) { - result, err := ReadFromEnvironment() + result, err := ReadFromFile(MountPath) if err != nil { return Values{}, err } @@ -322,8 +364,8 @@ func parseTargetNamespaces(fromEnv string) []string { } // parseSyncPeriod parses the sync period from the environment -func parseSyncPeriod() (*time.Duration, error) { - syncPeriodStr := envOrDefault(config.SyncPeriod, "1h") +func parseSyncPeriod(r *reader) (*time.Duration, error) { + syncPeriodStr := readOrDefault(r, config.SyncPeriod, "1h") if syncPeriodStr == "never" { // magical string that means no sync return nil, nil } @@ -335,8 +377,20 @@ func parseSyncPeriod() (*time.Duration, error) { return &syncPeriod, nil } -func envParseOrDefault[T int | string | float64](env string, def T) (T, error) { - str, specified := os.LookupEnv(env) +func readOrDefault(r *reader, key string, def string) string { + result, specified := r.lookup(key) + if !specified { + return def + } + if result == "" { + return def + } + + return result +} + +func readAndParseOrDefault[T int | string | float64](r *reader, key string, def T) (T, error) { + str, specified := r.lookup(key) if !specified { return def, nil } @@ -349,7 +403,7 @@ func envParseOrDefault[T int | string | float64](env string, def T) (T, error) { case int: parsedVal, err := strconv.Atoi(str) if err != nil { - return def, errors.Wrapf(err, "failed to parse value %q for %q", str, env) + return def, errors.Wrapf(err, "failed to parse value %q for %q", str, key) } result = any(parsedVal).(T) case string: @@ -357,26 +411,12 @@ func envParseOrDefault[T int | string | float64](env string, def T) (T, error) { case float64: parsedVal, err := strconv.ParseFloat(str, 64) if err != nil { - return def, errors.Wrapf(err, "failed to parse value %q for %q", str, env) + return def, errors.Wrapf(err, "failed to parse value %q for %q", str, key) } result = any(parsedVal).(T) default: - return def, errors.Errorf("can't read unsupported type %T from env", def) + return def, errors.Errorf("can't lookup unsupported type %T from key", def) } return result, nil } - -// envOrDefault returns the value of the specified env variable or the default value if -// the env variable was not set. -func envOrDefault(env string, def string) string { - result, specified := os.LookupEnv(env) - if !specified { - return def - } - if result == "" { - return def - } - - return result -} diff --git a/v2/internal/config/vars_internal_test.go b/v2/internal/config/vars_internal_test.go index b1b64c2f231..1da128d7897 100644 --- a/v2/internal/config/vars_internal_test.go +++ b/v2/internal/config/vars_internal_test.go @@ -17,7 +17,7 @@ func Test_ParseSyncPeriod_ReturnsNever(t *testing.T) { g := NewGomegaWithT(t) t.Setenv(config.SyncPeriod, "never") // Can't run in parallel - dur, err := parseSyncPeriod() + dur, err := parseSyncPeriod(newEnvReader()) g.Expect(err).ToNot(HaveOccurred()) g.Expect(dur).To(BeNil()) // Nil means no sync @@ -28,7 +28,7 @@ func Test_ParseSyncPeriod_ReturnsDefaultWhenEmpty(t *testing.T) { g := NewGomegaWithT(t) t.Setenv(config.SyncPeriod, "") // Can't run in parallel - dur, err := parseSyncPeriod() + dur, err := parseSyncPeriod(newEnvReader()) g.Expect(err).ToNot(HaveOccurred()) g.Expect(dur).ToNot(BeNil()) @@ -40,7 +40,7 @@ func Test_ParseSyncPeriod_ReturnsValue(t *testing.T) { g := NewGomegaWithT(t) t.Setenv(config.SyncPeriod, "21m") // Can't run in parallel - dur, err := parseSyncPeriod() + dur, err := parseSyncPeriod(newEnvReader()) g.Expect(err).ToNot(HaveOccurred()) g.Expect(dur).ToNot(BeNil()) diff --git a/v2/internal/config/watcher.go b/v2/internal/config/watcher.go new file mode 100644 index 00000000000..9bc4d7f754d --- /dev/null +++ b/v2/internal/config/watcher.go @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package config + +import ( + "context" + "os" + + "github.com/fsnotify/fsnotify" + "github.com/go-logr/logr" + + . "github.com/Azure/azure-service-operator/v2/internal/logging" +) + +type Watcher struct { + path string + + ctx context.Context + cancel context.CancelFunc + logger logr.Logger +} + +func NewWatcher(logger logr.Logger) *Watcher { + logger = logger.WithName("configwatcher") + + return &Watcher{ + path: MountPath, + logger: logger, + } +} + +func (w *Watcher) Start(ctx context.Context) error { + w.ctx, w.cancel = context.WithCancel(ctx) + w.logger.V(Info).Info("starting watcher", "path", w.path) + go w.runImpl() + + return nil +} + +func (w *Watcher) Stop() { + w.logger.V(Info).Info("stopping watcher", "path", w.path) + w.cancel() +} + +func (w *Watcher) runImpl() { + watcher, err := fsnotify.NewWatcher() + if err != nil { + w.logger.Error(err, "failed to create fsnotify watcher") + return + } + defer watcher.Close() + + err = watcher.Add(w.path) + + for { + var ok bool + var event fsnotify.Event + + select { + case <-w.ctx.Done(): + w.logger.Error(err, "Aborting watcher") + return + case event, ok = <-watcher.Events: + if !ok { + w.logger.Error(err, "Watcher events channel closed") + return + } + if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove | fsnotify.Rename) { + w.logger.V(Info).Info("Secret updated, exiting...") + // TODO: Ideally we would give up the lease here too + os.Exit(0) + } + case err, ok = <-watcher.Errors: + if !ok { + w.logger.Error(err, "Watcher error channel closed") + return + } + } + } +}