Skip to content

Commit 6250930

Browse files
authored
COH 24183 Allow the Operator to resume Coherence services on Pod start-up (#506)
Allow the Operator to resume any suspended Coherence services on Pod start-up. The Operator will now only suspend Coherence services that have persistence enabled when deleting a Coherence resource, or scaling to zero.
1 parent ee3efb5 commit 6250930

20 files changed

+1142
-61
lines changed

api/v1/coherenceresourcespec_types.go

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
package v1
88

99
import (
10+
"bytes"
11+
"encoding/base64"
12+
"fmt"
1013
"github.com/oracle/coherence-operator/pkg/operator"
1114
appsv1 "k8s.io/api/apps/v1"
1215
corev1 "k8s.io/api/core/v1"
@@ -15,6 +18,7 @@ import (
1518
"k8s.io/apimachinery/pkg/util/intstr"
1619
"k8s.io/utils/pointer"
1720
"strconv"
21+
"strings"
1822
)
1923

2024
// NOTE: This file is used to generate the CRDs used by the Operator. The CRD files should not be manually edited
@@ -99,6 +103,26 @@ type CoherenceResourceSpec struct {
99103
// The default value if not specified is true.
100104
// +optional
101105
SuspendServicesOnShutdown *bool `json:"suspendServicesOnShutdown,omitempty"`
106+
// ResumeServicesOnStartup allows the Operator to resume suspended Coherence services when
107+
// the Coherence container is started. This only applies to storage enabled distributed cache
108+
// services. This ensures that services which were suspended when a storage tier was shut
109+
// down, but which are still running (albeit suspended) in other storage disabled
110+
// deployments, will be resumed when storage comes back.
111+
// Note that starting Pods with suspended partitioned cache services may stop the Pod reaching the ready state.
112+
// The default value if not specified is true.
113+
// +optional
114+
ResumeServicesOnStartup *bool `json:"resumeServicesOnStartup,omitempty"`
115+
// AutoResumeServices is a map of Coherence service names to allow more fine-grained control over
116+
// which services may be auto-resumed by the operator when a Coherence Pod starts.
117+
// The key to the map is the name of the Coherence service. This should be the fully qualified name
118+
// if scoped services are being used in Coherence. The value is a bool, set to `true` to allow the
119+
// service to be auto-resumed or `false` to not allow the service to be auto-resumed.
120+
// Adding service names to this list will override any value set in `ResumeServicesOnStartup`, so if the
121+
// `ResumeServicesOnStartup` field is `false` but there are service names in the `AutoResumeServices`, mapped
122+
// to `true`, those services will still be resumed.
123+
// Note that starting Pods with suspended partitioned cache services may stop the Pod reaching the ready state.
124+
// +optional
125+
AutoResumeServices map[string]bool `json:"autoResumeServices,omitempty"`
102126
// SuspendServiceTimeout sets the number of seconds to wait for the service suspend
103127
// call to return (the default is 60 seconds)
104128
// +optional
@@ -117,7 +141,7 @@ type CoherenceResourceSpec struct {
117141
// +listMapKey=name
118142
// +optional
119143
Env []corev1.EnvVar `json:"env,omitempty"`
120-
// The extra labels to add to the all of the Pods in this deployments.
144+
// The extra labels to add to all the Pods in this deployment.
121145
// Labels here will add to or override those defined for the cluster.
122146
// More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
123147
// +optional
@@ -257,14 +281,14 @@ type CoherenceResourceSpec struct {
257281
// sets the serviceAccountName value in the Pod spec.
258282
// +optional
259283
ServiceAccountName string `json:"serviceAccountName,omitempty"`
260-
// Whether or not to auto-mount the Kubernetes API credentials for a service account
284+
// Whether to auto-mount the Kubernetes API credentials for a service account
261285
// +optional
262286
AutomountServiceAccountToken *bool `json:"automountServiceAccountToken,omitempty"`
263287
// The timeout to apply to REST requests made back to the Operator from Coherence Pods.
264288
// These requests are typically to obtain site and rack information for the Pod.
265289
// +optional
266290
OperatorRequestTimeout *int32 `json:"operatorRequestTimeout,omitempty"`
267-
// Whether or not to perform a StatusHA test on the cluster before performing an update or deletion.
291+
// Whether to perform a StatusHA test on the cluster before performing an update or deletion.
268292
// This field can be set to false to force through an update even when a Coherence deployment is in
269293
// an unstable state.
270294
// The default is true, to always check for StatusHA before updating a Coherence deployment.
@@ -871,6 +895,19 @@ func (in *CoherenceResourceSpec) CreateDefaultEnv(deployment *Coherence) []corev
871895
env = append(env, corev1.EnvVar{Name: EnvVarCohIdentity, Value: deployment.Name + "@" + deployment.Namespace})
872896
}
873897

898+
if deployment.Spec.ResumeServicesOnStartup != nil {
899+
env = append(env, corev1.EnvVar{Name: EnvVarOperatorAllowResume, Value: BoolPtrToString(deployment.Spec.ResumeServicesOnStartup)})
900+
}
901+
902+
if deployment.Spec.AutoResumeServices != nil {
903+
b := new(bytes.Buffer)
904+
for key, value := range deployment.Spec.AutoResumeServices {
905+
_, _ = fmt.Fprintf(b, "\"%s\"=%t,", strings.Replace(key, "\"", "\\\"", -1), value)
906+
}
907+
value := base64.StdEncoding.EncodeToString(b.Bytes())
908+
env = append(env, corev1.EnvVar{Name: EnvVarOperatorResumeServices, Value: value})
909+
}
910+
874911
return env
875912
}
876913

api/v1/constants.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ const (
157157
EnvVarAppMainArgs = "COH_MAIN_ARGS"
158158
EnvVarOperatorHost = "OPERATOR_HOST"
159159
EnvVarOperatorTimeout = "OPERATOR_REQUEST_TIMEOUT"
160+
EnvVarOperatorAllowResume = "OPERATOR_ALLOW_RESUME"
161+
EnvVarOperatorResumeServices = "OPERATOR_RESUME_SERVICES"
160162
EnvVarCoherenceHome = "COHERENCE_HOME"
161163
EnvVarCohDependencyModules = "DEPENDENCY_MODULES"
162164
EnvVarCohSkipVersionCheck = "COH_SKIP_VERSION_CHECK"

api/v1/create_statefulset_coherencespec_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,3 +303,35 @@ func TestCreateStatefulSetWithCoherenceSpecWithWkaDifferentNamespace(t *testing.
303303
// assert that the StatefulSet is as expected
304304
assertStatefulSetCreation(t, deployment, stsExpected)
305305
}
306+
307+
func TestCreateStatefulSetWithResumeServicesOnStartupTrue(t *testing.T) {
308+
309+
spec := coh.CoherenceResourceSpec{
310+
ResumeServicesOnStartup: boolPtr(true),
311+
}
312+
313+
// Create the test deployment
314+
deployment := createTestDeployment(spec)
315+
// Create expected StatefulSet
316+
stsExpected := createMinimalExpectedStatefulSet(deployment)
317+
addEnvVars(stsExpected, coh.ContainerNameCoherence, corev1.EnvVar{Name: coh.EnvVarOperatorAllowResume, Value: "true"})
318+
319+
// assert that the StatefulSet is as expected
320+
assertStatefulSetCreation(t, deployment, stsExpected)
321+
}
322+
323+
func TestCreateStatefulSetWithResumeServicesOnStartupFalse(t *testing.T) {
324+
325+
spec := coh.CoherenceResourceSpec{
326+
ResumeServicesOnStartup: boolPtr(false),
327+
}
328+
329+
// Create the test deployment
330+
deployment := createTestDeployment(spec)
331+
// Create expected StatefulSet
332+
stsExpected := createMinimalExpectedStatefulSet(deployment)
333+
addEnvVars(stsExpected, coh.ContainerNameCoherence, corev1.EnvVar{Name: coh.EnvVarOperatorAllowResume, Value: "false"})
334+
335+
// assert that the StatefulSet is as expected
336+
assertStatefulSetCreation(t, deployment, stsExpected)
337+
}

api/v1/zz_generated.deepcopy.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/coherence.oracle.com_coherence.yaml

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -727,8 +727,24 @@ spec:
727727
not set the application directory default value is "/app".
728728
type: string
729729
type: object
730+
autoResumeServices:
731+
additionalProperties:
732+
type: boolean
733+
description: AutoResumeServices is a map of Coherence service names
734+
to allow more fine-grained control over which services may be auto-resumed
735+
by the operator when a Coherence Pod starts. The key to the map
736+
is the name of the Coherence service. This should be the fully qualified
737+
name if scoped services are being used in Coherence. The value is
738+
a bool, set to `true` to allow the service to be auto-resumed or
739+
`false` to not allow the service to be auto-resumed. Adding service
740+
names to this list will override any value set in `ResumeServicesOnStartup`,
741+
so if the `ResumeServicesOnStartup` field is `false` but there are
742+
service names in the `AutoResumeServices`, mapped to `true`, those
743+
services will still be resumed. Note that starting Pods with suspended
744+
partitioned cache services may stop the Pod reaching the ready state.
745+
type: object
730746
automountServiceAccountToken:
731-
description: Whether or not to auto-mount the Kubernetes API credentials
747+
description: Whether to auto-mount the Kubernetes API credentials
732748
for a service account
733749
type: boolean
734750
cluster:
@@ -4770,11 +4786,11 @@ spec:
47704786
- name
47714787
x-kubernetes-list-type: map
47724788
haBeforeUpdate:
4773-
description: Whether or not to perform a StatusHA test on the cluster
4774-
before performing an update or deletion. This field can be set to
4775-
false to force through an update even when a Coherence deployment
4776-
is in an unstable state. The default is true, to always check for
4777-
StatusHA before updating a Coherence deployment.
4789+
description: Whether to perform a StatusHA test on the cluster before
4790+
performing an update or deletion. This field can be set to false
4791+
to force through an update even when a Coherence deployment is in
4792+
an unstable state. The default is true, to always check for StatusHA
4793+
before updating a Coherence deployment.
47784794
type: boolean
47794795
healthPort:
47804796
description: The port that the health check endpoint will bind to.
@@ -7582,8 +7598,8 @@ spec:
75827598
labels:
75837599
additionalProperties:
75847600
type: string
7585-
description: 'The extra labels to add to the all of the Pods in this
7586-
deployments. Labels here will add to or override those defined for
7601+
description: 'The extra labels to add to all the Pods in this
7602+
deployment. Labels here will add to or override those defined for
75877603
the cluster. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/'
75887604
type: object
75897605
livenessProbe:
@@ -8603,6 +8619,17 @@ spec:
86038619
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
86048620
type: object
86058621
type: object
8622+
resumeServicesOnStartup:
8623+
description: ResumeServicesOnStartup allows the Operator to resume
8624+
suspended Coherence services when the Coherence container is started.
8625+
This only applies to storage enabled distributed cache services.
8626+
This ensures that services that are suspended due to the shutdown
8627+
of a storage tier, but those services are still running (albeit
8628+
suspended) in other storage disabled deployments, will be resumed
8629+
when storage comes back. Note that starting Pods with suspended
8630+
partitioned cache services may stop the Pod reaching the ready state.
8631+
The default value if not specified is true.
8632+
type: boolean
86068633
role:
86078634
description: The name of the role that this deployment represents
86088635
in a Coherence cluster. This value will be used to set the Coherence

docs/about/04_coherence_spec.adoc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,12 @@ m| ports | Ports specifies additional port mappings for the Pod and additional S
8484
m| scaling | The configuration to control safe scaling. m| &#42;<<ScalingSpec,ScalingSpec>> | false
8585
m| suspendProbe | The configuration of the probe used to signal that services must be suspended before a deployment is stopped. m| &#42;<<Probe,Probe>> | false
8686
m| suspendServicesOnShutdown | A flag controlling whether storage enabled cache services in this deployment will be suspended before the deployment is shutdown or scaled to zero. The action of suspending storage enabled services when the whole deployment is being stopped ensures that cache services with persistence enabled will shutdown cleanly without the possibility of Coherence trying to recover and re-balance partitions as Pods are stopped. The default value if not specified is true. m| &#42;bool | false
87+
m| resumeServicesOnStartup | ResumeServicesOnStartup allows the Operator to resume suspended Coherence services when the Coherence container is started. This only applies to storage enabled distributed cache services. This ensures that services which were suspended when a storage tier was shut down, but which are still running (albeit suspended) in other storage disabled deployments, will be resumed when storage comes back. Note that starting Pods with suspended partitioned cache services may stop the Pod reaching the ready state. The default value if not specified is true. m| &#42;bool | false
88+
m| autoResumeServices | AutoResumeServices is a map of Coherence service names to allow more fine-grained control over which services may be auto-resumed by the operator when a Coherence Pod starts. The key to the map is the name of the Coherence service. This should be the fully qualified name if scoped services are being used in Coherence. The value is a bool, set to `true` to allow the service to be auto-resumed or `false` to not allow the service to be auto-resumed. Adding service names to this list will override any value set in `ResumeServicesOnStartup`, so if the `ResumeServicesOnStartup` field is `false` but there are service names in the `AutoResumeServices`, mapped to `true`, those services will still be resumed. Note that starting Pods with suspended partitioned cache services may stop the Pod reaching the ready state. m| map[string]bool | false
8789
m| suspendServiceTimeout | SuspendServiceTimeout sets the number of seconds to wait for the service suspend call to return (the default is 60 seconds) m| &#42;int | false
8890
m| startQuorum | StartQuorum controls the start-up order of this Coherence resource in relation to other Coherence resources. m| []<<StartQuorum,StartQuorum>> | false
8991
m| env | Env is additional environment variable mappings that will be passed to the Coherence container in the Pod. To specify extra variables add them as name value pairs the same as they would be added to a Pod containers spec. m| []https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.17/#envvar-v1-core[corev1.EnvVar] | false
90-
m| labels | The extra labels to add to the all of the Pods in this deployments. Labels here will add to or override those defined for the cluster. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ m| map[string]string | false
92+
m| labels | The extra labels to add to all the Pods in this deployment. Labels here will add to or override those defined for the cluster. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ m| map[string]string | false
9193
m| annotations | Annotations are free-form yaml that will be added to the store release as annotations. Any annotations should be placed BELOW this annotations: key. For example if we wanted to include annotations for Prometheus it would look like this: +
9294
+
9395
annotations: +
@@ -139,9 +141,9 @@ m| hostIPC | Use the host's ipc namespace. Optional: Default to false. m| &#42;b
139141
m| network | Configure various networks and DNS settings for Pods in this role. m| &#42;<<NetworkSpec,NetworkSpec>> | false
140142
m| coherenceUtils | The configuration for the Coherence utils image m| &#42;<<ImageSpec,ImageSpec>> | false
141143
m| serviceAccountName | The name to use for the service account to use when RBAC is enabled. The role bindings must already have been created, as this chart does not create them; it just sets the serviceAccountName value in the Pod spec. m| string | false
142-
m| automountServiceAccountToken | Whether or not to auto-mount the Kubernetes API credentials for a service account m| &#42;bool | false
144+
m| automountServiceAccountToken | Whether to auto-mount the Kubernetes API credentials for a service account m| &#42;bool | false
143145
m| operatorRequestTimeout | The timeout to apply to REST requests made back to the Operator from Coherence Pods. These requests are typically to obtain site and rack information for the Pod. m| &#42;int32 | false
144-
m| haBeforeUpdate | Whether or not to perform a StatusHA test on the cluster before performing an update or deletion. This field can be set to false to force through an update even when a Coherence deployment is in an unstable state. The default is true, to always check for StatusHA before updating a Coherence deployment. m| &#42;bool | false
146+
m| haBeforeUpdate | Whether to perform a StatusHA test on the cluster before performing an update or deletion. This field can be set to false to force through an update even when a Coherence deployment is in an unstable state. The default is true, to always check for StatusHA before updating a Coherence deployment. m| &#42;bool | false
145147
|===
146148
147149
<<Table of Contents,Back to TOC>>

java/coherence-operator/pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@
7474
</dependencies>
7575

7676
<build>
77+
<resources>
78+
<resource>
79+
<directory>${project.basedir}/src/main/resources</directory>
80+
<filtering>true</filtering>
81+
</resource>
82+
</resources>
83+
7784
<!-- Use the assembly plugin to pull together the files to use to build the
7885
docker image under the target/docker folder -->
7986
<plugins>

java/coherence-operator/src/main/java/com/oracle/coherence/k8s/CoherenceOperator.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66

77
package com.oracle.coherence.k8s;
88

9+
import java.io.PrintStream;
10+
import java.net.URL;
11+
import java.util.Properties;
12+
913
import com.tangosol.net.CacheFactory;
1014

1115
/**
@@ -18,6 +22,8 @@ public class CoherenceOperator
1822

1923
private static final String NA = "n/a";
2024

25+
private static Properties properties;
26+
2127
private String identity = NA;
2228

2329
/**
@@ -39,4 +45,39 @@ public String getIdentity() {
3945
public int getNodeId() {
4046
return CacheFactory.getCluster().getLocalMember().getId();
4147
}
48+
49+
/**
50+
* Returns the operator version.
51+
*
52+
* @return the operator version
53+
*/
54+
public static String getVersion() {
55+
return ensureProperties().getProperty("version", NA);
56+
}
57+
58+
/**
59+
* Print the Operator banner.
60+
*
61+
* @param out the {@link PrintStream} to print the banner on
62+
*/
63+
public static void printBanner(PrintStream out) {
64+
out.printf("CoherenceOperator: Java Runner version %s\n", getVersion());
65+
}
66+
67+
private static synchronized Properties ensureProperties() {
68+
if (properties == null) {
69+
Properties props = new Properties();
70+
try {
71+
URL url = CoherenceOperator.class.getResource("/META-INF/operator.properties");
72+
if (url != null) {
73+
props.load(url.openStream());
74+
}
75+
}
76+
catch (Throwable t) {
77+
t.printStackTrace();
78+
}
79+
properties = props;
80+
}
81+
return properties;
82+
}
4283
}

0 commit comments

Comments
 (0)