@@ -35,6 +35,8 @@ import (
35
35
"google.golang.org/grpc"
36
36
"google.golang.org/grpc/codes"
37
37
"google.golang.org/grpc/credentials"
38
+ grpchealth "google.golang.org/grpc/health"
39
+ "google.golang.org/grpc/health/grpc_health_v1"
38
40
"google.golang.org/grpc/keepalive"
39
41
"google.golang.org/grpc/status"
40
42
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -52,6 +54,7 @@ import (
52
54
"agones.dev/agones/pkg/gameserverallocations"
53
55
"agones.dev/agones/pkg/gameservers"
54
56
"agones.dev/agones/pkg/util/fswatch"
57
+ "agones.dev/agones/pkg/util/httpserver"
55
58
"agones.dev/agones/pkg/util/runtime"
56
59
"agones.dev/agones/pkg/util/signals"
57
60
)
@@ -218,19 +221,18 @@ func main() {
218
221
health , closer := setupMetricsRecorder (conf )
219
222
defer closer ()
220
223
221
- // http.DefaultServerMux is used for http connection, not for https
222
- http .Handle ("/" , health )
223
-
224
224
kubeClient , agonesClient , err := getClients (conf )
225
225
if err != nil {
226
226
logger .WithError (err ).Fatal ("could not create clients" )
227
227
}
228
228
229
+ listenCtx , cancelListenCtx := context .WithCancel (context .Background ())
230
+
229
231
// This will test the connection to agones on each readiness probe
230
232
// so if one of the allocator pod can't reach Kubernetes it will be removed
231
233
// from the Kubernetes service.
232
- ctx , cancelCtx := context .WithCancel (context .Background ())
233
234
podReady = true
235
+ grpcHealth := grpchealth .NewServer () // only used for gRPC, ignored o/w
234
236
health .AddReadinessCheck ("allocator-agones-client" , func () error {
235
237
if ! podReady {
236
238
return errors .New ("asked to shut down, failed readiness check" )
@@ -245,16 +247,15 @@ func main() {
245
247
signals .NewSigTermHandler (func () {
246
248
logger .Info ("Pod shutdown has been requested, failing readiness check" )
247
249
podReady = false
250
+ grpcHealth .Shutdown ()
248
251
time .Sleep (conf .ReadinessShutdownDuration )
249
- cancelCtx ()
250
- logger .Infof ("Readiness shutdown duration has passed, context cancelled" )
251
- time .Sleep (1 * time .Second ) // allow a brief time for cleanup, but force exit if main doesn't
252
- os .Exit (0 )
252
+ cancelListenCtx ()
253
253
})
254
254
255
255
grpcUnallocatedStatusCode := grpcCodeFromHTTPStatus (conf .httpUnallocatedStatusCode )
256
256
257
- h := newServiceHandler (ctx , kubeClient , agonesClient , health , conf .MTLSDisabled , conf .TLSDisabled , conf .remoteAllocationTimeout , conf .totalRemoteAllocationTimeout , conf .allocationBatchWaitTime , grpcUnallocatedStatusCode )
257
+ workerCtx , cancelWorkerCtx := context .WithCancel (context .Background ())
258
+ h := newServiceHandler (workerCtx , kubeClient , agonesClient , health , conf .MTLSDisabled , conf .TLSDisabled , conf .remoteAllocationTimeout , conf .totalRemoteAllocationTimeout , conf .allocationBatchWaitTime , grpcUnallocatedStatusCode )
258
259
259
260
if ! h .tlsDisabled {
260
261
cancelTLS , err := fswatch .Watch (logger , tlsDir , time .Second , func () {
@@ -294,51 +295,62 @@ func main() {
294
295
295
296
// If grpc and http use the same port then use a mux.
296
297
if conf .GRPCPort == conf .HTTPPort {
297
- runMux (h , conf .HTTPPort )
298
+ runMux (listenCtx , workerCtx , h , grpcHealth , conf .HTTPPort )
298
299
} else {
299
300
// Otherwise, run each on a dedicated port.
300
301
if validPort (conf .HTTPPort ) {
301
- runREST (h , conf .HTTPPort )
302
+ runREST (listenCtx , workerCtx , h , conf .HTTPPort )
302
303
}
303
304
if validPort (conf .GRPCPort ) {
304
- runGRPC (h , conf .GRPCPort )
305
+ runGRPC (listenCtx , h , grpcHealth , conf .GRPCPort )
305
306
}
306
307
}
307
308
308
- // Finally listen on 8080 (http) and block the main goroutine
309
- // this is used to serve /live and /ready handlers for Kubernetes probes.
310
- err = http .ListenAndServe (":8080" , http .DefaultServeMux )
311
- logger .WithError (err ).Fatal ("allocation service crashed" )
309
+ // Finally listen on 8080 (http), used to serve /live and /ready handlers for Kubernetes probes.
310
+ healthserver := httpserver.Server {Logger : logger }
311
+ healthserver .Handle ("/" , health )
312
+ go func () { _ = healthserver .Run (listenCtx , 0 ) }()
313
+
314
+ // TODO: This is messy. Contexts are the wrong way to handle this - we should be using shutdown,
315
+ // and a cascading graceful shutdown instead of multiple contexts and sleeps.
316
+ <- listenCtx .Done ()
317
+ logger .Infof ("Listen context cancelled" )
318
+ time .Sleep (5 * time .Second )
319
+ cancelWorkerCtx ()
320
+ logger .Infof ("Worker context cancelled" )
321
+ time .Sleep (1 * time .Second )
322
+ logger .Info ("Shut down allocator" )
312
323
}
313
324
314
325
// validPort reports whether port can be used as a listening port, i.e. whether
// it lies in the inclusive range 0..65535.
//
// main uses this to decide whether the dedicated REST and gRPC listeners are
// started at all, so a port outside the range disables that listener.
// Port 0 is accepted, as it was before.
func validPort(port int) bool {
	// 65535 is the largest port number that fits in the 16-bit port field;
	// the comparison is inclusive so that it is not rejected.
	const maxPort = 65535
	return port >= 0 && port <= maxPort
}
318
329
319
- func runMux (h * serviceHandler , httpPort int ) {
330
+ func runMux (listenCtx context. Context , workerCtx context. Context , h * serviceHandler , grpcHealth * grpchealth. Server , httpPort int ) {
320
331
logger .Infof ("Running the mux handler on port %d" , httpPort )
321
332
grpcServer := grpc .NewServer (h .getMuxServerOptions ()... )
322
333
pb .RegisterAllocationServiceServer (grpcServer , h )
334
+ grpc_health_v1 .RegisterHealthServer (grpcServer , grpcHealth )
323
335
324
336
mux := runtime .NewServerMux ()
325
337
if err := pb .RegisterAllocationServiceHandlerServer (context .Background (), mux , h ); err != nil {
326
338
panic (err )
327
339
}
328
340
329
- runHTTP (h , httpPort , grpcHandlerFunc (grpcServer , mux ))
341
+ runHTTP (listenCtx , workerCtx , h , httpPort , grpcHandlerFunc (grpcServer , mux ))
330
342
}
331
343
332
- func runREST (h * serviceHandler , httpPort int ) {
344
+ func runREST (listenCtx context. Context , workerCtx context. Context , h * serviceHandler , httpPort int ) {
333
345
logger .WithField ("port" , httpPort ).Info ("Running the rest handler" )
334
346
mux := runtime .NewServerMux ()
335
347
if err := pb .RegisterAllocationServiceHandlerServer (context .Background (), mux , h ); err != nil {
336
348
panic (err )
337
349
}
338
- runHTTP (h , httpPort , mux )
350
+ runHTTP (listenCtx , workerCtx , h , httpPort , mux )
339
351
}
340
352
341
- func runHTTP (h * serviceHandler , httpPort int , handler http.Handler ) {
353
+ func runHTTP (listenCtx context. Context , workerCtx context. Context , h * serviceHandler , httpPort int , handler http.Handler ) {
342
354
cfg := & tls.Config {}
343
355
if ! h .tlsDisabled {
344
356
cfg .GetCertificate = h .getTLSCert
@@ -356,21 +368,29 @@ func runHTTP(h *serviceHandler, httpPort int, handler http.Handler) {
356
368
}
357
369
358
370
go func () {
371
+ go func () {
372
+ <- listenCtx .Done ()
373
+ _ = server .Shutdown (workerCtx )
374
+ }()
375
+
359
376
var err error
360
377
if ! h .tlsDisabled {
361
378
err = server .ListenAndServeTLS ("" , "" )
362
379
} else {
363
380
err = server .ListenAndServe ()
364
381
}
365
382
366
- if err != nil {
383
+ if err == http .ErrServerClosed {
384
+ logger .WithError (err ).Info ("HTTP/HTTPS server closed" )
385
+ os .Exit (0 )
386
+ } else {
367
387
logger .WithError (err ).Fatal ("Unable to start HTTP/HTTPS listener" )
368
388
os .Exit (1 )
369
389
}
370
390
}()
371
391
}
372
392
373
- func runGRPC (h * serviceHandler , grpcPort int ) {
393
+ func runGRPC (ctx context. Context , h * serviceHandler , grpcHealth * grpchealth. Server , grpcPort int ) {
374
394
logger .WithField ("port" , grpcPort ).Info ("Running the grpc handler on port" )
375
395
listener , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , grpcPort ))
376
396
if err != nil {
@@ -380,11 +400,22 @@ func runGRPC(h *serviceHandler, grpcPort int) {
380
400
381
401
grpcServer := grpc .NewServer (h .getGRPCServerOptions ()... )
382
402
pb .RegisterAllocationServiceServer (grpcServer , h )
403
+ grpc_health_v1 .RegisterHealthServer (grpcServer , grpcHealth )
383
404
384
405
go func () {
406
+ go func () {
407
+ <- ctx .Done ()
408
+ grpcServer .GracefulStop ()
409
+ }()
410
+
385
411
err := grpcServer .Serve (listener )
386
- logger .WithError (err ).Fatal ("allocation service crashed" )
387
- os .Exit (1 )
412
+ if err != nil {
413
+ logger .WithError (err ).Fatal ("allocation service crashed" )
414
+ os .Exit (1 )
415
+ } else {
416
+ logger .Info ("allocation server closed" )
417
+ os .Exit (0 )
418
+ }
388
419
}()
389
420
}
390
421
0 commit comments