Skip to content

Commit

Permalink
[v2] Graceful shutdown (#659)
Browse files Browse the repository at this point in the history
This implements graceful shutdown by propagating interrupts to our child
processes.

Killing a workspace pod in the middle of an update will mark the Update
as failed ("update canceled" error), and subsequent Updates will resume
where it left off as you would expect.

Fixes #607.
  • Loading branch information
blampe authored Sep 24, 2024
1 parent 2a40b39 commit 181286c
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 89 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ pulumi-kubernetes-operator
# Temporary Build Files
build/_output
build/_test
.pulumi
Pulumi.*.yaml

# Created by https://www.gitignore.io/api/go,vim,emacs,visualstudiocode
### Emacs ###
Expand Down
5 changes: 4 additions & 1 deletion agent/cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,10 @@ func (noop) Run(context.Context, string, io.Reader, []io.Writer, []io.Writer, []
func init() {
rootCmd.AddCommand(initCmd)
initCmd.Flags().StringVarP(&_targetDir, "target-dir", "t", "", "The target directory to initialize")
initCmd.MarkFlagRequired("target-dir")
err := initCmd.MarkFlagRequired("target-dir")
if err != nil {
panic(err)
}

initCmd.Flags().StringVar(&_fluxURL, "flux-url", "", "Flux archive URL")
initCmd.Flags().StringVar(&_fluxDigest, "flux-digest", "", "Flux digest")
Expand Down
101 changes: 32 additions & 69 deletions agent/cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,46 +24,37 @@ import (
"path/filepath"
"syscall"

grpc_zap "github.com/grpc-ecosystem/go-grpc-middleware/logging/zap"
grpc_ctxtags "github.com/grpc-ecosystem/go-grpc-middleware/tags"
pb "github.com/pulumi/pulumi-kubernetes-operator/agent/pkg/proto"
"github.com/pulumi/pulumi-kubernetes-operator/agent/pkg/server"
"github.com/pulumi/pulumi-kubernetes-operator/agent/version"
"github.com/pulumi/pulumi/sdk/v3/go/auto"
"github.com/spf13/cobra"
"go.uber.org/zap"
"go.uber.org/zap/zapio"
"google.golang.org/grpc"
)

var (
WorkDir string
SkipInstall bool
Stack string
Host string
Port int
_workDir string
_skipInstall bool
_stack string
_host string
_port int
)

var rpcLogger *zap.Logger

// serveCmd represents the serve command
var serveCmd = &cobra.Command{
Use: "serve",
Short: "Serve the agent RPC service",
Long: `Start the agent gRPC server.
`,
PreRun: func(cmd *cobra.Command, args []string) {
rpcLogger = zap.L().Named("rpc")
grpc_zap.ReplaceGrpcLoggerV2(rpcLogger)
},
Run: func(cmd *cobra.Command, args []string) {
ctx := cmd.Context()

log.Infow("Pulumi Kubernetes Agent", "version", version.Version)
log.Debugw("executing serve command", "WorkDir", WorkDir)
log.Debugw("executing serve command", "WorkDir", _workDir)

// open the workspace using auto api
workspaceOpts := []auto.LocalWorkspaceOption{}
workDir, err := filepath.EvalSymlinks(WorkDir) // resolve the true location of the workspace
workDir, err := filepath.EvalSymlinks(_workDir) // resolve the true location of the workspace
if err != nil {
log.Fatalw("unable to resolve the workspace directory", zap.Error(err))
os.Exit(1)
Expand All @@ -82,7 +73,7 @@ var serveCmd = &cobra.Command{
log.Infow("opened a local workspace", "workspace", workDir,
"project", proj.Name, "runtime", proj.Runtime.Name())

if !SkipInstall {
if !_skipInstall {
plog := zap.L().Named("pulumi")
stdout := &zapio.Writer{Log: plog, Level: zap.InfoLevel}
defer stdout.Close()
Expand All @@ -105,38 +96,16 @@ var serveCmd = &cobra.Command{

// Create the automation service
autoServer, err := server.NewServer(ctx, workspace, &server.Options{
StackName: Stack,
StackName: _stack,
})
if err != nil {
log.Fatalw("unable to make an automation server", zap.Error(err))
os.Exit(1)
}

// Configure the grpc server.
// Apply zap logging and use filters to reduce log verbosity as needed.
address := fmt.Sprintf("%s:%d", Host, Port)
address := fmt.Sprintf("%s:%d", _host, _port)
log.Infow("starting the RPC server", "address", address)
serverOpts := []grpc_zap.Option{
grpc_zap.WithDecider(func(fullMethodName string, err error) bool {
return true
}),
}
decider := func(ctx context.Context, fullMethodName string, servingObject interface{}) bool {
return verbose
}
s := grpc.NewServer(
grpc.ChainUnaryInterceptor(
grpc_ctxtags.UnaryServerInterceptor(grpc_ctxtags.WithFieldExtractor(grpc_ctxtags.CodeGenRequestFieldExtractor)),
grpc_zap.UnaryServerInterceptor(rpcLogger, serverOpts...),
grpc_zap.PayloadUnaryServerInterceptor(rpcLogger, decider),
),
grpc.ChainStreamInterceptor(
grpc_ctxtags.StreamServerInterceptor(grpc_ctxtags.WithFieldExtractor(grpc_ctxtags.CodeGenRequestFieldExtractor)),
grpc_zap.StreamServerInterceptor(rpcLogger, serverOpts...),
grpc_zap.PayloadStreamServerInterceptor(rpcLogger, decider),
),
)
pb.RegisterAutomationServiceServer(s, autoServer)

s := server.NewGRPC(autoServer, log)

// Start the grpc server
lis, err := net.Listen("tcp", address)
Expand All @@ -146,50 +115,44 @@ var serveCmd = &cobra.Command{
}
log.Infow("server listening", "address", lis.Addr(), "workspace", workDir)

cancelCtx := setupSignalHandler()
go func() {
<-cancelCtx.Done()
log.Infow("shutting down the server")
s.GracefulStop()
autoServer.Cancel()
log.Infow("server stopped")
os.Exit(0)
}()
if err := s.Serve(lis); err != nil {
ctx, cancel := context.WithCancel(ctx)
setupSignalHandler(cancel)
if err := s.Serve(ctx, lis); err != nil {
log.Errorw("fatal: server failure", zap.Error(err))
os.Exit(1)
}

log.Infow("server stopped")
},
}

// SetupSignalHandler registers for SIGTERM and SIGINT. A context is returned
// which is canceled on one of these signals. If a second signal is caught, the program
// is terminated with exit code 1.
func setupSignalHandler() context.Context {
ctx, cancel := context.WithCancel(context.Background())

// SetupSignalHandler registers for SIGTERM and SIGINT. The fist signal invokes
// the cancel method. If a second signal is caught, the program is terminated
// with exit code 1.
func setupSignalHandler(cancel func()) {
c := make(chan os.Signal, 2)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
go func() {
<-c
cancel()
<-c
os.Exit(1) // second signal. Exit directly.
os.Exit(1) // Second signal, exit directly.
}()

return ctx
}

func init() {
rootCmd.AddCommand(serveCmd)

serveCmd.Flags().StringVarP(&WorkDir, "workspace", "w", "", "The workspace directory to serve")
serveCmd.MarkFlagRequired("workspace")
serveCmd.Flags().StringVarP(&_workDir, "workspace", "w", "", "The workspace directory to serve")
err := serveCmd.MarkFlagRequired("workspace")
if err != nil {
panic(err)
}

serveCmd.Flags().BoolVar(&SkipInstall, "skip-install", false, "Skip installation of project dependencies")
serveCmd.Flags().BoolVar(&_skipInstall, "skip-install", false, "Skip installation of project dependencies")

serveCmd.Flags().StringVarP(&Stack, "stack", "s", "", "Select (or create) the stack to use")
serveCmd.Flags().StringVarP(&_stack, "stack", "s", "", "Select (or create) the stack to use")

serveCmd.Flags().StringVar(&Host, "host", "0.0.0.0", "Server bind address (default: 0.0.0.0)")
serveCmd.Flags().IntVar(&Port, "port", 50051, "Server port (default: 50051)")
serveCmd.Flags().StringVar(&_host, "host", "0.0.0.0", "Server bind address (default: 0.0.0.0)")
serveCmd.Flags().IntVar(&_port, "port", 50051, "Server port (default: 50051)")
}
2 changes: 1 addition & 1 deletion agent/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/go-git/go-git/v5 v5.12.0
github.com/gogo/protobuf v1.3.2
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
github.com/mitchellh/go-ps v1.0.0
github.com/onsi/gomega v1.33.1
github.com/pulumi/pulumi/sdk/v3 v3.127.1-0.20240801092425-22d28187db0a
github.com/spf13/cobra v1.8.1
Expand Down Expand Up @@ -68,7 +69,6 @@ require (
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/mitchellh/go-ps v1.0.0 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
Expand Down
77 changes: 77 additions & 0 deletions agent/pkg/server/grpc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
Copyright © 2024 Pulumi Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server

import (
"context"
"net"

grpc_zap "github.com/grpc-ecosystem/go-grpc-middleware/logging/zap"
grpc_ctxtags "github.com/grpc-ecosystem/go-grpc-middleware/tags"
pb "github.com/pulumi/pulumi-kubernetes-operator/agent/pkg/proto"
"go.uber.org/zap"
"google.golang.org/grpc"
)

// GRPC serves the automation service.
type GRPC struct {
*grpc.Server
wrapped *Server
log *zap.SugaredLogger
}

// NewGRPC constructs a new gRPC server with logging and graceful shutdown.
func NewGRPC(server *Server, rootLogger *zap.SugaredLogger) *GRPC {
log := rootLogger.Named("grpc")
// Configure the grpc server.
// Apply zap logging and use filters to reduce log verbosity as needed.
serverOpts := []grpc_zap.Option{
grpc_zap.WithDecider(func(fullMethodName string, err error) bool {
return true
}),
}

grpc_zap.ReplaceGrpcLoggerV2WithVerbosity(log.Desugar(), int(log.Level()))

s := grpc.NewServer(
grpc.ChainUnaryInterceptor(
grpc_ctxtags.UnaryServerInterceptor(grpc_ctxtags.WithFieldExtractor(grpc_ctxtags.CodeGenRequestFieldExtractor)),
grpc_zap.UnaryServerInterceptor(log.Desugar(), serverOpts...),
),
grpc.ChainStreamInterceptor(
grpc_ctxtags.StreamServerInterceptor(grpc_ctxtags.WithFieldExtractor(grpc_ctxtags.CodeGenRequestFieldExtractor)),
grpc_zap.StreamServerInterceptor(log.Desugar(), serverOpts...),
),
)
pb.RegisterAutomationServiceServer(s, server)

return &GRPC{Server: s, wrapped: server, log: log}
}

// Serve wraps the underlying gRPC server with graceful shutdown. When the
// given context is canceled a SIGTERM is propagated to all child processes
// (spawned by Automation API) and requests are given an opportunity to exit
// cleanly.
func (s *GRPC) Serve(ctx context.Context, l net.Listener) error {
go func() {
<-ctx.Done()
s.log.Infow("shutting down the server")
s.wrapped.Cancel() // Non-blocking.
s.GracefulStop() // Blocks until outstanding requests have finished.
}()

return s.Server.Serve(l)
}
Loading

0 comments on commit 181286c

Please sign in to comment.