Skip to content

Commit

Permalink
HA mode (#454)
Browse files Browse the repository at this point in the history
Signed-off-by: Mikhail Scherba <[email protected]>
  • Loading branch information
miklezzzz authored Mar 5, 2024
1 parent e2dec78 commit 4bd99b7
Show file tree
Hide file tree
Showing 13 changed files with 384 additions and 44 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ COPY --from=builder /app/shell-operator-clone/frameworks/shell/ /framework/shell
COPY --from=builder /app/shell-operator-clone/shell_lib.sh /
WORKDIR /

RUN mkdir /global-hooks /modules
ENV MODULES_DIR /modules
ENV GLOBAL_HOOKS_DIR /global-hooks
ENTRYPOINT ["/sbin/tini", "--", "/addon-operator"]
160 changes: 132 additions & 28 deletions cmd/addon-operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@ import (
"fmt"
"math/rand"
"os"
"strings"
"syscall"
"time"

log "github.com/sirupsen/logrus"
"gopkg.in/alecthomas/kingpin.v2"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"

addon_operator "github.com/flant/addon-operator/pkg/addon-operator"
"github.com/flant/addon-operator/pkg/app"
Expand All @@ -20,6 +25,13 @@ import (
utils_signal "github.com/flant/shell-operator/pkg/utils/signal"
)

// Leader-election settings used when addon-operator runs in HA mode.
// The numeric values are plain counts of seconds: they are multiplied by
// time.Second where the leader election config is built.
const (
// leaseName is the name of the Lease object used as the election lock.
leaseName = "addon-operator-leader-election"
// leaseDuration is the value (in seconds) passed as LeaseDuration.
leaseDuration = 35
// renewalDeadline is the value (in seconds) passed as RenewDeadline.
renewalDeadline = 30
// retryPeriod is the value (in seconds) passed as RetryPeriod.
retryPeriod = 10
)

func main() {
kpApp := kingpin.New(app.AppName, fmt.Sprintf("%s %s: %s", app.AppName, app.Version, app.AppDescription))

Expand All @@ -41,41 +53,133 @@ func main() {
// start main loop
startCmd := kpApp.Command("start", "Start events processing.").
Default().
Action(func(c *kingpin.ParseContext) error {
sh_app.AppStartMessage = fmt.Sprintf("%s %s, shell-operator %s", app.AppName, app.Version, sh_app.Version)
Action(start)

// Init rand generator.
rand.Seed(time.Now().UnixNano())
app.DefineStartCommandFlags(kpApp, startCmd)

operator := addon_operator.NewAddonOperator(context.Background())
debug.DefineDebugCommands(kpApp)
app.DefineDebugCommands(kpApp)

bk := configmap.New(log.StandardLogger(), operator.KubeClient(), app.Namespace, app.ConfigMapName)
operator.SetupKubeConfigManager(bk)
kingpin.MustParse(kpApp.Parse(os.Args[1:]))
}

err := operator.Setup()
if err != nil {
fmt.Printf("Setup is failed: %s\n", err)
os.Exit(1)
}
// start is the action registered for the "start" command. It sets the
// startup message, seeds the random generator, creates the operator and
// starts its API server. When the ADDON_OPERATOR_HA environment variable
// equals "true" it hands control to runHAMode; otherwise it calls run and
// exits with status 1 if run reports an error.
func start(_ *kingpin.ParseContext) error {
sh_app.AppStartMessage = fmt.Sprintf("%s %s, shell-operator %s", app.AppName, app.Version, sh_app.Version)

err = operator.Start()
if err != nil {
fmt.Printf("Start is failed: %s\n", err)
os.Exit(1)
}
// Init rand generator.
rand.Seed(time.Now().UnixNano())

// Block action by waiting signals from OS.
utils_signal.WaitForProcessInterruption(func() {
operator.Stop()
os.Exit(1)
})
ctx := context.Background()

return nil
})
app.DefineStartCommandFlags(kpApp, startCmd)
operator := addon_operator.NewAddonOperator(ctx)

debug.DefineDebugCommands(kpApp)
app.DefineDebugCommands(kpApp)
operator.StartAPIServer()

kingpin.MustParse(kpApp.Parse(os.Args[1:]))
if os.Getenv("ADDON_OPERATOR_HA") == "true" {
log.Info("Addon-operator is starting in HA mode")
runHAMode(ctx, operator)
return nil
}

err := run(ctx, operator)
if err != nil {
log.Error(err)
os.Exit(1)
}

return nil
}

// run attaches a ConfigMap-backed kube config manager to the operator, then
// sets up and starts the operator and finally waits for a process
// interruption signal, on which it stops the operator and exits with
// status 1.
//
// Setup and start failures are returned to the caller, wrapped with the step
// that failed, instead of terminating the process here. Both call sites
// already log the returned error and exit with status 1, so the process exit
// code is unchanged while the declared error result becomes meaningful.
//
// The context argument is accepted for signature compatibility and is
// currently unused.
func run(_ context.Context, operator *addon_operator.AddonOperator) error {
	bk := configmap.New(log.StandardLogger(), operator.KubeClient(), app.Namespace, app.ConfigMapName)
	operator.SetupKubeConfigManager(bk)

	if err := operator.Setup(); err != nil {
		return fmt.Errorf("setup addon-operator: %w", err)
	}

	if err := operator.Start(); err != nil {
		return fmt.Errorf("start addon-operator: %w", err)
	}

	// Block action by waiting signals from OS.
	utils_signal.WaitForProcessInterruption(func() {
		operator.Stop()
		os.Exit(1)
	})

	return nil
}

// runHAMode runs the operator behind a Lease-based leader election.
//
// The election identity is built from three mandatory environment variables:
// ADDON_OPERATOR_POD, ADDON_OPERATOR_LISTEN_ADDRESS and
// ADDON_OPERATOR_NAMESPACE. The process exits with status 1 when any of them
// is unset or empty. The Lease object is created in the namespace taken from
// ADDON_OPERATOR_NAMESPACE.
//
// Once this instance starts leading, run is invoked. When leadership is lost
// the operator is stopped and the process exits with status 1.
func runHAMode(ctx context.Context, operator *addon_operator.AddonOperator) {
	// requireEnv returns the value of a mandatory environment variable and
	// terminates the process when it is unset or empty. The condition is
	// fatal, so it is reported at error level.
	requireEnv := func(name string) string {
		value := os.Getenv(name)
		if len(value) == 0 {
			log.Errorf("%s env not set or empty", name)
			os.Exit(1)
		}
		return value
	}

	podName := requireEnv("ADDON_OPERATOR_POD")
	podIP := requireEnv("ADDON_OPERATOR_LISTEN_ADDRESS")
	podNs := requireEnv("ADDON_OPERATOR_NAMESPACE")

	// Dots in the listen address are replaced with dashes so that the address
	// forms a single dot-separated component of the identity.
	identity := fmt.Sprintf("%s.%s.%s.pod", podName, strings.ReplaceAll(podIP, ".", "-"), podNs)

	// Create a leaderElectionConfig for leader election
	err := operator.WithLeaderElector(&leaderelection.LeaderElectionConfig{
		Lock: &resourcelock.LeaseLock{
			LeaseMeta: v1.ObjectMeta{
				Name:      leaseName,
				Namespace: podNs,
			},
			Client: operator.KubeClient().CoordinationV1(),
			LockConfig: resourcelock.ResourceLockConfig{
				Identity: identity,
			},
		},
		LeaseDuration: time.Duration(leaseDuration) * time.Second,
		RenewDeadline: time.Duration(renewalDeadline) * time.Second,
		RetryPeriod:   time.Duration(retryPeriod) * time.Second,
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: func(leaderCtx context.Context) {
				if err := run(leaderCtx, operator); err != nil {
					log.Error(err)
					os.Exit(1)
				}
			},
			OnStoppedLeading: func() {
				log.Info("Restarting because the leadership was handed over")
				operator.Stop()
				os.Exit(1)
			},
		},
		ReleaseOnCancel: true,
	})
	if err != nil {
		// Do not fall through to LeaderElector.Run: presumably the elector is
		// not initialised when WithLeaderElector reports an error, so running
		// it could only fail later in a less obvious way.
		log.Error(err)
		os.Exit(1)
	}

	// When the context is canceled, signal PID 1 with SIGUSR2 to shut the
	// operator down; exit directly if the signal cannot be delivered.
	go func() {
		<-ctx.Done()
		log.Info("Context canceled received")
		if err := syscall.Kill(1, syscall.SIGUSR2); err != nil {
			log.Infof("Couldn't shutdown addon-operator: %s\n", err)
			os.Exit(1)
		}
	}()

	operator.LeaderElector.Run(ctx)
}
7 changes: 7 additions & 0 deletions examples/001-startup-global/addon-operator-cm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
name: addon-operator
data:
global: ""
6 changes: 3 additions & 3 deletions examples/001-startup-global/addon-operator-rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ kind: ServiceAccount
metadata:
name: addon-operator-acc
---
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: addon-operator
Expand All @@ -20,7 +20,7 @@ rules:
verbs:
- "*"
---
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: addon-operator
Expand All @@ -31,4 +31,4 @@ roleRef:
subjects:
- kind: ServiceAccount
name: addon-operator-acc
namespace: example-startup-global
namespace: default
3 changes: 3 additions & 0 deletions examples/002-startup-global-high-availability/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM flant/addon-operator:latest
ADD modules /modules
ADD global-hooks /global-hooks
43 changes: 43 additions & 0 deletions examples/002-startup-global-high-availability/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
## onStartup global hooks example

Example of a global hook written as a bash script.

### run

Build addon-operator image with custom scripts:

```
docker build -t "registry.mycompany.com/addon-operator:startup-global" .
docker push registry.mycompany.com/addon-operator:startup-global
```

Edit image in addon-operator-pod.yaml and apply manifests:

```
kubectl create ns example-startup-global
kubectl -n example-startup-global apply -f addon-operator-rbac.yaml
kubectl -n example-startup-global apply -f addon-operator-pod.yaml
```

See in logs that hook.sh was run at startup:

```
kubectl -n example-startup-global logs pod/addon-operator -f
...
INFO : Initializing global hooks ...
INFO : INIT: global hook 'hook.sh' ...
...
INFO : TASK_RUN GlobalHookRun@ON_STARTUP hook.sh
INFO : Running global hook 'hook.sh' binding 'ON_STARTUP' ...
OnStartup global hook
...
```

### cleanup

```
kubectl delete clusterrolebinding/addon-operator
kubectl delete clusterrole/addon-operator
kubectl delete ns/example-startup-global
docker rmi registry.mycompany.com/addon-operator:startup-global
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
name: addon-operator
data:
global: ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
name: addon-operator
spec:
replicas: 2
selector:
matchLabels:
app: addon-operator
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 1
type: RollingUpdate
template:
metadata:
labels:
app: addon-operator
spec:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchExpressions:
- key: app
operator: In
values:
- addon-operator
topologyKey: kubernetes.io/hostname
weight: 100
containers:
- env:
- name: ADDON_OPERATOR_POD
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: ADDON_OPERATOR_HA
value: "true"
- name: ADDON_OPERATOR_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: ADDON_OPERATOR_LISTEN_ADDRESS
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
image: registry.mycompany.com/addon-operator:ha
imagePullPolicy: IfNotPresent
name: addon-operator
readinessProbe:
httpGet:
path: /readyz
port: 9650
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
serviceAccount: addon-operator-acc
serviceAccountName: addon-operator-acc
terminationGracePeriodSeconds: 30
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: addon-operator-acc
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: addon-operator
rules:
- apiGroups:
- "*"
resources:
- "*"
verbs:
- "*"
- nonResourceURLs:
- "*"
verbs:
- "*"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: addon-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: addon-operator
subjects:
- kind: ServiceAccount
name: addon-operator-acc
namespace: default
Loading

0 comments on commit 4bd99b7

Please sign in to comment.