Skip to content

Commit

Permalink
fix: allow communication with instance managers in an unknown state
Browse files Browse the repository at this point in the history
Longhorn 6552

Signed-off-by: Eric Weber <[email protected]>
  • Loading branch information
ejweber committed Sep 3, 2024
1 parent d5832a1 commit a6e5f10
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 17 deletions.
10 changes: 5 additions & 5 deletions controller/engine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ func (ec *EngineController) CreateInstance(obj interface{}) (*longhorn.InstanceP
return nil, err
}

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -578,7 +578,7 @@ func (ec *EngineController) DeleteInstance(obj interface{}) (err error) {
return nil
}

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, true)
if err != nil {
return err
}
Expand Down Expand Up @@ -613,7 +613,7 @@ func (ec *EngineController) GetInstance(obj interface{}) (*longhorn.InstanceProc
return nil, err
}
}
c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return nil, err
}
Expand All @@ -633,7 +633,7 @@ func (ec *EngineController) LogInstance(ctx context.Context, obj interface{}) (*
return nil, nil, err
}

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -2130,7 +2130,7 @@ func (ec *EngineController) UpgradeEngineInstance(e *longhorn.Engine, log *logru
return err
}

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return err
}
Expand Down
6 changes: 3 additions & 3 deletions controller/instance_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ type InstanceManagerMonitor struct {
}

func updateInstanceManagerVersion(im *longhorn.InstanceManager) error {
cli, err := engineapi.NewInstanceManagerClient(im)
cli, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return err
}
Expand Down Expand Up @@ -507,7 +507,7 @@ func (imc *InstanceManagerController) syncLogSettingsToInstanceManagerPod(im *lo
return nil
}

client, err := engineapi.NewInstanceManagerClient(im)
client, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return errors.Wrapf(err, "failed to create instance manager client for %v", im.Name)
}
Expand Down Expand Up @@ -1533,7 +1533,7 @@ func (imc *InstanceManagerController) startMonitoring(im *longhorn.InstanceManag
}

// TODO: #2441 refactor this when we do the resource monitoring refactor
client, err := engineapi.NewInstanceManagerClient(im)
client, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
log.WithError(err).Errorf("Failed to initialize im client to %v before monitoring", im.Name)
return
Expand Down
8 changes: 4 additions & 4 deletions controller/replica_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ func (rc *ReplicaController) CreateInstance(obj interface{}) (*longhorn.Instance
return nil, err
}

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -562,7 +562,7 @@ func (rc *ReplicaController) DeleteInstance(obj interface{}) (err error) {
}
}()

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return err
}
Expand Down Expand Up @@ -628,7 +628,7 @@ func (rc *ReplicaController) GetInstance(obj interface{}) (*longhorn.InstancePro
}
}

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -659,7 +659,7 @@ func (rc *ReplicaController) LogInstance(ctx context.Context, obj interface{}) (
return nil, nil, err
}

c, err := engineapi.NewInstanceManagerClient(im)
c, err := engineapi.NewInstanceManagerClient(im, false)
if err != nil {
return nil, nil, err
}
Expand Down
23 changes: 18 additions & 5 deletions engineapi/instance_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,24 @@ func CheckInstanceManagerProxySupport(im *longhorn.InstanceManager) error {
return nil
}

// NewInstanceManagerClient creates a new instance manager client
func NewInstanceManagerClient(im *longhorn.InstanceManager) (*InstanceManagerClient, error) {
// Do not check the major version here. Since IM cannot get the major version without using this client to call VersionGet().
if im.Status.CurrentState != longhorn.InstanceManagerStateRunning || im.Status.IP == "" {
return nil, fmt.Errorf("invalid Instance Manager %v, state: %v, IP: %v", im.Name, im.Status.CurrentState, im.Status.IP)
// NewInstanceManagerClient creates a new instance manager client. Usually, we only want to attempt to communicate with
// an instance manager in state running. However, sometimes it makes sense to make a best effort attempt to communicate
// with an instance manager in state unknown. It should be safe to do so, since Kubernetes should not reassign the pod's
// IP address until it is at least terminating, which puts the instance manager in state error. However, there is an
// increased chance of failure.
func NewInstanceManagerClient(im *longhorn.InstanceManager, allowUnknown bool) (*InstanceManagerClient, error) {
// Do not check the major version here since IM cannot get the major version without using this client to call
// VersionGet().

if im.Status.IP == "" {
return nil, fmt.Errorf("invalid instance manager %v, state %v, IP %v", im.Name, im.Status.CurrentState, im.Status.IP)
}
if im.Status.CurrentState == longhorn.InstanceManagerStateRunning {
// Nothing to do.
} else if im.Status.CurrentState == longhorn.InstanceManagerStateUnknown && allowUnknown {
logrus.Warnf("Communicating with instance manager %v, state %v, IP %v", im.Name, im.Status.CurrentState, im.Status.IP)
} else {
return nil, fmt.Errorf("invalid instance manager %v, state %v, IP %v", im.Name, im.Status.CurrentState, im.Status.IP)
}

// TODO: Initialize the following gRPC clients are similar. This can be simplified via factory method.
Expand Down

0 comments on commit a6e5f10

Please sign in to comment.