Skip to content

Commit 07aca7b

Browse files
authored
wait for volume attachments to be detached before deleting a vm (#281)
* wait for volume attachments to be detached before deleting a vm * decrease volume check interval to 2 sec.
1 parent 7c0f423 commit 07aca7b

File tree

4 files changed

+149
-1
lines changed

4 files changed

+149
-1
lines changed

cloud/scope/virtualmachine.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ func (m *VirtualMachineScope) ClusterName() string {
139139
return m.AzureStackHCIVirtualMachine.Spec.ClusterName
140140
}
141141

142+
func (m *VirtualMachineScope) Client() client.Client {
143+
return m.client
144+
}
145+
142146
// Location returns the AzureStackHCIVirtualMachine location.
143147
func (m *VirtualMachineScope) Location() string {
144148
return m.AzureStackHCIVirtualMachine.Spec.Location

controllers/azurestackhcivirtualmachine_reconciler.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,24 @@ package controllers
1919

2020
import (
2121
"encoding/base64"
22+
"time"
2223

2324
infrav1 "github.com/microsoft/cluster-api-provider-azurestackhci/api/v1beta1"
2425
azurestackhci "github.com/microsoft/cluster-api-provider-azurestackhci/cloud"
2526
"github.com/microsoft/cluster-api-provider-azurestackhci/cloud/scope"
2627
"github.com/microsoft/cluster-api-provider-azurestackhci/cloud/services/disks"
2728
"github.com/microsoft/cluster-api-provider-azurestackhci/cloud/services/networkinterfaces"
2829
"github.com/microsoft/cluster-api-provider-azurestackhci/cloud/services/virtualmachines"
30+
infrav1util "github.com/microsoft/cluster-api-provider-azurestackhci/pkg/util"
2931
sdk_compute "github.com/microsoft/moc-sdk-for-go/services/compute"
3032
"github.com/pkg/errors"
33+
"k8s.io/apimachinery/pkg/util/wait"
34+
"sigs.k8s.io/controller-runtime/pkg/client"
35+
)
36+
37+
const (
38+
waitVolumeAttachmentsInterval = time.Second * 2
39+
waitVolumeAttachmentsTimeout = time.Minute * 5
3140
)
3241

3342
// azureStackHCIVirtualMachineService are list of services required by cluster actuator, easy to create a fake
@@ -90,6 +99,29 @@ func (s *azureStackHCIVirtualMachineService) Delete() error {
9099
Name: s.vmScope.Name(),
91100
}
92101

102+
now := time.Now()
103+
104+
if err := wait.PollImmediate(waitVolumeAttachmentsInterval, waitVolumeAttachmentsTimeout, func() (bool, error) {
105+
volumes, err := s.ListVolumeAttachments()
106+
if err != nil {
107+
s.vmScope.Error(err, "failed to check volume attachment on vm", "vmName", s.vmScope.Name())
108+
return true, nil
109+
}
110+
if len(volumes) == 0 {
111+
s.vmScope.Info("No volume attachments found on vm", "vmName", s.vmScope.Name())
112+
return true, nil
113+
}
114+
for _, volume := range volumes {
115+
s.vmScope.Info("VolumeAttachment is still attached on vm, waiting for the volume to be detached before delete the vm", "volume", volume, "vmName", s.vmScope.Name())
116+
}
117+
return false, nil
118+
}); err != nil {
119+
s.vmScope.Error(err, "failed to wait for volume attachments to be detached on vm", "vmName", s.vmScope.Name())
120+
}
121+
122+
latency := time.Since(now)
123+
s.vmScope.Info("Waiting for volume attachments to be detached on vm took", "vmName", s.vmScope.Name(), "duration", latency.String())
124+
93125
err := s.virtualMachinesSvc.Delete(s.vmScope.Context, vmSpec)
94126
if err != nil {
95127
return errors.Wrapf(err, "failed to delete machine")
@@ -117,6 +149,40 @@ func (s *azureStackHCIVirtualMachineService) Delete() error {
117149
return nil
118150
}
119151

152+
func (s *azureStackHCIVirtualMachineService) ListVolumeAttachments() ([]string, error) {
153+
// target cluster key
154+
clusterKey := client.ObjectKey{
155+
Namespace: s.vmScope.AzureStackHCIVirtualMachine.Namespace,
156+
Name: s.vmScope.AzureStackHCIVirtualMachine.Spec.ClusterName,
157+
}
158+
159+
targetClusterClient, err := infrav1util.NewTargetClusterClient(s.vmScope.Context, s.vmScope.Client(), clusterKey)
160+
if err != nil {
161+
s.vmScope.Error(err, "failed to create target cluster client", "nameSpace", clusterKey.Namespace, "name", clusterKey.Name)
162+
return nil, errors.Wrapf(err, "failed to create target cluster client for cluster %s:%s", clusterKey.Namespace, clusterKey.Name)
163+
}
164+
165+
// get kubernetes node name of the AzureStackHCIVirtualMachine that's being reconciled
166+
nodeName, err := infrav1util.GetNodeName(s.vmScope.Context, s.vmScope.Client(), s.vmScope.AzureStackHCIVirtualMachine.ObjectMeta)
167+
if err != nil {
168+
s.vmScope.Error(err, "failed to get valid node name for vm", "vmName", s.vmScope.Name())
169+
return nil, errors.Wrapf(err, "failed to get node name for vm %s", s.vmScope.Name())
170+
}
171+
172+
if nodeName == "" {
173+
s.vmScope.Info("Node name is empty, skipping volume attachment check", "vmName", s.vmScope.Name())
174+
return nil, nil
175+
}
176+
177+
// get volume attachments from target cluster
178+
volumes, err := infrav1util.ListVolumeAttachmentOnNode(s.vmScope.Context, targetClusterClient, clusterKey, nodeName)
179+
if err != nil {
180+
s.vmScope.Error(err, "failed to check volume attachment on vm", "vmName", s.vmScope.Name())
181+
return nil, errors.Wrapf(err, "failed to check volume attachment on vm %s", s.vmScope.Name())
182+
}
183+
return volumes, nil
184+
}
185+
120186
func (s *azureStackHCIVirtualMachineService) VMIfExists() (*infrav1.VM, error) {
121187

122188
vmSpec := &virtualmachines.Spec{

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ require (
9898
gopkg.in/yaml.v2 v2.4.0 // indirect
9999
gopkg.in/yaml.v3 v3.0.1 // indirect
100100
k8s.io/apiextensions-apiserver v0.29.3 // indirect
101+
k8s.io/cluster-bootstrap v0.29.3 // indirect
101102
k8s.io/component-base v0.29.3 // indirect
102103
k8s.io/klog v1.0.0 // indirect
103104
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect

pkg/util/util.go

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,30 @@ import (
44
"context"
55
"crypto/rand"
66
"math/big"
7+
"strings"
8+
"time"
79

810
infrav1 "github.com/microsoft/cluster-api-provider-azurestackhci/api/v1beta1"
11+
"github.com/pkg/errors"
12+
13+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
914
"k8s.io/apimachinery/pkg/types"
1015
"k8s.io/apimachinery/pkg/util/uuid"
16+
"k8s.io/client-go/kubernetes"
17+
"k8s.io/client-go/rest"
18+
"k8s.io/client-go/tools/clientcmd"
19+
"k8s.io/utils/pointer"
20+
1121
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
22+
capiutil "sigs.k8s.io/cluster-api/util"
23+
"sigs.k8s.io/cluster-api/util/kubeconfig"
1224
"sigs.k8s.io/controller-runtime/pkg/client"
1325
"sigs.k8s.io/controller-runtime/pkg/controller"
1426
)
1527

1628
const (
17-
charSet = "abcdefghijklmnopqrstuvwxyz0123456789"
29+
charSet = "abcdefghijklmnopqrstuvwxyz0123456789"
30+
diskCsiDriver = "disk.csi.akshci.com"
1831
)
1932

2033
// GetAzureStackHCIMachinesInCluster gets a cluster's AzureStackHCIMachines resources.
@@ -37,6 +50,70 @@ func GetAzureStackHCIMachinesInCluster(ctx context.Context, controllerClient cli
3750
return machines, nil
3851
}
3952

53+
// Create a target cluster config based on the secret in the management cluster
54+
func NewTargetClusterConfig(ctx context.Context, controllerClient client.Reader, clusterKey client.ObjectKey) (*rest.Config, error) {
55+
kubeconfig, err := kubeconfig.FromSecret(ctx, controllerClient, clusterKey)
56+
if err != nil {
57+
return nil, errors.Wrapf(err, "failed to retrieve kubeconfig secret for cluster %s:%s", clusterKey.Namespace, clusterKey.Name)
58+
}
59+
60+
restConfig, err := clientcmd.RESTConfigFromKubeConfig(kubeconfig)
61+
if err != nil {
62+
return nil, errors.Wrapf(err, "failed to create client configuration for cluster %s:%s", clusterKey.Namespace, clusterKey.Name)
63+
}
64+
65+
return restConfig, nil
66+
}
67+
68+
func NewTargetClusterClient(ctx context.Context, controllerClient client.Client, clusterKey client.ObjectKey) (*kubernetes.Clientset, error) {
69+
restConfig, err := NewTargetClusterConfig(ctx, controllerClient, clusterKey)
70+
if err != nil {
71+
return nil, errors.Wrapf(err, "failed to create client configuration for cluster %s:%s", clusterKey.Namespace, clusterKey.Name)
72+
}
73+
74+
// sets the timeout, otherwise this will default to 0 (i.e. no timeout)
75+
restConfig.Timeout = 10 * time.Second
76+
77+
targetClusterClient, err := kubernetes.NewForConfig(restConfig)
78+
if err != nil {
79+
return nil, errors.Wrapf(err, "failed to connect to the cluster %s:%s", clusterKey.Namespace, clusterKey.Name)
80+
}
81+
82+
return targetClusterClient, err
83+
}
84+
85+
// GetNodeName returns the Node Name from the resource's owning CAPI machine object.
86+
func GetNodeName(ctx context.Context, client client.Client, obj metav1.ObjectMeta) (string, error) {
87+
machine, err := capiutil.GetOwnerMachine(ctx, client, obj)
88+
if err != nil {
89+
return "", errors.Wrapf(err, "failed to get owner machine for %s.%s", obj.Namespace, obj.Name)
90+
}
91+
if machine == nil {
92+
return "", errors.Errorf("resource %s.%s has no owning machine", obj.Namespace, obj.Name)
93+
}
94+
if machine.Status.NodeRef == nil {
95+
return "", errors.Errorf("machine %s.%s has no node ref", machine.Namespace, machine.Name)
96+
}
97+
return machine.Status.NodeRef.Name, nil
98+
}
99+
100+
func ListVolumeAttachmentOnNode(ctx context.Context, client *kubernetes.Clientset, clusterKey client.ObjectKey, nodeName string) ([]string, error) {
101+
volumeAttachmentList, err := client.StorageV1().VolumeAttachments().List(ctx, metav1.ListOptions{})
102+
if err != nil {
103+
return nil, errors.Wrapf(err, "failed to list VolumeAttachments for Cluster %s:%s", clusterKey.Namespace, clusterKey.Name)
104+
}
105+
106+
res := []string{}
107+
if volumeAttachmentList != nil && len(volumeAttachmentList.Items) > 0 {
108+
for _, va := range volumeAttachmentList.Items {
109+
if va.Spec.Attacher == diskCsiDriver && strings.EqualFold(va.Spec.NodeName, nodeName) {
110+
res = append(res, pointer.StringDeref(va.Spec.Source.PersistentVolumeName, ""))
111+
}
112+
}
113+
}
114+
return res, nil
115+
}
116+
40117
// RandomAlphaNumericString returns a random alphanumeric string.
41118
func RandomAlphaNumericString(n int) (string, error) {
42119
result := make([]byte, n)

0 commit comments

Comments
 (0)