|
| 1 | +--- |
| 2 | +title: azure-data-disk |
| 3 | +authors: |
| 4 | +- "@jcpowermac" |
| 5 | +reviewers: |
| 6 | +- "@JoelSpeed" |
| 7 | +- "@patrickdillon" |
| 8 | +approvers: |
| 9 | +- "@JoelSpeed" |
| 10 | +- "@patrickdillon" |
| 11 | +api-approvers: |
| 12 | +- "@JoelSpeed" |
| 13 | +creation-date: 2025-04-22 |
| 14 | +last-updated: 2025-04-22 |
| 15 | +tracking-link: |
| 16 | +- https://issues.redhat.com/browse/SPLAT-2133 |
| 17 | +see-also: |
| 18 | +replaces: |
| 19 | +superseded-by: |
| 20 | +--- |
| 21 | + |
| 22 | +# Azure Multi Disk |
| 23 | + |
| 24 | +## Summary |
| 25 | + |
| 26 | + |
| 27 | +## Motivation |
| 28 | + |
| 29 | +As the use of Kubernetes clusters grows, admins need more and more improvements to the VMs themselves to make sure they run as smoothly as possible. The number of cores and the amount of memory continue to increase for each machine, which in turn increases the number of workloads running on each virtual machine. This growth means the base VM image no longer provides enough storage for OS needs. In some cases, users just increase the size of the primary disk using the existing configuration options for machines; however, this does not allow for all desired configuration choices. Admins now want the ability to add additional disks to these VMs for things such as etcd storage, image storage, container runtime and even swap. |
| 30 | + |
| 31 | +### User Stories |
| 32 | + |
| 33 | +* As an OpenShift administrator, I want to be able to add additional disks to any of the Azure VMs that act as nodes, so that I can assign those disks to special-case storage such as etcd data, swap, container images, etc. |
| 34 | + |
| 35 | +### Goals |
| 36 | + |
| 37 | + |
| 38 | +### Non-Goals |
| 39 | + |
| 40 | + |
| 41 | +## Proposal |
| 42 | + |
| 43 | + |
| 44 | +### Workflow Description |
| 45 | + |
| 46 | + |
| 47 | +### API Extensions |
| 48 | + |
| 49 | +This enhancement extends the installer's CRD / type used for the install-config.yaml. |
| 50 | + |
| 51 | +#### Installer |
| 52 | + |
| 53 | +The installer's install-config will be enhanced to allow the Azure machine pools to define data disks. |
| 54 | + |
| 55 | +```go |
| 56 | +// MachinePool stores the configuration for a machine pool installed |
| 57 | +// on Azure. |
| 58 | +type MachinePool struct { |
| 59 | + ... |
| 60 | + // DataDisks specifies the parameters that are used to add one or more data disks to the machine. |
| 61 | + // +optional |
| 62 | + DataDisks []capz.DataDisk `json:"dataDisks,omitempty"` |
| 63 | +} |
| 64 | +``` |
| 65 | + |
| 66 | +Converting the capz (Cluster API Provider Azure) `DataDisk` to the mapi (Machine API) `DataDisk`: |
| 67 | + |
| 68 | +```go |
| 69 | + |
| 70 | + var dataDisks []machineapi.DataDisk |
| 71 | + |
| 72 | + for _, disk := range mpool.DataDisks { |
| 73 | + dataDisk := machineapi.DataDisk{ |
| 74 | + NameSuffix: disk.NameSuffix, |
| 75 | + DiskSizeGB: disk.DiskSizeGB, |
| 76 | + CachingType: machineapi.CachingTypeOption(disk.CachingType), |
| 77 | + |
| 78 | + // TODO: jcallen ** WARNING ** does this make sense to force delete? |
| 79 | + // TODO: jcallen why does mapi have this and capz does not? |
| 80 | + DeletionPolicy: machineapi.DiskDeletionPolicyTypeDelete, |
| 81 | + } |
| 82 | + |
| 83 | + if disk.Lun != nil { |
| 84 | + dataDisk.Lun = *disk.Lun |
| 85 | + } |
| 86 | + |
| 87 | + if disk.ManagedDisk != nil { |
| 88 | + dataDisk.ManagedDisk = machineapi.DataDiskManagedDiskParameters{ |
| 89 | + StorageAccountType: machineapi.StorageAccountType(disk.ManagedDisk.StorageAccountType), |
| 90 | + } |
| 91 | + |
| 92 | + if disk.ManagedDisk.DiskEncryptionSet != nil { |
| 93 | + dataDisk.ManagedDisk.DiskEncryptionSet = (*machineapi.DiskEncryptionSetParameters)(disk.ManagedDisk.DiskEncryptionSet) |
| 94 | + } |
| 95 | + } |
| 96 | + |
| 97 | + dataDisks = append(dataDisks, dataDisk) |
| 98 | + } |
| 99 | + |
| 100 | + spec := &machineapi.AzureMachineProviderSpec{ |
| 101 | + TypeMeta: metav1.TypeMeta{ |
| 102 | + APIVersion: "machine.openshift.io/v1beta1", |
| 103 | + Kind: "AzureMachineProviderSpec", |
| 104 | + }, |
| 105 | + ... |
| 106 | + DataDisks: dataDisks, |
| 107 | + } |
| 108 | + |
| 109 | +``` |
| 110 | + |
| 111 | +capz machine spec change |
| 112 | + |
| 113 | +```go |
| 114 | + for idx := int64(0); idx < total; idx++ { |
| 115 | + zone := mpool.Zones[int(idx)%len(mpool.Zones)] |
| 116 | + azureMachine := &capz.AzureMachine{ |
| 117 | + ObjectMeta: metav1.ObjectMeta{ |
| 118 | + Name: fmt.Sprintf("%s-%s-%d", clusterID, in.Pool.Name, idx), |
| 119 | + Labels: map[string]string{ |
| 120 | + "cluster.x-k8s.io/control-plane": "", |
| 121 | + "cluster.x-k8s.io/cluster-name": clusterID, |
| 122 | + }, |
| 123 | + }, |
| 124 | + Spec: capz.AzureMachineSpec{ |
| 125 | + ... |
| 126 | + DataDisks: mpool.DataDisks, |
| 127 | + }, |
| 128 | + } |
| 129 | +``` |
| 130 | +
|
| 131 | +### Topology Considerations |
| 132 | +
|
| 133 | +#### Hypershift / Hosted Control Planes |
| 134 | +
|
| 135 | +N/A |
| 136 | +
|
| 137 | +#### Standalone Clusters |
| 138 | +
|
| 139 | +N/A |
| 140 | +
|
| 141 | +#### Single-node Deployments or MicroShift |
| 142 | +
|
| 143 | +N/A |
| 144 | +
|
| 145 | +### Implementation Details/Notes/Constraints |
| 146 | +
|
| 147 | +
|
| 148 | +### Risks and Mitigations |
| 149 | +
|
| 150 | +This feature of allowing administrators to add new disks does not really introduce any risks. The disks will be created and attached to the VMs during the VM provisioning process. Once the VM is configured, the administrator can configure these disks to be used however they wish. The assignment of these disks is out of scope for this feature. |
| 151 | +
|
| 152 | +### Drawbacks |
| 153 | +
|
| 154 | +N/A |
| 155 | +
|
| 156 | +## Open Questions [optional] |
| 157 | +
|
| 158 | +
|
| 159 | +## Test Plan |
| 160 | +
|
| 161 | +## Graduation Criteria |
| 162 | +
|
| 163 | +### Dev Preview -> Tech Preview |
| 164 | +
|
| 165 | +- Installer allows configuration of data disks |
| 166 | +- CI jobs for testing installation with data disks configured |
| 167 | +- End user documentation, relative API stability |
| 168 | +- Sufficient test coverage |
| 169 | +
|
| 170 | +### Tech Preview -> GA |
| 171 | +
|
| 172 | +- More testing (upgrade, downgrade, scale) |
| 173 | +- Sufficient time for feedback |
| 174 | +- Available by default |
| 175 | +- User facing documentation created in OCP documentation |
| 176 | +- E2E tests are added for testing compute nodes with data disks |
| 177 | +
|
| 178 | +**For non-optional features moving to GA, the graduation criteria must include |
| 179 | +end to end tests.** |
| 180 | +
|
| 181 | +### Removing a deprecated feature |
| 182 | +
|
| 183 | +N/A |
| 184 | +
|
| 185 | +## Upgrade / Downgrade Strategy |
| 186 | +
|
| 187 | +The upgrade / downgrade process is not being impacted by this feature. No changes will need to be made if rolling back during a failed upgrade. |
| 188 | +
|
| 189 | +## Version Skew Strategy |
| 190 | +
|
| 191 | +N/A |
| 192 | +
|
| 193 | +## Support Procedures |
| 194 | +
|
| 195 | +N/A |
| 196 | +
|
| 197 | +## Alternatives |
| 198 | +
|
| 199 | +N/A |
| 200 | +
|
| 201 | +## Infrastructure Needed [optional] |
| 202 | +
|
| 203 | +
|
| 204 | +## Alternatives (Not Implemented) |
| 205 | +
|
| 206 | +N/A |
| 207 | +
|
| 208 | +## Operational Aspects of API Extensions |
| 209 | +N/A |
| 210 | +
|
| 211 | +
|
| 212 | +
|
0 commit comments