Enable expanding root disk `root_disk_size_gb`

Software such as Nvidia's libraries, required by many AI / LLM
applications, isn't easily installed anywhere other than the root disk;
however, the free space on the root disk of a Jammy stemcell is too
small to accommodate these libraries.

To address this, we introduce a new property, `root_disk_size_gb`.
Setting this property will expand the root disk during VM creation (in
the `create_vm` CPI method, before the BOSH packages are installed).

Typical use (in a Cloud Config):

```yaml
vm_extensions:
- name: 20G_root
  cloud_properties:
    root_disk_size_gb: 20
```

Setting this property disables the VM's "linked clone" feature [0],
which is incompatible with extending the size of the root disk. The VM
will therefore consume more datastore space than a linked clone would,
typically the full size of the root disk (20 GiB in the example above).
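
A minimal sketch of that decision, mirroring the one-liner in the
`vm_creator.rb` diff below:

```ruby
# Expanding a disk that has a parent fails with "Invalid operation for
# device '0'. Disks with parents cannot be expanded.", so fall back to a
# full clone whenever a root-disk size is requested.
linked = vm_config.root_disk_size_gb > 0 ? false : true
```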

Adding this feature brings the vSphere CPI to parity with the AWS,
Azure, and GCP CPIs.

The root disk size is measured in GiB (1 GiB = 1,073,741,824 bytes), not
in GB (1 GB = 1,000,000,000 bytes).
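
The CPI resizes the disk through `capacity_in_kb`, so the conversion is
GiB → KiB (1 GiB = 2^20 KiB); a minimal sketch of the arithmetic used in
the diff below:

```ruby
# 20 GiB expressed in KiB, the unit vSphere's capacity_in_kb expects.
root_disk_size_gb = 20
capacity_in_kb = root_disk_size_gb * 2**20 # => 20_971_520 KiB (21,474,836,480 bytes)
```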

Drive-by:

Removed the unnecessary initialization `config_spec.device_change = []`:
the `ConfigSpec` class already initializes `device_change` to an empty
array, so we don't need to do it ourselves.
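
A minimal illustration (`device_spec` as in the diff below): appending to
`device_change` works without the explicit assignment because, as noted
above, `ConfigSpec.new` already provides an empty array.

```ruby
config_spec = VimSdk::Vim::Vm::ConfigSpec.new
# config_spec.device_change = []  # unnecessary: already defaults to []
config_spec.device_change << device_spec
```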

[0] A linked clone is a copy of a virtual machine that shares virtual
disks with the parent virtual machine. Only changes from the parent disk
are recorded, saving on disk space.

Signed-off-by: Brian Cunnie <[email protected]>
cunnie committed Feb 1, 2024
1 parent dcb5f87 commit bf35f00
Showing 5 changed files with 93 additions and 13 deletions.
4 changes: 4 additions & 0 deletions src/vsphere_cpi/lib/cloud/vsphere/vm_config.rb
@@ -76,6 +76,10 @@ def pci_passthroughs
vm_type.pci_passthroughs || []
end

def root_disk_size_gb
vm_type.root_disk_size_gb.to_i
end

def storage_policy_name
@manifest_params[:storage_policy]
end
15 changes: 12 additions & 3 deletions src/vsphere_cpi/lib/cloud/vsphere/vm_creator.rb
@@ -183,6 +183,8 @@ def create(vm_config)

# Clone VM
logger.info("Cloning vm: #{replicated_stemcell_vm} to #{vm_config.name}")
# Don't link clone if expanding root disk, otherwise "Invalid operation for device '0'. Disks with parents cannot be expanded."
linked = vm_config.root_disk_size_gb > 0 ? false : true
created_vm_mob = @client.wait_for_task do
@cpi.clone_vm(
replicated_stemcell_vm.mob,
@@ -191,7 +193,7 @@
cluster.resource_pool.mob,
datastore: datastore.mob,
host:,
linked: true,
linked: linked,
snapshot: snapshot.current_snapshot,
config: config_spec,
datastore_cluster:
@@ -251,7 +253,6 @@ def create(vm_config)
# Jammy stemcell at hardware version 13 only allows 1 vGPU; we want to be able to add more
unless vm_config.vgpus.empty?
config_spec = VimSdk::Vim::Vm::ConfigSpec.new
config_spec.device_change = []
vm_config.vgpus.each do |vgpu|
vgpu = Resources::PCIPassthrough.create_vgpu(vgpu)
vgpu_config = Resources::VM.create_add_device_spec(vgpu)
@@ -261,7 +262,6 @@
end
unless vm_config.pci_passthroughs.empty?
config_spec = VimSdk::Vim::Vm::ConfigSpec.new
config_spec.device_change = []
vm_config.pci_passthroughs.each do |pci_passthrough|
virtual_pci_passthrough = Resources::PCIPassthrough.create_pci_passthrough(
vendor_id: pci_passthrough['vendor_id'],
@@ -271,6 +271,15 @@
end
@client.reconfig_vm(created_vm_mob, config_spec)
end

if vm_config.root_disk_size_gb > 0
device_spec = Resources::VM.create_edit_device_spec(created_vm.system_disk)
device_spec.device.capacity_in_kb = vm_config.root_disk_size_gb * 2 ** 20 # GiB → kiB
config_spec = VimSdk::Vim::Vm::ConfigSpec.new
config_spec.device_change << device_spec
@client.reconfig_vm(created_vm_mob, config_spec)
end

# DRS Rules
create_drs_rules(vm_config, created_vm.mob, cluster)

1 change: 1 addition & 0 deletions src/vsphere_cpi/lib/cloud/vsphere/vm_type.rb
@@ -27,6 +27,7 @@ def initialize(datacenter, cloud_properties, pbm)
nsxt
pci_passthroughs
ram
root_disk_size_gb
storage_policy
tags
upgrade_hw_version
37 changes: 37 additions & 0 deletions src/vsphere_cpi/spec/integration/root_size_disk_gb_spec.rb
@@ -0,0 +1,37 @@
require 'integration/spec_helper'

describe 'root_disk_size_gb property' do

let(:vm_type) do
{
'ram' => 512,
'disk' => 2048,
'cpu' => 1,
}
end

context 'when "root_disk_size_gb" is not set' do
it 'creates a VM whose system disk is a linked-clone to the stemcell' do
simple_vm_lifecycle(@cpi, @vlan, vm_type) do |vm_id|
vm = @cpi.vm_provider.find(vm_id)
stemcell = @cpi.vm_provider.find(@stemcell_id)
system_disk = vm.system_disk
stemcell_disk = stemcell.system_disk
expect(system_disk.backing.parent.uuid).to eq(stemcell_disk.backing.parent.uuid)
end
end
end

context 'when "root_disk_size_gb" is set' do
let(:root_disk_size_gb) { 15 }
it 'creates a VM whose system disk is not a linked-clone and has the requested size' do
vm_type['root_disk_size_gb'] = root_disk_size_gb
simple_vm_lifecycle(@cpi, @vlan, vm_type) do |vm_id|
vm = @cpi.vm_provider.find(vm_id)
system_disk = vm.system_disk
expect(system_disk.backing.parent).to be_nil # no parent disk, not a linked-clone
expect(system_disk.capacity_in_kb / 1024 / 1024).to eq(root_disk_size_gb) # convert kiB → GiB
end
end
end
end
49 changes: 39 additions & 10 deletions src/vsphere_cpi/spec/unit/cloud/vsphere/vm_creator_spec.rb
@@ -21,7 +21,15 @@ module VSphereCloud
)
}
let(:agent_env) { instance_double('VSphereCloud::AgentEnv') }
let(:config) { [ 'default_disk_type'=> default_disk_type, 'resource_pool' => 'test', 'datacenters'=> [ datacenter ],'host' => 'localhost', 'user' => 'admin', 'password' => 'password' ] }
let(:config) {
[
default_disk_type: default_disk_type,
resource_pool: 'test',
datacenters: [datacenter],
host: 'localhost',
user: 'admin',
password: 'password'
] }
let(:cpi_config) do
instance_double(
'VSphereCloud::Config',
@@ -36,6 +44,13 @@ module VSphereCloud
let(:custom_fields_manager) { instance_double('VimSdk::Vim::CustomFieldsManager') }
let(:datacenter) { { mob: datacenter_mob , name: 'dc-1', 'name' => 'dc-1', 'persistent_datastore_pattern' => 'ds-ps-*', 'datastore_pattern'=> 'ds-*', 'vm_folder'=> 'bosh_vms', 'template_folder'=> 'stemcells', 'disk_path' => 'disks', 'clusters' => [] } }
let(:datacenter_mob) { instance_double('VimSdk::Vim::Datacenter', name: 'dc-1') }
let(:cluster) { instance_double(Resources::Cluster, mob: cluster_mob,
resource_pool: resource_pool, host_group: nil, accessible_datastores: { "ds-1": {}, "ds-2": {} }) }
let(:cluster_mob) { instance_double('VimSdk::Vim::ClusterComputeResource') }
let(:resource_pool) { instance_double(Resources::ResourcePool, mob: resource_pool_mob) }
let(:resource_pool_mob) { instance_double(VimSdk::Vim::ResourcePool) }
let(:datastore) { instance_double(Resources::Datastore, name: 'ds-1', mob: datastore_mob) }
let(:datastore_mob) { instance_double(VimSdk::Vim::Datastore) }
let(:default_disk_type) { 'preallocated' }
let(:ip_conflict_detector) { instance_double(IPConflictDetector, ensure_no_conflicts: nil) }
let(:ensure_no_ip_conflicts) { true }
@@ -162,13 +177,13 @@ module VSphereCloud

cluster_placements: [
instance_double(VmPlacement,
cluster: instance_double(Resources::Cluster, host_group: nil, mob: nil, accessible_datastores: {"ds-1": {}, "ds-2": {}}),
fallback_disk_placements: [instance_double(Resources::Datastore, name: "ds-2")],
disk_placement: instance_double(Resources::Datastore, name: "ds-1")
cluster: cluster,
disk_placement: datastore,
fallback_disk_placements: [instance_double(Resources::Datastore, name: "ds-2")]
)
],
stemcell_cid: 'here-stemcell-cid'

stemcell_cid: 'here-stemcell-cid',
root_disk_size_gb: 0
)
}

@@ -199,7 +214,21 @@ module VSphereCloud
expect(client).to receive(:upgrade_vm_virtual_hardware).with(cloned_vm_mob)
expect(client).to receive(:reconfig_vm).with(cloned_vm_mob, anything)
subject.create(vm_config)
# expect{subject.create(vm_config)}.not_to raise_exception
end
end

context 'with root_disk_size_gb set to 15 GiB' do
let(:system_disk) { instance_double('VimSdk::Vim::Vm::Device::VirtualDisk') }
let(:device_spec) { instance_double(VimSdk::Vim::Vm::Device::VirtualDeviceSpec, device: system_disk) }
let(:new_disk_size_gb) { 15 }
before do
allow(vm_config).to receive(:root_disk_size_gb).and_return(new_disk_size_gb)
allow(Resources::VM).to receive(:create_edit_device_spec).and_return(device_spec)
end
it "reconfigures the VM with a larger root disk and we don't check linked clones because the mocking is already too much" do
expect(system_disk).to receive(:capacity_in_kb=).with(new_disk_size_gb * 2 ** 20) # convert kiB → GiB
expect(client).to receive(:reconfig_vm).with(cloned_vm_mob, anything)
subject.create(vm_config)
end
end

@@ -243,13 +272,13 @@ module VSphereCloud

cluster_placements: [
instance_double(VmPlacement,
cluster: instance_double(Resources::Cluster, host_group: nil, mob: nil, accessible_datastores: {"ds-1": {}, "ds-2": {}}),
cluster: cluster,
fallback_disk_placements: [],
disk_placement: instance_double(Resources::Datastore, name: "ds-1")
)
],
stemcell_cid: 'here-stemcell-cid'

stemcell_cid: 'here-stemcell-cid',
root_disk_size_gb: 0
)
}
it 'still fails if there are no viable fallback_disk_placements' do
