Skip to content

Commit

Permalink
kola: Introduce support for assigning an MSI to spawned VMs
Browse files Browse the repository at this point in the history
For access to Azure resources.

Signed-off-by: Jeremi Piotrowski <[email protected]>
  • Loading branch information
jepio committed Mar 5, 2025
1 parent 38ffbac commit 0d7eb30
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 14 deletions.
1 change: 1 addition & 0 deletions cmd/kola/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ func init() {
sv(&kola.AzureOptions.ResourceGroup, "azure-resource-group", "", "Deploy resources in an existing resource group")
sv(&kola.AzureOptions.AvailabilitySet, "azure-availability-set", "", "Deploy instances with an existing availibity set")
sv(&kola.AzureOptions.KolaVnet, "azure-kola-vnet", "", "Pass the vnet/subnet that kola is being ran from to restrict network access to created storage accounts")
sv(&kola.AzureOptions.VMIdentity, "azure-vm-identity", "", "Assign a managed identity to the VM by name (will be looked up for its ID)")

// do-specific options
sv(&kola.DOOptions.ConfigPath, "do-config-file", "", "DigitalOcean config file (default \"~/"+auth.DOConfigPath+"\")")
Expand Down
63 changes: 63 additions & 0 deletions platform/api/azure/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v5"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions"
Expand Down Expand Up @@ -59,6 +60,7 @@ type API struct {
ipClient *armnetwork.PublicIPAddressesClient
intClient *armnetwork.InterfacesClient
accClient *armstorage.AccountsClient
msiClient *armmsi.UserAssignedIdentitiesClient
Opts *Options
}

Expand Down Expand Up @@ -194,6 +196,12 @@ func (a *API) SetupClients() error {
}
a.accClient = scf.NewAccountsClient()

mcf, err := armmsi.NewClientFactory(a.subID, a.creds, opts)
if err != nil {
return err
}
a.msiClient = mcf.NewUserAssignedIdentitiesClient()

return nil
}

Expand Down Expand Up @@ -302,3 +310,58 @@ func (a *API) GC(gracePeriod time.Duration) error {

return nil
}

// FindManagedIdentityID searches the user-assigned managed identities of the
// whole subscription for one whose name equals identityName and returns its
// resource ID.
//
// The first identity with a matching name and a non-empty ID is returned;
// entries without an ID are skipped. An error is returned when listing the
// identities fails (the underlying error is wrapped and can be inspected with
// errors.Is / errors.As) or when no identity matches.
func (a *API) FindManagedIdentityID(identityName string) (string, error) {
	ctx := context.TODO()

	// List by subscription because the caller only knows the identity name,
	// not the resource group it lives in.
	pager := a.msiClient.NewListBySubscriptionPager(nil)

	for pager.More() {
		page, err := pager.NextPage(ctx)
		if err != nil {
			return "", fmt.Errorf("failed to list managed identities: %w", err)
		}

		for _, identity := range page.Value {
			// Guard against nil entries before dereferencing any field.
			if identity == nil || identity.Name == nil || *identity.Name != identityName {
				continue
			}
			if identity.ID == nil || *identity.ID == "" {
				continue
			}

			// The resource group is only extracted for the log message. The
			// segment name is matched case-insensitively, and the bound keeps
			// idParts[i+1] in range.
			var resourceGroup string
			idParts := strings.Split(*identity.ID, "/")
			for i := 0; i < len(idParts)-1; i++ {
				if strings.EqualFold(idParts[i], "resourceGroups") {
					resourceGroup = idParts[i+1]
					break
				}
			}

			plog.Infof("Found managed identity %s in resource group %s", identityName, resourceGroup)
			return *identity.ID, nil
		}
	}

	return "", fmt.Errorf("managed identity %q was not found in the subscription", identityName)
}

// GetManagedIdentityID looks up the user-assigned managed identity named
// identityName in resourceGroup and returns its resource ID.
//
// An error is returned when the lookup fails or when the returned identity
// carries no ID. The lookup error is wrapped with %w so callers can inspect
// the underlying SDK error with errors.Is / errors.As.
func (a *API) GetManagedIdentityID(resourceGroup, identityName string) (string, error) {
	ctx := context.TODO()

	identity, err := a.msiClient.Get(ctx, resourceGroup, identityName, nil)
	if err != nil {
		return "", fmt.Errorf("failed to get managed identity %q: %w", identityName, err)
	}

	if identity.ID == nil || *identity.ID == "" {
		return "", fmt.Errorf("managed identity %q has no ID", identityName)
	}

	return *identity.ID, nil
}
29 changes: 23 additions & 6 deletions platform/api/azure/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (a *API) getVMRG(rg string) string {
return vmrg
}

func (a *API) getVMParameters(name, sshkey, storageAccountURI string, userdata *conf.Conf, ip *armnetwork.PublicIPAddress, nic *armnetwork.Interface) armcompute.VirtualMachine {
func (a *API) getVMParameters(name, sshkey, storageAccountURI string, userdata *conf.Conf, ip *armnetwork.PublicIPAddress, nic *armnetwork.Interface, managedIdentityID string) armcompute.VirtualMachine {
osProfile := armcompute.OSProfile{
AdminUsername: to.Ptr("core"),
ComputerName: &name,
Expand Down Expand Up @@ -113,6 +113,8 @@ func (a *API) getVMParameters(name, sshkey, storageAccountURI string, userdata *
plog.Warningf("failed to get image info: %v; continuing", err)
}
}

// Set up the VM configuration
vm := armcompute.VirtualMachine{
Name: &name,
Location: &a.Opts.Location,
Expand Down Expand Up @@ -155,15 +157,15 @@ func (a *API) getVMParameters(name, sshkey, storageAccountURI string, userdata *
},
}

// Configure disk controller if specified
switch a.Opts.DiskController {
case "nvme":
vm.Properties.StorageProfile.DiskControllerType = to.Ptr(armcompute.DiskControllerTypesNVMe)
case "scsi":
vm.Properties.StorageProfile.DiskControllerType = to.Ptr(armcompute.DiskControllerTypesSCSI)
}

// I don't think it would be an issue to have empty user-data set but better
// to be safe than sorry.
// Configure user data or custom data
if ud != "" {
if a.Opts.UseUserData && userdata.IsIgnition() {
plog.Infof("using user-data")
Expand All @@ -174,15 +176,29 @@ func (a *API) getVMParameters(name, sshkey, storageAccountURI string, userdata *
}
}

// Configure availability set if specified
availabilitySetID := a.getAvset()
if availabilitySetID != "" {
vm.Properties.AvailabilitySet = &armcompute.SubResource{ID: &availabilitySetID}
}

// Configure managed identity if specified
if managedIdentityID != "" {
plog.Infof("Assigning managed identity to VM (using pre-looked-up ID)")

// Configure the VM with the user assigned managed identity
vm.Identity = &armcompute.VirtualMachineIdentity{
Type: to.Ptr(armcompute.ResourceIdentityTypeUserAssigned),
UserAssignedIdentities: map[string]*armcompute.UserAssignedIdentitiesValue{
managedIdentityID: {},
},
}
}

return vm
}

func (a *API) CreateInstance(name, sshkey, resourceGroup, storageAccount string, userdata *conf.Conf, network Network) (*Machine, error) {
func (a *API) CreateInstance(name, sshkey, resourceGroup, storageAccount string, userdata *conf.Conf, network Network, managedIdentityID string) (*Machine, error) {
// only VMs are created in the user supplied resource group, kola still manages a resource group
// for the gallery and storage account.
vmResourceGroup := a.getVMRG(resourceGroup)
Expand All @@ -204,7 +220,8 @@ func (a *API) CreateInstance(name, sshkey, resourceGroup, storageAccount string,
return nil, fmt.Errorf("couldn't get NIC name")
}

vmParams := a.getVMParameters(name, sshkey, fmt.Sprintf("https://%s.blob.core.windows.net/", storageAccount), userdata, ip, nic)
// Pass the managedIdentityID to getVMParameters
vmParams := a.getVMParameters(name, sshkey, fmt.Sprintf("https://%s.blob.core.windows.net/", storageAccount), userdata, ip, nic, managedIdentityID)
plog.Infof("Creating Instance %s", name)

clean := func() {
Expand Down Expand Up @@ -317,6 +334,7 @@ func (a *API) GetConsoleOutput(name, resourceGroup, storageAccount string) ([]by
if err != nil {
return nil, err
}

var data io.ReadCloser
err = util.Retry(6, 10*time.Second, func() error {
data, err = GetBlob(client, container, blobname)
Expand All @@ -331,6 +349,5 @@ func (a *API) GetConsoleOutput(name, resourceGroup, storageAccount string) ([]by
if err != nil {
return nil, err
}

return io.ReadAll(data)
}
2 changes: 2 additions & 0 deletions platform/api/azure/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,6 @@ type Options struct {
ResourceGroup string
// AvailabilitySet is an existing availability set to deploy the instance in.
AvailabilitySet string
// VMIdentity is the name of a managed identity to assign to the VM.
VMIdentity string
}
14 changes: 8 additions & 6 deletions platform/machine/azure/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ import (

// cluster embeds platform.BaseCluster and holds the per-cluster values that
// NewMachine hands to the Azure API's CreateInstance call.
type cluster struct {
*platform.BaseCluster
flight *flight
sshKey string
ResourceGroup string
StorageAccount string
Network azure.Network
flight *flight
sshKey string
ResourceGroup string
StorageAccount string
Network azure.Network
ManagedIdentityID string // resource ID of the managed identity passed to CreateInstance; copied from the flight when the cluster is created
}

func (ac *cluster) vmname() string {
Expand All @@ -48,7 +49,8 @@ func (ac *cluster) NewMachine(userdata *conf.UserData) (platform.Machine, error)
return nil, err
}

instance, err := ac.flight.Api.CreateInstance(ac.vmname(), ac.sshKey, ac.ResourceGroup, ac.StorageAccount, conf, ac.Network)
// Pass the managed identity ID to the CreateInstance method
instance, err := ac.flight.Api.CreateInstance(ac.vmname(), ac.sshKey, ac.ResourceGroup, ac.StorageAccount, conf, ac.Network, ac.ManagedIdentityID)
if err != nil {
return nil, err
}
Expand Down
17 changes: 15 additions & 2 deletions platform/machine/azure/flight.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type flight struct {
ImageResourceGroup string
ImageStorageAccount string
Network azure.Network
ManagedIdentityID string // Store the managed identity resource ID
}

// NewFlight creates an instance of a Flight suitable for spawning
Expand Down Expand Up @@ -79,6 +80,17 @@ func NewFlight(opts *azure.Options) (platform.Flight, error) {
return nil, err
}

// If a managed identity is specified, look it up across all resource groups
// and fail fast if it can't be found
if opts.VMIdentity != "" {
plog.Infof("Looking up managed identity %q", opts.VMIdentity)
af.ManagedIdentityID, err = api.FindManagedIdentityID(opts.VMIdentity)
if err != nil {
return nil, fmt.Errorf("failed to find managed identity: %v", err)
}
plog.Infof("Found managed identity with ID %s", af.ManagedIdentityID)
}

if opts.BlobURL != "" || opts.ImageFile != "" {
imageName := fmt.Sprintf("%v", time.Now().UnixNano())
blobName := imageName + ".vhd"
Expand Down Expand Up @@ -152,8 +164,9 @@ func (af *flight) NewCluster(rconf *platform.RuntimeConfig) (platform.Cluster, e
}

ac := &cluster{
BaseCluster: bc,
flight: af,
BaseCluster: bc,
flight: af,
ManagedIdentityID: af.ManagedIdentityID,
}

if !rconf.NoSSHKeyInMetadata {
Expand Down

0 comments on commit 0d7eb30

Please sign in to comment.