Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cherry-pick feat: delete acr and recreate if cache rule is wrong #5373

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 67 additions & 21 deletions e2e/aks_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func getBaseClusterModel(clusterName string) *armcontainerservice.ManagedCluster
}

func addAirgapNetworkSettings(ctx context.Context, t *testing.T, clusterModel *armcontainerservice.ManagedCluster) error {
t.Logf("Adding network settings for airgap cluster %s in rg %s\n", *clusterModel.Name, *clusterModel.Properties.NodeResourceGroup)
t.Logf("Adding network settings for airgap cluster %s in rg %s", *clusterModel.Name, *clusterModel.Properties.NodeResourceGroup)

vnet, err := getClusterVNet(ctx, *clusterModel.Properties.NodeResourceGroup)
if err != nil {
Expand Down Expand Up @@ -151,7 +151,7 @@ func airGapSecurityGroup(location, clusterFQDN string) (armnetwork.SecurityGroup
}

func addPrivateEndpointForACR(ctx context.Context, t *testing.T, nodeResourceGroup string, vnet VNet) error {
t.Logf("Checking if private endpoint for private container registry is in rg %s\n", nodeResourceGroup)
t.Logf("Checking if private endpoint for private container registry is in rg %s", nodeResourceGroup)

var err error
var exists bool
Expand Down Expand Up @@ -192,7 +192,7 @@ func addPrivateEndpointForACR(ctx context.Context, t *testing.T, nodeResourceGro
func privateEndpointExists(ctx context.Context, t *testing.T, nodeResourceGroup, privateEndpointName string) (bool, error) {
existingPE, err := config.Azure.PrivateEndpointClient.Get(ctx, nodeResourceGroup, privateEndpointName, nil)
if err == nil && existingPE.ID != nil {
t.Logf("Private Endpoint already exists with ID: %s\n", *existingPE.ID)
t.Logf("Private Endpoint already exists with ID: %s", *existingPE.ID)
return true, nil
}
if err != nil && !strings.Contains(err.Error(), "ResourceNotFound") {
Expand All @@ -201,19 +201,34 @@ func privateEndpointExists(ctx context.Context, t *testing.T, nodeResourceGroup,
return false, nil
}

func createPrivateAzureContainerRegistry(ctx context.Context, t *testing.T, resourceGroup, privateACRName string) error {
t.Logf("Creating private Azure Container Registry in rg %s\n", resourceGroup)
func createPrivateAzureContainerRegistry(ctx context.Context, t *testing.T, cluster *armcontainerservice.ManagedCluster, resourceGroup, privateACRName string) error {
t.Logf("Creating private Azure Container Registry in rg %s", resourceGroup)

acr, err := config.Azure.RegistriesClient.Get(ctx, resourceGroup, privateACRName, nil)
if err == nil {
t.Logf("Private ACR already exists at id %s, skipping creation", *acr.ID)
return nil
}

// check if error is anything but not found
var azErr *azcore.ResponseError
if errors.As(err, &azErr) && azErr.StatusCode != 404 {
return fmt.Errorf("failed to get private ACR: %w", err)
err, recreateACR := shouldRecreateACR(ctx, t, resourceGroup, privateACRName)
if err != nil {
return fmt.Errorf("failed to check cache rules: %w", err)
}
if !recreateACR {
t.Logf("Private ACR already exists at id %s, skipping creation", *acr.ID)
return nil
}
t.Logf("Private ACR exists with the wrong cache deleting...")
if err := deletePrivateAzureContainerRegistry(ctx, t, resourceGroup, privateACRName); err != nil {
return fmt.Errorf("failed to delete private acr: %w", err)
}
// if ACR gets recreated so should the cluster
t.Logf("Private ACR deleted, deleting cluster %s", *cluster.Name)
if err := deleteCluster(ctx, t, cluster); err != nil {
return fmt.Errorf("failed to delete cluster: %w", err)
}
} else {
// check if error is anything but not found
var azErr *azcore.ResponseError
if errors.As(err, &azErr) && azErr.StatusCode != 404 {
return fmt.Errorf("failed to get private ACR: %w", err)
}
}

t.Logf("ACR does not exist, creating...")
Expand Down Expand Up @@ -242,20 +257,51 @@ func createPrivateAzureContainerRegistry(ctx context.Context, t *testing.T, reso
return fmt.Errorf("failed to create private ACR during polling: %w", err)
}

t.Logf("Private Azure Container Registry created\n")
t.Logf("Private Azure Container Registry created")
if err := addCacheRuelsToPrivateAzureContainerRegistry(ctx, t, config.ResourceGroupName, config.PrivateACRName); err != nil {
return fmt.Errorf("failed to add cache rules to private acr: %w", err)
}
return nil
}

// deletePrivateAzureContainerRegistry issues a delete for the registry named
// privateACRName in resourceGroup and blocks until the delete poller reports
// completion. Any failure to start or to finish the operation is returned
// wrapped with context.
func deletePrivateAzureContainerRegistry(ctx context.Context, t *testing.T, resourceGroup, privateACRName string) error {
	t.Logf("Deleting private Azure Container Registry in rg %s", resourceGroup)

	poller, beginErr := config.Azure.RegistriesClient.BeginDelete(ctx, resourceGroup, privateACRName, nil)
	if beginErr != nil {
		return fmt.Errorf("failed to delete private ACR: %w", beginErr)
	}

	// The poller's response value carries nothing we need; only the error matters.
	if _, pollErr := poller.PollUntilDone(ctx, nil); pollErr != nil {
		return fmt.Errorf("failed to delete private ACR during polling: %w", pollErr)
	}

	t.Logf("Private Azure Container Registry deleted")
	return nil
}

// shouldRecreateACR reports whether the private ACR has to be deleted and
// recreated because its "aks-managed-rule" cache rule is missing or points at
// a target repository other than
// config.Config.AzureContainerRegistrytargetRepository.
//
// If the ACR needs to be recreated, so does the airgap k8s cluster.
//
// The (error, bool) result order is unusual for Go but is kept as-is for
// compatibility with existing callers. The bool is only meaningful when the
// returned error is nil.
func shouldRecreateACR(ctx context.Context, t *testing.T, resourceGroup, privateACRName string) (error, bool) {
	t.Logf("Checking if private Azure Container Registry cache rules are correct in rg %s", resourceGroup)

	cacheRules, err := config.Azure.CacheRulesClient.Get(ctx, resourceGroup, privateACRName, "aks-managed-rule", nil)
	if err != nil {
		// A 404 means the registry has no such cache rule at all. That is just
		// another flavour of "wrong cache", so request a recreate instead of
		// failing the whole setup.
		var azErr *azcore.ResponseError
		if errors.As(err, &azErr) && azErr.StatusCode == 404 {
			t.Logf("Private ACR cache rule not found, ACR needs to be recreated")
			return nil, true
		}
		return fmt.Errorf("failed to get cache rules: %w", err), false
	}

	// Only a rule that is present with an explicit, different target
	// repository is treated as incorrect; nil properties are left alone.
	if cacheRules.Properties != nil && cacheRules.Properties.TargetRepository != nil && *cacheRules.Properties.TargetRepository != config.Config.AzureContainerRegistrytargetRepository {
		t.Logf("Private ACR cache is not correct: %s", *cacheRules.Properties.TargetRepository)
		return nil, true
	}

	t.Logf("Private ACR cache is correct")
	return nil, false
}

func addCacheRuelsToPrivateAzureContainerRegistry(ctx context.Context, t *testing.T, resourceGroup, privateACRName string) error {
t.Logf("Adding cache rules to private Azure Container Registry in rg %s\n", resourceGroup)
t.Logf("Adding cache rules to private Azure Container Registry in rg %s", resourceGroup)

cacheParams := armcontainerregistry.CacheRule{
Properties: &armcontainerregistry.CacheRuleProperties{
SourceRepository: to.Ptr("mcr.microsoft.com/*"),
TargetRepository: to.Ptr("aks/*"),
TargetRepository: to.Ptr(config.Config.AzureContainerRegistrytargetRepository),
},
}
cacheCreateResp, err := config.Azure.CacheRulesClient.BeginCreate(
Expand All @@ -274,12 +320,12 @@ func addCacheRuelsToPrivateAzureContainerRegistry(ctx context.Context, t *testin
return fmt.Errorf("failed to create cache rule in polling: %w", err)
}

t.Logf("Cache rule created\n")
t.Logf("Cache rule created")
return nil
}

func createPrivateEndpoint(ctx context.Context, t *testing.T, nodeResourceGroup, privateEndpointName, acrName string, vnet VNet) (armnetwork.PrivateEndpointsClientCreateOrUpdateResponse, error) {
t.Logf("Creating Private Endpoint in rg %s\n", nodeResourceGroup)
t.Logf("Creating Private Endpoint in rg %s", nodeResourceGroup)
acrID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.ContainerRegistry/registries/%s", config.Config.SubscriptionID, config.ResourceGroupName, acrName)

peParams := armnetwork.PrivateEndpoint{
Expand Down Expand Up @@ -315,7 +361,7 @@ func createPrivateEndpoint(ctx context.Context, t *testing.T, nodeResourceGroup,
return armnetwork.PrivateEndpointsClientCreateOrUpdateResponse{}, fmt.Errorf("failed to create private endpoint in polling: %w", err)
}

t.Logf("Private Endpoint created or updated with ID: %s\n", *resp.ID)
t.Logf("Private Endpoint created or updated with ID: %s", *resp.ID)
return resp, nil
}

Expand All @@ -338,7 +384,7 @@ func createPrivateZone(ctx context.Context, t *testing.T, nodeResourceGroup, pri
return armprivatedns.PrivateZonesClientCreateOrUpdateResponse{}, fmt.Errorf("failed to create private dns zone in polling: %w", err)
}

t.Logf("Private DNS Zone created or updated with ID: %s\n", *resp.ID)
t.Logf("Private DNS Zone created or updated with ID: %s", *resp.ID)
return resp, nil
}

Expand Down Expand Up @@ -373,7 +419,7 @@ func createPrivateDNSLink(ctx context.Context, t *testing.T, vnet VNet, nodeReso
return fmt.Errorf("failed to create virtual network link in polling: %w", err)
}

t.Logf("Virtual Network Link created or updated with ID: %s\n", *resp.ID)
t.Logf("Virtual Network Link created or updated with ID: %s", *resp.ID)
return nil
}

Expand Down
45 changes: 39 additions & 6 deletions e2e/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func prepareCluster(ctx context.Context, t *testing.T, cluster *armcontainerserv

// private acr must be created before we add the debug daemonsets
if isAirgap {
if err := createPrivateAzureContainerRegistry(ctx, t, config.ResourceGroupName, config.PrivateACRName); err != nil {
if err := createPrivateAzureContainerRegistry(ctx, t, cluster, config.ResourceGroupName, config.PrivateACRName); err != nil {
return nil, fmt.Errorf("failed to create private acr: %w", err)
}
}
Expand Down Expand Up @@ -164,7 +164,7 @@ func getOrCreateCluster(ctx context.Context, t *testing.T, cluster *armcontainer
if err != nil {
return nil, fmt.Errorf("failed to get cluster %q: %w", *cluster.Name, err)
}
t.Logf("cluster %s already exists in rg %s\n", *cluster.Name, config.ResourceGroupName)
t.Logf("cluster %s already exists in rg %s", *cluster.Name, config.ResourceGroupName)
switch *existingCluster.Properties.ProvisioningState {
case "Succeeded":
return &existingCluster.ManagedCluster, nil
Expand All @@ -176,8 +176,41 @@ func getOrCreateCluster(ctx context.Context, t *testing.T, cluster *armcontainer
}
}

// deleteCluster removes the given managed cluster from
// config.ResourceGroupName and waits for the deletion to finish. A cluster
// that is already absent (404 on lookup) is logged and treated as success.
func deleteCluster(ctx context.Context, t *testing.T, cluster *armcontainerservice.ManagedCluster) error {
	clusterName := *cluster.Name
	t.Logf("deleting cluster %s in rg %s", clusterName, config.ResourceGroupName)

	// Look the cluster up first so a missing cluster becomes a no-op.
	if _, getErr := config.Azure.AKS.Get(ctx, config.ResourceGroupName, clusterName, nil); getErr != nil {
		var respErr *azcore.ResponseError
		if !errors.As(getErr, &respErr) || respErr.StatusCode != 404 {
			return fmt.Errorf("failed to get cluster %q: %w", clusterName, getErr)
		}
		t.Logf("cluster %s does not exist in rg %s", clusterName, config.ResourceGroupName)
		return nil
	}

	poller, beginErr := config.Azure.AKS.BeginDelete(ctx, config.ResourceGroupName, clusterName, nil)
	if beginErr != nil {
		return fmt.Errorf("failed to delete cluster %q: %w", clusterName, beginErr)
	}
	if _, pollErr := poller.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions); pollErr != nil {
		return fmt.Errorf("failed to wait for cluster deletion %w", pollErr)
	}

	t.Logf("deleted cluster %s in rg %s", clusterName, config.ResourceGroupName)
	return nil
}

// isExistingResourceGroup reports the Success flag returned by the resource
// group existence check for resourceGroupName. If the check itself fails, it
// returns false together with the wrapped error.
func isExistingResourceGroup(ctx context.Context, resourceGroupName string) (bool, error) {
	resp, err := config.Azure.ResourceGroup.CheckExistence(ctx, resourceGroupName, nil)
	if err == nil {
		return resp.Success, nil
	}
	return false, fmt.Errorf("failed to get RG %q: %w", resourceGroupName, err)
}

func createNewAKSCluster(ctx context.Context, t *testing.T, cluster *armcontainerservice.ManagedCluster) (*armcontainerservice.ManagedCluster, error) {
t.Logf("creating or updating cluster %s in rg %s\n", *cluster.Name, *cluster.Location)
t.Logf("creating or updating cluster %s in rg %s", *cluster.Name, *cluster.Location)
// Note: it seems this call can still trigger a new operation even if nothing has changed
pollerResp, err := config.Azure.AKS.BeginCreateOrUpdate(
ctx,
Expand Down Expand Up @@ -207,7 +240,7 @@ func createNewAKSClusterWithRetry(ctx context.Context, t *testing.T, cluster *ar
retryInterval := 30 * time.Second
var lastErr error
for attempt := 0; attempt < maxRetries; attempt++ {
t.Logf("Attempt %d: creating or updating cluster %s in region %s and rg %s\n", attempt+1, *cluster.Name, *cluster.Location, resourceGroup)
t.Logf("Attempt %d: creating or updating cluster %s in region %s and rg %s", attempt+1, *cluster.Name, *cluster.Location, config.ResourceGroupName)

createdCluster, err := createNewAKSCluster(ctx, t, cluster)
if err == nil {
Expand All @@ -218,7 +251,7 @@ func createNewAKSClusterWithRetry(ctx context.Context, t *testing.T, cluster *ar
var respErr *azcore.ResponseError
if errors.As(err, &respErr) && respErr.StatusCode == 409 {
lastErr = err
t.Logf("Attempt %d failed with 409 Conflict: %v. Retrying in %v...\n", attempt+1, err, retryInterval)
t.Logf("Attempt %d failed with 409 Conflict: %v. Retrying in %v...", attempt+1, err, retryInterval)

select {
case <-time.After(retryInterval):
Expand Down Expand Up @@ -248,7 +281,7 @@ func getOrCreateMaintenanceConfiguration(ctx context.Context, t *testing.T, clus
}

func createNewMaintenanceConfiguration(ctx context.Context, t *testing.T, cluster *armcontainerservice.ManagedCluster) (*armcontainerservice.MaintenanceConfiguration, error) {
t.Logf("creating maintenance configuration for cluster %s in rg %s\n", *cluster.Name, config.ResourceGroupName)
t.Logf("creating maintenance configuration for cluster %s in rg %s", *cluster.Name, config.ResourceGroupName)
maintenance := armcontainerservice.MaintenanceConfiguration{
Properties: &armcontainerservice.MaintenanceConfigurationProperties{
MaintenanceWindow: &armcontainerservice.MaintenanceWindow{
Expand Down
46 changes: 26 additions & 20 deletions e2e/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,32 @@ var (
)

type Configuration struct {
AirgapNSGName string `env:"AIRGAP_NSG_NAME" envDefault:"abe2e-airgap-securityGroup" json:"airgapNSGName"`
DefaultSubnetName string `env:"DEFAULT_SUBNET_NAME" envDefault:"aks-subnet" json:"defaultSubnetName"`
BuildID string `env:"BUILD_ID" envDefault:"local" json:"buildID"`
Location string `env:"LOCATION" envDefault:"westus3" json:"location"`
SubscriptionID string `env:"SUBSCRIPTION_ID" envDefault:"8ecadfc9-d1a3-4ea4-b844-0d9f87e4d7c8" json:"subscriptionID"`
GallerySubscriptionID string `env:"GALLERY_SUBSCRIPTION_ID" envDefault:"c4c3550e-a965-4993-a50c-628fd38cd3e1" json:"gallerySubscriptionID"`
GalleryResourceGroupName string `env:"GALLERY_RESOURCE_GROUP_NAME" envDefault:"aksvhdtestbuildrg" json:"galleryResourceGroupName"`
GalleryName string `env:"GALLERY_NAME" envDefault:"PackerSigGalleryEastUS" json:"galleryName"`
SIGVersionTagName string `env:"SIG_VERSION_TAG_NAME" envDefault:"branch" json:"sigVersionTagName"`
SIGVersionTagValue string `env:"SIG_VERSION_TAG_VALUE" envDefault:"refs/heads/master" json:"sigVersionTagValue"`
TagsToRun string `env:"TAGS_TO_RUN" json:"tagsToRun"`
TagsToSkip string `env:"TAGS_TO_SKIP" json:"tagsToSkip"`
TestTimeout time.Duration `env:"TEST_TIMEOUT" envDefault:"35m" json:"testTimeout"`
E2ELoggingDir string `env:"LOGGING_DIR" envDefault:"scenario-logs" json:"e2eLoggingDir"`
IgnoreScenariosWithMissingVHD bool `env:"IGNORE_SCENARIOS_WITH_MISSING_VHD" json:"ignoreScenariosWithMissingVHD"`
SkipTestsWithSKUCapacityIssue bool `env:"SKIP_TESTS_WITH_SKU_CAPACITY_ISSUE" json:"skipTestsWithSKUCapacityIssue"`
KeepVMSS bool `env:"KEEP_VMSS" json:"keepVMSS"`
BlobStorageAccountPrefix string `env:"BLOB_STORAGE_ACCOUNT_PREFIX" envDefault:"abe2e" json:"blobStorageAccountPrefix"`
BlobContainer string `env:"BLOB_CONTAINER" envDefault:"abe2e" json:"blobContainer"`
EnableNodeBootstrapperTest bool `env:"ENABLE_NODE_BOOTSTRAPPER_TEST" json:"enableNodeBootstrapperTest"`
AirgapNSGName string `env:"AIRGAP_NSG_NAME" envDefault:"abe2e-airgap-securityGroup"`
AzureContainerRegistrytargetRepository string `env:"ACR_TARGET_REPOSITORY" envDefault:"*"`
BlobContainer string `env:"BLOB_CONTAINER" envDefault:"abe2e"`
BlobStorageAccountPrefix string `env:"BLOB_STORAGE_ACCOUNT_PREFIX" envDefault:"abe2e"`
BuildID string `env:"BUILD_ID" envDefault:"local"`
DefaultSubnetName string `env:"DEFAULT_SUBNET_NAME" envDefault:"aks-subnet"`
E2ELoggingDir string `env:"LOGGING_DIR" envDefault:"scenario-logs"`
EnableAKSNodeControllerTest bool `env:"ENABLE_AKS_NODE_CONTROLLER_TEST"`
GalleryNameLinux string `env:"GALLERY_NAME" envDefault:"PackerSigGalleryEastUS"`
GalleryNameWindows string `env:"GALLERY_NAME_WINDOWS" envDefault:"AKSWindows"`
GalleryResourceGroupNameLinux string `env:"GALLERY_RESOURCE_GROUP" envDefault:"aksvhdtestbuildrg"`
GalleryResourceGroupNameWindows string `env:"GALLERY_RESOURCE_GROUP_WINDOWS" envDefault:"AKS-Windows"`
GallerySubscriptionIDLinux string `env:"GALLERY_SUBSCRIPTION_ID" envDefault:"c4c3550e-a965-4993-a50c-628fd38cd3e1"`
GallerySubscriptionIDWindows string `env:"GALLERY_SUBSCRIPTION_ID_WINDOWS" envDefault:"4be8920b-2978-43d7-ab14-04d8549c1d05"`
IgnoreScenariosWithMissingVHD bool `env:"IGNORE_SCENARIOS_WITH_MISSING_VHD"`
KeepVMSS bool `env:"KEEP_VMSS"`
Location string `env:"LOCATION" envDefault:"westus3"`
SIGVersionTagName string `env:"SIG_VERSION_TAG_NAME" envDefault:"branch"`
SIGVersionTagValue string `env:"SIG_VERSION_TAG_VALUE" envDefault:"refs/heads/dev"`
SkipTestsWithSKUCapacityIssue bool `env:"SKIP_TESTS_WITH_SKU_CAPACITY_ISSUE"`
SubscriptionID string `env:"SUBSCRIPTION_ID" envDefault:"8ecadfc9-d1a3-4ea4-b844-0d9f87e4d7c8"`
TagsToRun string `env:"TAGS_TO_RUN"`
TagsToSkip string `env:"TAGS_TO_SKIP"`
TestTimeout time.Duration `env:"TEST_TIMEOUT" envDefault:"35m"`
TestTimeoutVMSS time.Duration `env:"TEST_TIMEOUT_VMSS" envDefault:"17m"`
WindowsAdminPassword string `env:"WINDOWS_ADMIN_PASSWORD"`
}

func (c *Configuration) BlobStorageAccount() string {
Expand Down
Loading
Loading