Skip to content

Commit

Permalink
[CLD-7878]Remove unnecessary go routines + improve deletion process for utilities (#1057)
Browse files Browse the repository at this point in the history

* [CLD-7878]Fix error handling in go routine

* [CLD-7878]Fix error handling

* [CLD-7878]Adding git pull to ensure latest main

* [CLD-7878]Improve deletion process

* [CLD-7878]Remove unnecessary go routine

* [CLD-7878]Improve deletion process

* Update internal/tools/argocd/application.go

Co-authored-by: Gabe Jackson <[email protected]>

---------

Co-authored-by: Gabe Jackson <[email protected]>
  • Loading branch information
andrleite and gabrieljackson authored Jun 19, 2024
1 parent 2fa9199 commit 7bcf96a
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 66 deletions.
6 changes: 6 additions & 0 deletions internal/provisioner/cluster_register.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ func (cr *ClusterRegister) getClusterCreds(s3StateStore string) (*k8s.Kubeconfig
func (cr *ClusterRegister) deregisterClusterFromArgocd() error {
logger := cr.logger.WithField("cluster", cr.cluster.ID)

// Git pull to get the latest state before deleting the cluster
err := cr.gitClient.Pull(logger)
if err != nil {
return errors.Wrap(err, "failed to pull from argocd repo")
}

clusteFile, err := os.ReadFile(cr.clusterFilePath)
if err != nil {
return errors.Wrap(err, "failed to read cluster file")
Expand Down
11 changes: 6 additions & 5 deletions internal/provisioner/kops_provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"os"
"strings"
"time"

"github.com/mattermost/mattermost-cloud/internal/provisioner/utility"
"github.com/mattermost/mattermost-cloud/internal/supervisor"
Expand Down Expand Up @@ -887,15 +888,15 @@ func (provisioner *KopsProvisioner) cleanupCluster(cluster *model.Cluster, tempD

// Remove utility and cluster from argocd
if cluster.UtilityMetadata.ManagedByArgocd {
// Git pull to get the latest state before deleting the cluster
err = gitClient.Pull(logger)
err = ugh.RemoveUtilityFromArgocd(gitClient)
if err != nil {
return errors.Wrap(err, "failed to pull from argocd repo")
return errors.Wrap(err, "failed to remove utility from argocd")
}

err = ugh.RemoveUtilityFromArgocd()
appName := fmt.Sprintf("%s-%s", "gitops-sre", provisioner.awsClient.GetCloudEnvironmentName())
err = argocdClient.WaitForAppHealthy(appName, 5*time.Minute)
if err != nil {
return errors.Wrap(err, "failed to remove utility from argocd")
return errors.Wrap(err, "failed to wait for app to be healthy")
}

var cr *ClusterRegister
Expand Down
31 changes: 23 additions & 8 deletions internal/provisioner/utility/argo_utility.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"os"
"path/filepath"
"strings"
"sync"
"time"

"github.com/mattermost/mattermost-cloud/internal/tools/argocd"
Expand All @@ -29,8 +28,6 @@ func ProvisionUtilityArgocd(utilityName, tempDir, clusterID string, allowCIDRRan
return errors.Wrap(err, "failed to pull from repo")
}

//TODO: Skip provision utility if it is already provisioned

appsFile, err := os.ReadFile(tempDir + "/apps/" + awsClient.GetCloudEnvironmentName() + ArgocdAppsFile)
if err != nil {
return errors.Wrap(err, "failed to read cluster file")
Expand Down Expand Up @@ -123,24 +120,25 @@ func ProvisionUtilityArgocd(utilityName, tempDir, clusterID string, allowCIDRRan

appName := utilityName + "-sre-" + awsClient.GetCloudEnvironmentName() + "-" + clusterID
gitopsAppName := "gitops-sre-" + awsClient.GetCloudEnvironmentName()

app, err := argocdClient.SyncApplication(gitopsAppName)
if err != nil {
return errors.Wrap(err, "failed to sync application")
}

var wg sync.WaitGroup
timeout := time.Second * 600

wg.Add(1)
go argocdClient.WaitForAppHealthy(appName, &wg, timeout)
wg.Wait()
err = argocdClient.WaitForAppHealthy(appName, timeout)
if err != nil {
return errors.Wrap(err, "failed to wait for application to be healthy")
}

logger.WithField("app:", app.Name).Info("Deployed utility successfully.")

return nil
}

func (group utilityGroup) RemoveUtilityFromArgocd() error {
func (group utilityGroup) RemoveUtilityFromArgocd(gitClient git.Client) error {

appsFile, err := os.ReadFile(group.tempDir + "/apps/" + group.awsClient.GetCloudEnvironmentName() + ArgocdAppsFile)
if err != nil {
Expand Down Expand Up @@ -168,6 +166,23 @@ func (group utilityGroup) RemoveUtilityFromArgocd() error {
return errors.Wrap(err, "failed to remove helm values directory")
}

applicationFile := filepath.Join(group.tempDir, "apps", group.awsClient.GetCloudEnvironmentName(), ArgocdAppsFile)

// Git pull to get the latest state before deleting the cluster
err = gitClient.Pull(group.logger)
if err != nil {
return errors.Wrap(err, "failed to pull from argocd repo")
}

commitMsg := "Removing Utilities: " + group.cluster.ID
if err = gitClient.Commit(applicationFile, commitMsg, group.logger); err != nil {
return errors.Wrap(err, "failed to commit to repo")
}

if err = gitClient.Push(group.logger); err != nil {
return errors.Wrap(err, "failed to push to repo")
}

return nil

}
Expand Down
16 changes: 8 additions & 8 deletions internal/provisioner/utility/unmanaged.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ func (u *unmanaged) CreateOrUpgrade() error {
return err
}
if err = u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}

case model.NginxCanonicalName, model.NginxInternalCanonicalName:
if err = u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}

endpoint, elbType, elbErr := getElasticLoadBalancerInfo(u.Name(), u.logger, u.kubeconfigPath)
Expand Down Expand Up @@ -132,7 +132,7 @@ func (u *unmanaged) CreateOrUpgrade() error {
}

if err = u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}

app := "prometheus"
Expand Down Expand Up @@ -185,7 +185,7 @@ func (u *unmanaged) CreateOrUpgrade() error {
}

if err := u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}

if u.awsClient.IsProvisionedPrivateCNAME(grpcDNS, logger) {
Expand All @@ -208,19 +208,19 @@ func (u *unmanaged) CreateOrUpgrade() error {
}
case model.TeleportCanonicalName:
if err := u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}
case model.PromtailCanonicalName:
if err := u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}
case model.VeleroCanonicalName:
if err := u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}
case model.RtcdCanonicalName:
if err := u.utiliyArgocdDeploy(u.Name()); err != nil {
return errors.Wrapf(err, "failed to provision %s utility", u.Name())
return err
}
default:
u.logger.WithFields(log.Fields{
Expand Down
50 changes: 9 additions & 41 deletions internal/tools/argocd/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package argocd

import (
"context"
"sync"
"time"

"github.com/pkg/errors"
Expand All @@ -20,11 +19,12 @@ func (c *ApiClient) SyncApplication(gitopsAppName string) (*argoappv1.Applicatio
return nil, errors.Wrap(err, "failed to sync application.")
}

var wg sync.WaitGroup
timeout := time.Second * 600
wg.Add(1)
go c.waitForSyncCompletion(gitopsAppName, &wg, timeout)
wg.Wait()

err = c.WaitForAppHealthy(gitopsAppName, timeout)
if err != nil {
return nil, errors.Wrap(err, "failed to wait for application to be healthy")
}

c.logger.Debugf("Successfully synced application %s", gitopsAppName)

Expand All @@ -35,8 +35,7 @@ func (c *ApiClient) SyncApplication(gitopsAppName string) (*argoappv1.Applicatio
return app, nil
}

func (c *ApiClient) WaitForAppHealthy(appName string, wg *sync.WaitGroup, timeout time.Duration) error { //TODO return error
defer wg.Done()
func (c *ApiClient) WaitForAppHealthy(appName string, timeout time.Duration) error {

c.logger.Infof("Waiting for application %s to be healthy ...", appName)

Expand All @@ -49,7 +48,7 @@ func (c *ApiClient) WaitForAppHealthy(appName string, wg *sync.WaitGroup, timeou
Refresh: &refresh,
})
if err != nil {
return errors.Wrapf(err, "failed to get application %s", appName)
return err
}

if app.Status.Health.Status == health.HealthStatusHealthy && app.Status.Sync.Status == argoappv1.SyncStatusCodeSynced {
Expand All @@ -62,39 +61,8 @@ func (c *ApiClient) WaitForAppHealthy(appName string, wg *sync.WaitGroup, timeou
}

//Add a small delay to reduce CPU usage and avoid too_many_pings error.
time.Sleep(time.Second * 1)
//This time is needed for the application to be healthy in the ArgoCD.
time.Sleep(time.Second * 5)
}
return nil
}

// waitForSyncCompletion polls the application named appName once per second
// until its operation phase is no longer "Running", the application cannot be
// fetched, or timeout has elapsed. It calls wg.Done before returning. The
// function has no return value, so lookup failures and timeouts are reported
// only through the client logger.
func (c *ApiClient) waitForSyncCompletion(appName string, wg *sync.WaitGroup, timeout time.Duration) {
	defer wg.Done()

	startTime := time.Now()
	refresh := "true"

	c.logger.Infof("Waiting for application %s to be synced...\n", appName)
	for {
		app, err := c.appClient.Get(context.Background(), &application.ApplicationQuery{
			Name:    &appName,
			Refresh: &refresh,
		})
		if err != nil {
			// Stop waiting: there is no application to inspect, and reading
			// its status below would dereference a nil pointer.
			c.logger.Errorf("failed to get application %s: %v", appName, err)
			return
		}

		// A missing operation state is treated the same as any non-running
		// phase: there is nothing left to wait for.
		state := app.Status.OperationState
		if state == nil || state.Phase != "Running" {
			return
		}

		// Check for timeout
		if time.Since(startTime) >= timeout {
			c.logger.Errorf("timed out waiting for application %s to be synced", appName)
			return
		}

		// Add a small delay to reduce CPU usage and avoid too_many_pings error.
		time.Sleep(time.Second * 1)
	}
}
3 changes: 1 addition & 2 deletions internal/tools/argocd/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package argocd

import (
"errors"
"sync"
"time"

"github.com/argoproj/argo-cd/v2/pkg/apiclient"
Expand All @@ -13,7 +12,7 @@ import (

type Client interface {
SyncApplication(gitopsAppName string) (*argoappv1.Application, error)
WaitForAppHealthy(appName string, wg *sync.WaitGroup, timeout time.Duration) error
WaitForAppHealthy(appName string, timeout time.Duration) error
}

type Connection struct {
Expand Down
3 changes: 1 addition & 2 deletions internal/tools/argocd/noop_client.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package argocd

import (
"sync"
"time"

argoappv1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
Expand All @@ -13,6 +12,6 @@ func (n *NoOpClient) SyncApplication(gitopsAppName string) (*argoappv1.Applicati
return nil, nil
}

func (n *NoOpClient) WaitForAppHealthy(appName string, wg *sync.WaitGroup, timeout time.Duration) error {
func (n *NoOpClient) WaitForAppHealthy(appName string, timeout time.Duration) error {
return nil
}

0 comments on commit 7bcf96a

Please sign in to comment.