From 2491dda1f86b19628cd41a1863feec8f1a321c54 Mon Sep 17 00:00:00 2001 From: Alberto Gonzalez Date: Tue, 3 Sep 2024 23:09:19 +0200 Subject: [PATCH 01/13] Add condition for virt and add rolebinding for hcp --- internal/models/ocp_sandbox.go | 116 +++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 40 deletions(-) diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index 7d9333f7..58f84367 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -1026,6 +1026,40 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri return } + // Assign ClusterRole sandbox-hcp (created with gitops) to the SA if hcp option was selected + if value, exists := cloud_selector["hcp"]; exists && (value == "yes" || value == "true") { + _, err = clientset.RbacV1().RoleBindings(namespaceName).Create(context.TODO(), &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceAccountName + "-hcp", + Labels: map[string]string{ + "serviceUuid": serviceUuid, + "guid": annotations["guid"], + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "ClusterRole", + Name: serviceAccountName + "-hcp", + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: serviceAccountName, + Namespace: namespaceName, + }, + }, + }, metav1.CreateOptions{}) + + if err != nil { + log.Logger.Error("Error creating OCP RoleBind", "error", err) + if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { + log.Logger.Error("Error cleaning up the namespace", "error", err) + } + rnew.SetStatus("error") + return + } + } + // TODO: parameterize this, or detect when to execute it, otherwise it'll fail // // Create RoleBind for the Service Account in the Namespace for kubevirt // _, err = clientset.RbacV1().RoleBindings(namespaceName).Create(context.TODO(), &rbacv1.RoleBinding{ @@ -1059,46 +1093,48 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri // return // } - // Look if namespace 'cnv-images' exists - if _, err := clientset.CoreV1().Namespaces().Get(context.TODO(), "cnv-images", metav1.GetOptions{}); err == nil { - - rb := &rbacv1.RoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-clone-" + namespaceName[:min(51, len(namespaceName))], - Namespace: "cnv-images", - Labels: map[string]string{ - "serviceUuid": serviceUuid, - "guid": annotations["guid"], - }, - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Name: "default", - Namespace: namespaceName, - }, - }, - RoleRef: rbacv1.RoleRef{ - Kind: "ClusterRole", - Name: "datavolume-cloner", - APIGroup: "rbac.authorization.k8s.io", - }, - } - - _, err = clientset.RbacV1().RoleBindings("cnv-images").Create(context.TODO(), rb, metav1.CreateOptions{}) - if err != nil { - if !strings.Contains(err.Error(), "already exists") { - log.Logger.Error("Error creating rolebinding on cnv-images", "error", err) - - if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { - log.Logger.Error("Error cleaning up the namespace", "error", err) - } - rnew.SetStatus("error") - return - } - } - } - + // if cloud_selector has enabled the virt flag, then we give permission to cnv-images namespace + if value, exists := cloud_selector["virt"]; exists && (value == "yes" || value == "true") { + // Look if namespace 'cnv-images' exists + if _, err := clientset.CoreV1().Namespaces().Get(context.TODO(), "cnv-images", 
metav1.GetOptions{}); err == nil { + + rb := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "allow-clone-" + namespaceName[:min(51, len(namespaceName))], + Namespace: "cnv-images", + Labels: map[string]string{ + "serviceUuid": serviceUuid, + "guid": annotations["guid"], + }, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "default", + Namespace: namespaceName, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "ClusterRole", + Name: "datavolume-cloner", + APIGroup: "rbac.authorization.k8s.io", + }, + } + + _, err = clientset.RbacV1().RoleBindings("cnv-images").Create(context.TODO(), rb, metav1.CreateOptions{}) + if err != nil { + if !strings.Contains(err.Error(), "already exists") { + log.Logger.Error("Error creating rolebinding on cnv-images", "error", err) + + if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { + log.Logger.Error("Error cleaning up the namespace", "error", err) + } + rnew.SetStatus("error") + return + } + } + } + } secrets, err := clientset.CoreV1().Secrets(namespaceName).List(context.TODO(), metav1.ListOptions{}) if err != nil { From 6195ffb903b133c7625aa0ff8509ca4407014aef Mon Sep 17 00:00:00 2001 From: Alberto Gonzalez Date: Tue, 10 Sep 2024 09:46:07 +0200 Subject: [PATCH 02/13] WIP keycloak --- internal/models/ocp_sandbox.go | 107 +++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index 58f84367..87d49d3f 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -2,6 +2,8 @@ package models import ( "context" + "crypto/rand" + "encoding/base64" "encoding/json" "errors" "fmt" @@ -20,6 +22,9 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" metricsv "k8s.io/metrics/pkg/client/clientset/versioned" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" ) type OcpSandboxProvider struct { @@ -85,6 +90,16 @@ type TokenResponse struct { var nameRegex = regexp.MustCompile(`^[a-zA-Z0-9-]+$`) +// GenerateRandomPassword generates a random password of specified length. 
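+// Note that the random bytes are base64url-encoded, so the returned string is
+// about 4/3 of `length` characters (for example, length 16 yields 24 characters);
+// `length` sets the entropy in bytes, not the final password length.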
+func generateRandomPassword(length int) (string, error) { + bytes := make([]byte, length) + if _, err := rand.Read(bytes); err != nil { + return "", err + } + return base64.URLEncoding.EncodeToString(bytes), nil +} + + // Bind and Render func (p *OcpSharedClusterConfiguration) Bind(r *http.Request) error { // Ensure the name is not empty @@ -921,6 +936,14 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri return } + // Create an dynamic OpenShift client for non regular objects + dynclientset, err := dynamic.NewForConfig(config) + if err != nil { + log.Logger.Error("Error creating OCP client", "error", err) + rnew.SetStatus("error") + return + } + serviceAccountName := "sandbox" suffix := annotations["namespace_suffix"] if suffix == "" { @@ -1025,6 +1048,62 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri rnew.SetStatus("error") return } + // Create an user if the keycloak option was enabled + if value, exists := cloud_selector["keycloak"]; exists && (value == "yes" || value == "true") { + // Generate a random password for the Keycloak user + userAccountName := "sandbox-" + guid + password, err := generateRandomPassword(16) + if err != nil { + log.Logger.Error("Error generating password", "error", err) + } + + // Define the KeycloakUser GroupVersionResource + keycloakUserGVR := schema.GroupVersionResource{ + Group: "keycloak.org", + Version: "v1alpha1", + Resource: "keycloakusers", + } + + // Create the KeycloakUser object as an unstructured object + keycloakUser := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "keycloak.org/v1alpha1", + "kind": "KeycloakUser", + "metadata": map[string]interface{}{ + "name": userAccountName, + "namespace": "rhsso", // The namespace where Keycloak is installed + }, + "spec": map[string]interface{}{ + "user": map[string]interface{}{ + "username": userAccountName, + "enabled": true, + "credentials": []interface{}{ + map[string]interface{}{ + "type": "password", + "value": password, + "temporary": false, + }, + }, + }, + "realmSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + "app": "sso", // The label selector for the Keycloak realm + }, + }, + }, + }, + } + + // Create the KeycloakUser resource in the specified namespace + namespace := "rhsso" + _, err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Create(context.TODO(), keycloakUser, metav1.CreateOptions{}) + if err != nil { + log.Logger.Error("Error creating KeycloakUser", "error", err) + } + + fmt.Println("KeycloakUser created successfully") + + } // Assign ClusterRole sandbox-hcp (created with gitops) to the SA if hcp option was selected if value, exists := cloud_selector["hcp"]; exists && (value == "yes" || value == "true") { @@ -1442,6 +1521,16 @@ func (account *OcpSandboxWithCreds) Delete() error { account.SetStatus("error") return err } + + // Create an dynamic OpenShift client for non regular objects + dynclientset, err := dynamic.NewForConfig(config) + if err != nil { + log.Logger.Error("Error creating OCP client", "error", err, "name", account.Name) + account.SetStatus("error") + return err + } + + // Define the Service Account name serviceAccountName := "sandbox" @@ -1496,6 +1585,24 @@ func (account *OcpSandboxWithCreds) Delete() error { } } + // Delete the User + userAccountName := "sandbox-" + account.Annotations["guid"] + // Define the KeycloakUser GroupVersionResource + keycloakUserGVR := schema.GroupVersionResource{ + Group: "keycloak.org", + Version: 
"v1alpha1", + Resource: "keycloakusers", + } + + namespace := "rhsso" + err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Delete(context.TODO(), userAccountName, metav1.DeleteOptions{}) + if err != nil { + log.Logger.Error("Error deleting OCP namespace", "error", err, "name", account.Name) + account.SetStatus("error") + return err + } + + _, err = account.Provider.DbPool.Exec( context.Background(), "DELETE FROM resources WHERE id = $1", From 2ef32deea30f9c23b4236fc8f642919908731be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Wed, 11 Sep 2024 10:55:56 +0200 Subject: [PATCH 03/13] merge main + indentation --- internal/models/ocp_sandbox.go | 320 ++++++++++++++++----------------- 1 file changed, 159 insertions(+), 161 deletions(-) diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index 0caa8ad7..0dd53e26 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -2,8 +2,8 @@ package models import ( "context" - "crypto/rand" - "encoding/base64" + "crypto/rand" + "encoding/base64" "encoding/json" "errors" "fmt" @@ -19,13 +19,13 @@ import ( rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" metricsv "k8s.io/metrics/pkg/client/clientset/versioned" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/client-go/dynamic" ) type OcpSandboxProvider struct { @@ -1029,12 +1029,12 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri } // Create an dynamic OpenShift client for non regular objects - dynclientset, err := dynamic.NewForConfig(config) - if err != nil { - log.Logger.Error("Error creating OCP client", "error", err) - rnew.SetStatus("error") - return - } + dynclientset, err := dynamic.NewForConfig(config) + if err != nil { + log.Logger.Error("Error creating OCP client", "error", err) + rnew.SetStatus("error") + return + } serviceAccountName := "sandbox" suffix := annotations["namespace_suffix"] @@ -1195,96 +1195,96 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri rnew.SetStatus("error") return } - // Create an user if the keycloak option was enabled - if value, exists := cloud_selector["keycloak"]; exists && (value == "yes" || value == "true") { - // Generate a random password for the Keycloak user - userAccountName := "sandbox-" + guid - password, err := generateRandomPassword(16) - if err != nil { + // Create an user if the keycloak option was enabled + if value, exists := cloud_selector["keycloak"]; exists && (value == "yes" || value == "true") { + // Generate a random password for the Keycloak user + userAccountName := "sandbox-" + guid + password, err := generateRandomPassword(16) + if err != nil { log.Logger.Error("Error generating password", "error", err) - } - - // Define the KeycloakUser GroupVersionResource - keycloakUserGVR := schema.GroupVersionResource{ - Group: "keycloak.org", - Version: "v1alpha1", - Resource: "keycloakusers", - } - - // Create the KeycloakUser object as an unstructured object - keycloakUser := &unstructured.Unstructured{ - Object: map[string]interface{}{ - "apiVersion": "keycloak.org/v1alpha1", - "kind": "KeycloakUser", - "metadata": map[string]interface{}{ - "name": 
userAccountName, - "namespace": "rhsso", // The namespace where Keycloak is installed - }, - "spec": map[string]interface{}{ - "user": map[string]interface{}{ - "username": userAccountName, - "enabled": true, - "credentials": []interface{}{ - map[string]interface{}{ - "type": "password", - "value": password, - "temporary": false, - }, - }, - }, - "realmSelector": map[string]interface{}{ - "matchLabels": map[string]interface{}{ - "app": "sso", // The label selector for the Keycloak realm - }, - }, - }, - }, - } - - // Create the KeycloakUser resource in the specified namespace - namespace := "rhsso" - _, err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Create(context.TODO(), keycloakUser, metav1.CreateOptions{}) - if err != nil { - log.Logger.Error("Error creating KeycloakUser", "error", err) - } - - fmt.Println("KeycloakUser created successfully") - - } - - // Assign ClusterRole sandbox-hcp (created with gitops) to the SA if hcp option was selected - if value, exists := cloud_selector["hcp"]; exists && (value == "yes" || value == "true") { - _, err = clientset.RbacV1().RoleBindings(namespaceName).Create(context.TODO(), &rbacv1.RoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: serviceAccountName + "-hcp", - Labels: map[string]string{ - "serviceUuid": serviceUuid, - "guid": annotations["guid"], - }, - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: rbacv1.GroupName, - Kind: "ClusterRole", - Name: serviceAccountName + "-hcp", - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Name: serviceAccountName, - Namespace: namespaceName, - }, - }, - }, metav1.CreateOptions{}) - - if err != nil { - log.Logger.Error("Error creating OCP RoleBind", "error", err) - if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { - log.Logger.Error("Error cleaning up the namespace", "error", err) - } - rnew.SetStatus("error") - return - } - } + } + + // Define the KeycloakUser GroupVersionResource + keycloakUserGVR := schema.GroupVersionResource{ + Group: "keycloak.org", + Version: "v1alpha1", + Resource: "keycloakusers", + } + + // Create the KeycloakUser object as an unstructured object + keycloakUser := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "keycloak.org/v1alpha1", + "kind": "KeycloakUser", + "metadata": map[string]interface{}{ + "name": userAccountName, + "namespace": "rhsso", // The namespace where Keycloak is installed + }, + "spec": map[string]interface{}{ + "user": map[string]interface{}{ + "username": userAccountName, + "enabled": true, + "credentials": []interface{}{ + map[string]interface{}{ + "type": "password", + "value": password, + "temporary": false, + }, + }, + }, + "realmSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + "app": "sso", // The label selector for the Keycloak realm + }, + }, + }, + }, + } + + // Create the KeycloakUser resource in the specified namespace + namespace := "rhsso" + _, err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Create(context.TODO(), keycloakUser, metav1.CreateOptions{}) + if err != nil { + log.Logger.Error("Error creating KeycloakUser", "error", err) + } + + fmt.Println("KeycloakUser created successfully") + + } + + // Assign ClusterRole sandbox-hcp (created with gitops) to the SA if hcp option was selected + if value, exists := cloud_selector["hcp"]; exists && (value == "yes" || value == "true") { + _, err = clientset.RbacV1().RoleBindings(namespaceName).Create(context.TODO(), 
&rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceAccountName + "-hcp", + Labels: map[string]string{ + "serviceUuid": serviceUuid, + "guid": annotations["guid"], + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "ClusterRole", + Name: serviceAccountName + "-hcp", + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: serviceAccountName, + Namespace: namespaceName, + }, + }, + }, metav1.CreateOptions{}) + + if err != nil { + log.Logger.Error("Error creating OCP RoleBind", "error", err) + if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { + log.Logger.Error("Error cleaning up the namespace", "error", err) + } + rnew.SetStatus("error") + return + } + } // TODO: parameterize this, or detect when to execute it, otherwise it'll fail // // Create RoleBind for the Service Account in the Namespace for kubevirt @@ -1319,48 +1319,48 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri // return // } - // if cloud_selector has enabled the virt flag, then we give permission to cnv-images namespace - if value, exists := cloud_selector["virt"]; exists && (value == "yes" || value == "true") { - // Look if namespace 'cnv-images' exists - if _, err := clientset.CoreV1().Namespaces().Get(context.TODO(), "cnv-images", metav1.GetOptions{}); err == nil { - - rb := &rbacv1.RoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: "allow-clone-" + namespaceName[:min(51, len(namespaceName))], - Namespace: "cnv-images", - Labels: map[string]string{ - "serviceUuid": serviceUuid, - "guid": annotations["guid"], - }, - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Name: "default", - Namespace: namespaceName, - }, - }, - RoleRef: rbacv1.RoleRef{ - Kind: "ClusterRole", - Name: "datavolume-cloner", - APIGroup: "rbac.authorization.k8s.io", - }, - } - - _, err = clientset.RbacV1().RoleBindings("cnv-images").Create(context.TODO(), rb, metav1.CreateOptions{}) - if err != nil { - if !strings.Contains(err.Error(), "already exists") { - log.Logger.Error("Error creating rolebinding on cnv-images", "error", err) - - if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { - log.Logger.Error("Error cleaning up the namespace", "error", err) - } - rnew.SetStatus("error") - return - } - } - } - } + // if cloud_selector has enabled the virt flag, then we give permission to cnv-images namespace + if value, exists := cloud_selector["virt"]; exists && (value == "yes" || value == "true") { + // Look if namespace 'cnv-images' exists + if _, err := clientset.CoreV1().Namespaces().Get(context.TODO(), "cnv-images", metav1.GetOptions{}); err == nil { + + rb := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "allow-clone-" + namespaceName[:min(51, len(namespaceName))], + Namespace: "cnv-images", + Labels: map[string]string{ + "serviceUuid": serviceUuid, + "guid": annotations["guid"], + }, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "default", + Namespace: namespaceName, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "ClusterRole", + Name: "datavolume-cloner", + APIGroup: "rbac.authorization.k8s.io", + }, + } + + _, err = clientset.RbacV1().RoleBindings("cnv-images").Create(context.TODO(), rb, metav1.CreateOptions{}) + if err != nil { + if !strings.Contains(err.Error(), "already exists") { + log.Logger.Error("Error creating rolebinding on cnv-images", "error", err) + + if 
err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { + log.Logger.Error("Error cleaning up the namespace", "error", err) + } + rnew.SetStatus("error") + return + } + } + } + } secrets, err := clientset.CoreV1().Secrets(namespaceName).List(context.TODO(), metav1.ListOptions{}) if err != nil { @@ -1659,14 +1659,13 @@ func (account *OcpSandboxWithCreds) Delete() error { return err } - // Create an dynamic OpenShift client for non regular objects - dynclientset, err := dynamic.NewForConfig(config) - if err != nil { - log.Logger.Error("Error creating OCP client", "error", err, "name", account.Name) - account.SetStatus("error") - return err - } - + // Create an dynamic OpenShift client for non regular objects + dynclientset, err := dynamic.NewForConfig(config) + if err != nil { + log.Logger.Error("Error creating OCP client", "error", err, "name", account.Name) + account.SetStatus("error") + return err + } // Define the Service Account name serviceAccountName := "sandbox" @@ -1713,24 +1712,23 @@ func (account *OcpSandboxWithCreds) Delete() error { } } - // Delete the User + // Delete the User userAccountName := "sandbox-" + account.Annotations["guid"] - // Define the KeycloakUser GroupVersionResource - keycloakUserGVR := schema.GroupVersionResource{ - Group: "keycloak.org", - Version: "v1alpha1", - Resource: "keycloakusers", - } - - namespace := "rhsso" - err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Delete(context.TODO(), userAccountName, metav1.DeleteOptions{}) + // Define the KeycloakUser GroupVersionResource + keycloakUserGVR := schema.GroupVersionResource{ + Group: "keycloak.org", + Version: "v1alpha1", + Resource: "keycloakusers", + } + + namespace := "rhsso" + err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Delete(context.TODO(), userAccountName, metav1.DeleteOptions{}) if err != nil { log.Logger.Error("Error deleting OCP namespace", "error", err, "name", account.Name) account.SetStatus("error") return err } - _, err = account.Provider.DbPool.Exec( context.Background(), "DELETE FROM resources WHERE id = $1", From cc59ef022a44b46b5cdc2f06097c85da3460867e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Wed, 11 Sep 2024 11:03:38 +0200 Subject: [PATCH 04/13] Make tests pass, ignore when keycloak not found --- internal/models/ocp_sandbox.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index 0dd53e26..1bd44132 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -1667,9 +1667,6 @@ func (account *OcpSandboxWithCreds) Delete() error { return err } - // Define the Service Account name - serviceAccountName := "sandbox" - // Check if the namespace exists _, err = clientset.CoreV1().Namespaces().Get(context.TODO(), account.Namespace, metav1.GetOptions{}) if err != nil { @@ -1724,9 +1721,13 @@ func (account *OcpSandboxWithCreds) Delete() error { namespace := "rhsso" err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Delete(context.TODO(), userAccountName, metav1.DeleteOptions{}) if err != nil { - log.Logger.Error("Error deleting OCP namespace", "error", err, "name", account.Name) - account.SetStatus("error") - return err + if strings.Contains(err.Error(), "not found") { + log.Logger.Info("Keycloak not found, move on", "name", account.Name) + } else { + log.Logger.Error("Error deleting OCP namespace", "error", err, "name", account.Name) + 
account.SetStatus("error") + return err + } } _, err = account.Provider.DbPool.Exec( From f3398bb16d6a49f1dafcf543a1b2a03f7fed0aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Wed, 11 Sep 2024 14:25:43 +0200 Subject: [PATCH 05/13] Save credentials + add functional test --- internal/api/v1/v1.go | 16 +++++- internal/models/ocp_sandbox.go | 20 ++++++-- tests/004_keycloak.hurl | 90 ++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 5 deletions(-) create mode 100644 tests/004_keycloak.hurl diff --git a/internal/api/v1/v1.go b/internal/api/v1/v1.go index e8c8393d..7982d22f 100644 --- a/internal/api/v1/v1.go +++ b/internal/api/v1/v1.go @@ -129,7 +129,7 @@ func (p *PlacementRequest) Bind(r *http.Request) error { if len(p.Resources) == 0 { return errors.New("no resources specified") } - for i, _ := range p.Resources { + for i, resourceRequest := range p.Resources { if p.Resources[i].Annotations == nil { p.Resources[i].Annotations = make(models.Annotations) } @@ -139,6 +139,20 @@ func (p *PlacementRequest) Bind(r *http.Request) error { if p.Resources[i].Quota == nil { p.Resources[i].Quota = &v1.ResourceList{} } + if resourceRequest.CloudSelector != nil { + for k, v := range resourceRequest.CloudSelector { + // We work with string and not bool + // This is a convention to automatically convert "yes" and "no" + // instead of "true" and "false" + // That will help match clusters that have 'yes' when the client sends the cloud.selector to 'true' + if v == "true" { + resourceRequest.CloudSelector[k] = "yes" + } + if v == "false" { + resourceRequest.CloudSelector[k] = "no" + } + } + } } return nil diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index 1bd44132..0c387d54 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -112,6 +112,13 @@ type OcpServiceAccount struct { Token string `json:"token"` } +// Credential for keycloak account +type KeycloakCredential struct { + Kind string `json:"kind"` // "KeycloakCredential" + Username string `json:"username"` + Password string `json:"password"` +} + type OcpSandboxes []OcpSandbox type TokenResponse struct { @@ -1195,6 +1202,7 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri rnew.SetStatus("error") return } + creds := []any{} // Create an user if the keycloak option was enabled if value, exists := cloud_selector["keycloak"]; exists && (value == "yes" || value == "true") { // Generate a random password for the Keycloak user @@ -1248,8 +1256,13 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri log.Logger.Error("Error creating KeycloakUser", "error", err) } - fmt.Println("KeycloakUser created successfully") + log.Logger.Debug("KeycloakUser created successfully") + creds = append(creds, KeycloakCredential{ + Kind: "KeycloakUser", + Username: userAccountName, + Password: password, + }) } // Assign ClusterRole sandbox-hcp (created with gitops) to the SA if hcp option was selected @@ -1390,13 +1403,12 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri break } } - creds := []any{ + creds = append(creds, OcpServiceAccount{ Kind: "ServiceAccount", Name: serviceAccountName, Token: string(saSecret.Data["token"]), - }, - } + }) rnew.Credentials = creds rnew.Status = "success" diff --git a/tests/004_keycloak.hurl b/tests/004_keycloak.hurl new file mode 100644 index 00000000..236e47d5 --- /dev/null +++ b/tests/004_keycloak.hurl @@ -0,0 +1,90 @@ 
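+# Functional test for the keycloak cloud_selector option: request an OcpSandbox
+# placement with keycloak enabled, wait for it to become ready, verify that a
+# KeycloakUser credential (username and password) is returned alongside the
+# ServiceAccount token, then delete the placement.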
+################################################################################# +# Get an access token using the login token +################################################################################# + +GET {{host}}/api/v1/login +Authorization: Bearer {{login_token}} +HTTP 200 +[Captures] +access_token: jsonpath "$.access_token" +[Asserts] +jsonpath "$.access_token" isString +jsonpath "$.access_token_exp" isString + +################################################################################# +# Get an Admin access token using the login token +################################################################################# + +GET {{host}}/api/v1/login +Authorization: Bearer {{login_token_admin}} +HTTP 200 +[Captures] +access_token_admin: jsonpath "$.access_token" +[Asserts] +jsonpath "$.access_token" isString +jsonpath "$.access_token_exp" isString + +################################################################################# +# Create a new placement, with keycloak enabled +################################################################################# + +POST {{host}}/api/v1/placements +Authorization: Bearer {{access_token}} +{ + "service_uuid": "{{uuid}}", + "resources": [ + { + "kind": "OcpSandbox", + "cloud_selector": { + "keycloak": "true" + } + } + ], + "annotations": { + "tests": "Simple OcpSandbox placement", + "guid": "testg", + "env_type": "ocp4-cluster-blablablabla" + } +} +HTTP 200 +[Captures] +sandbox_name: jsonpath "$.Placement.resources[0].name" +[Asserts] +jsonpath "$.message" == "Placement Created" +jsonpath "$.Placement.service_uuid" == "{{uuid}}" +jsonpath "$.Placement.resources" count == 1 +jsonpath "$.Placement.resources[0].status" == "initializing" + +################################################################################# +# Wait until the placement is succesfull and resources are ready +################################################################################# + +GET {{host}}/api/v1/placements/{{uuid}} +Authorization: Bearer {{access_token}} +[Options] +retry: 40 +HTTP 200 +[Asserts] +jsonpath "$.service_uuid" == "{{uuid}}" +jsonpath "$.status" == "success" +jsonpath "$.resources" count == 1 +jsonpath "$.resources[0].status" == "success" +jsonpath "$.resources[0].ingress_domain" split "." 
count > 2 +jsonpath "$.resources[0].credentials" count >= 2 +jsonpath "$.resources[0].credentials[?(@.kind == 'ServiceAccount')].token" count == 1 +jsonpath "$.resources[0].credentials[?(@.kind == 'KeycloakUser')].username" count == 1 +jsonpath "$.resources[0].credentials[?(@.kind == 'KeycloakUser')].password" count == 1 + +################################################################################# +# Delete placement +################################################################################# + +DELETE {{host}}/api/v1/placements/{{uuid}} +Authorization: Bearer {{access_token}} +HTTP 202 + +GET {{host}}/api/v1/placements/{{uuid}} +Authorization: Bearer {{access_token}} +[Options] +retry: 40 +HTTP 404 From 3212161222c71483cfa226a8db5bb006d85ce963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Wed, 11 Sep 2024 15:41:16 +0200 Subject: [PATCH 06/13] Use the credentials saved to delete the user That will help when the username is going to be more random --- internal/models/ocp_sandbox.go | 36 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index 0c387d54..cd5d229e 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -1722,7 +1722,6 @@ func (account *OcpSandboxWithCreds) Delete() error { } // Delete the User - userAccountName := "sandbox-" + account.Annotations["guid"] // Define the KeycloakUser GroupVersionResource keycloakUserGVR := schema.GroupVersionResource{ Group: "keycloak.org", @@ -1730,16 +1729,35 @@ func (account *OcpSandboxWithCreds) Delete() error { Resource: "keycloakusers", } + usernames := []string{} + for _, cred := range account.Credentials { + if m, ok := cred.(map[string]interface{}); ok { + if m["kind"] == "KeycloakUser" { + if username, ok := m["username"].(string); ok { + usernames = append(usernames, username) + } + } + } + } + namespace := "rhsso" - err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Delete(context.TODO(), userAccountName, metav1.DeleteOptions{}) - if err != nil { - if strings.Contains(err.Error(), "not found") { - log.Logger.Info("Keycloak not found, move on", "name", account.Name) - } else { - log.Logger.Error("Error deleting OCP namespace", "error", err, "name", account.Name) - account.SetStatus("error") - return err + + for _, userAccountName := range usernames { + + err = dynclientset.Resource(keycloakUserGVR).Namespace(namespace).Delete(context.TODO(), userAccountName, metav1.DeleteOptions{}) + if err != nil { + if strings.Contains(err.Error(), "not found") { + log.Logger.Info("Keycloak not found, move on", "name", account.Name) + } else { + log.Logger.Error("Error deleting KeycloadUser", "error", err, "name", account.Name) + account.SetStatus("error") + return err + } } + + log.Logger.Info("KeycloakUser deleted", + "cluster", account.OcpSharedClusterConfigurationName, + "name", account.Name, "user", userAccountName) } _, err = account.Provider.DbPool.Exec( From b0f9cd25db519581f248774497c81d6101a0ed3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Wed, 11 Sep 2024 18:02:26 +0200 Subject: [PATCH 07/13] s/interface{}/any --- internal/models/ocp_sandbox.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index cd5d229e..a11fc5ad 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -1221,27 +1221,27 @@ func (a 
*OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri // Create the KeycloakUser object as an unstructured object keycloakUser := &unstructured.Unstructured{ - Object: map[string]interface{}{ + Object: map[string]any{ "apiVersion": "keycloak.org/v1alpha1", "kind": "KeycloakUser", - "metadata": map[string]interface{}{ + "metadata": map[string]any{ "name": userAccountName, "namespace": "rhsso", // The namespace where Keycloak is installed }, - "spec": map[string]interface{}{ - "user": map[string]interface{}{ + "spec": map[string]any{ + "user": map[string]any{ "username": userAccountName, "enabled": true, - "credentials": []interface{}{ - map[string]interface{}{ + "credentials": []any{ + map[string]any{ "type": "password", "value": password, "temporary": false, }, }, }, - "realmSelector": map[string]interface{}{ - "matchLabels": map[string]interface{}{ + "realmSelector": map[string]any{ + "matchLabels": map[string]any{ "app": "sso", // The label selector for the Keycloak realm }, }, @@ -1731,7 +1731,7 @@ func (account *OcpSandboxWithCreds) Delete() error { usernames := []string{} for _, cred := range account.Credentials { - if m, ok := cred.(map[string]interface{}); ok { + if m, ok := cred.(map[string]any); ok { if m["kind"] == "KeycloakUser" { if username, ok := m["username"].(string); ok { usernames = append(usernames, username) From ef686f2d89e60745eb99f280a7c9730b4f90032b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Thu, 12 Sep 2024 10:42:10 +0200 Subject: [PATCH 08/13] Don't add hcp, it's already been added --- internal/models/ocp_sandbox.go | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go index 98cabde0..72dd9fb1 100644 --- a/internal/models/ocp_sandbox.go +++ b/internal/models/ocp_sandbox.go @@ -1299,40 +1299,6 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri } } - // Assign ClusterRole sandbox-hcp (created with gitops) to the SA if hcp option was selected - if value, exists := cloud_selector["hcp"]; exists && (value == "yes" || value == "true") { - _, err = clientset.RbacV1().RoleBindings(namespaceName).Create(context.TODO(), &rbacv1.RoleBinding{ - ObjectMeta: metav1.ObjectMeta{ - Name: serviceAccountName + "-hcp", - Labels: map[string]string{ - "serviceUuid": serviceUuid, - "guid": annotations["guid"], - }, - }, - RoleRef: rbacv1.RoleRef{ - APIGroup: rbacv1.GroupName, - Kind: "ClusterRole", - Name: serviceAccountName + "-hcp", - }, - Subjects: []rbacv1.Subject{ - { - Kind: "ServiceAccount", - Name: serviceAccountName, - Namespace: namespaceName, - }, - }, - }, metav1.CreateOptions{}) - - if err != nil { - log.Logger.Error("Error creating OCP RoleBind", "error", err) - if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil { - log.Logger.Error("Error cleaning up the namespace", "error", err) - } - rnew.SetStatus("error") - return - } - } - // if cloud_selector has enabled the virt flag, then we give permission to cnv-images namespace if value, exists := cloud_selector["virt"]; exists && (value == "yes" || value == "true") { // Look if namespace 'cnv-images' exists From 88297a4da921d1321106f6198ac56bcba47f44d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Thu, 12 Sep 2024 10:44:35 +0200 Subject: [PATCH 09/13] rename test --- tests/{004_keycloak.hurl => 005_keycloak.hurl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) 
rename tests/{004_keycloak.hurl => 005_keycloak.hurl} (100%) diff --git a/tests/004_keycloak.hurl b/tests/005_keycloak.hurl similarity index 100% rename from tests/004_keycloak.hurl rename to tests/005_keycloak.hurl From f569055650fb7ed5cd5efd1b682618d4b1d93ad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Fri, 13 Sep 2024 12:39:57 +0200 Subject: [PATCH 10/13] Update swagger --- docs/api-reference/swagger.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api-reference/swagger.yaml b/docs/api-reference/swagger.yaml index dd0924da..2082e1db 100644 --- a/docs/api-reference/swagger.yaml +++ b/docs/api-reference/swagger.yaml @@ -2324,8 +2324,8 @@ components: StrictDefaultSandboxQuota is a flag to determine if the default sandbox quota should be strictly enforced. If set to true, the default sandbox quota will be enforced as a hard limit. Requested quota not be allowed to exceed the default. - If set to false, the default sandbox will be updated - to the requested quota. + If set to false, the default sandbox-quota will be updated + to the requested quota even if some values are greater. default: false quota_required: type: boolean From 584ed61dcec9266ea10beaaa7b91cfb60fe65bc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillaume=20Cor=C3=A9?= Date: Wed, 18 Sep 2024 15:29:19 +0200 Subject: [PATCH 11/13] rename hurl file --- tests/{005_keycloak.hurl => 006_keycloak.hurl} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{005_keycloak.hurl => 006_keycloak.hurl} (100%) diff --git a/tests/005_keycloak.hurl b/tests/006_keycloak.hurl similarity index 100% rename from tests/005_keycloak.hurl rename to tests/006_keycloak.hurl From c86d897dbab28bafae4bc05a6a54d2848339e188 Mon Sep 17 00:00:00 2001 From: Alberto Gonzalez Rodriguez Date: Sun, 27 Oct 2024 23:05:23 +0100 Subject: [PATCH 12/13] Rebase keycloak (#91) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add secret to generate a token (#82) * Add secret to generate a token * Add secret to generate a token * Use new fork of aws-nuke (#83) * Use new fork of aws-nuke https://github.com/rebuy-de/aws-nuke is not maintained anymore. The official fork is https://github.com/ekristen/aws-nuke as mentioned in the readme. This change updates the conan image and playbook. - use the binary from the new fork - to be on the safe side: keep using the old binary as a last step * Update readme * Fix helm chart for conan and update readme * Fix command line with new version of aws nuke * tool: add login to hurl file (mark for cleanup) * aws-nuke config: Use new keys in conf file fixes: WARN[0000] deprecated configuration key 'account-blocklist' - please use 'blocklist' instead component=config WARN[0000] deprecated configuration key 'feature-flags' - please use 'settings' instead component=config * aws-nuke: Add separate config for legacy + fixes * aws-nuke: fail playbook if legacy aws-nuke failed * Fix typo * Fix ansible deprecation warnings (#84) * OcpSandbox: Fix credentials output when empty/nil (#87) * Adding health check for ocp shared cluster (#86) * Adding health check for ocp shared cluster * Adding health check for ocp shared cluster * Adding health check for ocp shared cluster * conan: fix ongoing cleanup errors (#85) - bump aws-nuke to v3.26.0 - Instances setup with the disable-stop-protection were not deleted by aws-nuke. => Enable the DisableStopProtection option for aws-nuke. 
- add a 'debug' environment variable to better control output of conan by default improve output of conan by being a little bit less verbose. - EC2Images: include disabled and deprecated images + disable deregistration protection disabled, deprecated images or images with deregistration protection weren't deleted by aws-nuke - `manual_cleanup.py`: Release EIP that are in a NetworkBorderGroup - aws-nuke misses them. - `manual_cleanup.py`: VPC can't be deleted when they have a VPC Lattice target group registered. Delete VPC Lattice target groups and targets and deregister it from the VPC. - Improve output of the ansible playbook by reducing noise: * add the `--quiet` option to the aws-nuke command * do not include `stdout` and `stderr` in the output of the register for the aws-nuke task `stdout_lines` and `stderr_lines` are enough and more readable. - `requirements.txt`: do not pin versions of python modules. Instead, use the latest version of each module those will be baked into the container image. That is useful here to have the DeletionMode option for the `delete_stack()` function for deleting faulty cloudformation stacks. - Add duration of the "cleanup" run at the end for each sandbox. ``` 2024-10-09T06:39:11+00:00 sandbox123 reset took 30m20s ``` - Cloudformation stacks are sometimes stuck in DELETE_FAILED because a resource part of the stack is already deleted. in `manual_cleanup.py` use the `FORCE_DELETE_STACK` option. - Fix some Ansible deprecation warnings * conan script: fix test with empty var Fixes the error: ./wipe_sandbox.sh: line 122: [: : integer expression expected * Conan performance improvements (#88) Before: **35+ minutes** to cleanup a sandbox After and without `aws-nuke-legacy`: **~5 minutes** * Throw an error if aws-nuke-legacy deletes resource(s) * Add a flag to disable/enable aws-nuke legacy Once we're sure no resource is ever cleaned up by aws-nuke legacy after aws-nuke new fork, we can easily disable it. * Add ansible log when debug is on * Give aws-nuke up to 1h * target groups cleanup is done in `manual_cleanup.py`, remove it from ansible tasks * Disassociation of EIP is done in `manual_cleanup.py`, remove it from ansible tasks * RDS: Disable deletion protection is done by aws-nuke, remove it from the ansible tasks * Termination protection is done by aws-nuke, remove it from the ansible tasks * Reduce noise in logs * Print `aws-nuke` summary, including the number of resources nuked. ``` reset_sandbox939.log:Nuke complete: 0 failed, 2495 skipped, 3 finished. ``` * Do not run `manual_cleanup.py` first but only after running aws-nuke once. 
* Enable profiling (http/pprof) (#89) - create debug routes behind *admin* authentication * Fix loop to get secret (#90) * Fix loop to get secret, add a stop condition --------- Co-authored-by: Guillaume Coré --- Containerfile.conan | 10 +- cmd/sandbox-api/main.go | 29 ++++- ...p_shared_cluster_configuration_handlers.go | 37 ++++++ conan/conan.sh | 9 ++ conan/readme.adoc | 10 +- conan/requirements.txt | 74 ++++++------ conan/wipe_sandbox.sh | 33 +++++- deploy/helm-conan/templates/namespace.yaml | 13 -- deploy/helm-conan/values.yaml | 1 + docs/api-reference/swagger.yaml | 34 ++++++ internal/models/ocp_sandbox.go | 69 ++++++++++- .../roles/infra-aws-sandbox/defaults/main.yml | 3 +- .../infra-aws-sandbox/files/manual_cleanup.py | 110 ++++++++++++++++- .../roles/infra-aws-sandbox/tasks/iam.yml | 35 ++++-- .../roles/infra-aws-sandbox/tasks/keypair.yml | 4 +- .../tasks/manual_cleanup.yml | 111 +----------------- .../roles/infra-aws-sandbox/tasks/reset.yml | 92 +++++++++++---- .../templates/nuke-config-legacy.yml.j2 | 21 ++++ .../templates/nuke-config.yml.j2 | 45 +++++-- tests/001.hurl | 2 +- tools/sandbox_mark_for_cleanup.hurl | 13 ++ 21 files changed, 527 insertions(+), 228 deletions(-) delete mode 100644 deploy/helm-conan/templates/namespace.yaml create mode 100644 playbooks/roles/infra-aws-sandbox/templates/nuke-config-legacy.yml.j2 diff --git a/Containerfile.conan b/Containerfile.conan index d0d6ce2d..a2e64293 100644 --- a/Containerfile.conan +++ b/Containerfile.conan @@ -9,7 +9,8 @@ RUN make sandbox-list FROM registry.access.redhat.com/ubi8/ubi:latest MAINTAINER Guillaume Coré -ARG AWSNUKE_VERSION=v2.25.0 +ARG AWSNUKE_VERSION=v3.26.0 +ARG AWSNUKE_LEGACY_VERSION=v2.25.0 ARG RUSH_VERSION=v0.5.4 USER root @@ -52,9 +53,12 @@ RUN mkdir -p -m 770 /home/opentlc-mgr/pool_management/output_dir_sandbox \ # aws-nuke ############################## && curl --silent --location \ - https://github.com/rebuy-de/aws-nuke/releases/download/${AWSNUKE_VERSION}/aws-nuke-${AWSNUKE_VERSION}-linux-amd64.tar.gz \ + https://github.com/ekristen/aws-nuke/releases/download/${AWSNUKE_VERSION}/aws-nuke-${AWSNUKE_VERSION}-linux-amd64.tar.gz \ + | tar -xz -C /usr/local/bin --wildcards 'aws-nuke' \ + && curl --silent --location \ + https://github.com/rebuy-de/aws-nuke/releases/download/${AWSNUKE_LEGACY_VERSION}/aws-nuke-${AWSNUKE_LEGACY_VERSION}-linux-amd64.tar.gz \ | tar -xz -C /usr/local/bin --wildcards 'aws-nuke-*-linux-amd64' \ - && ln -s /usr/local/bin/aws-nuke-${AWSNUKE_VERSION}-linux-amd64 /usr/local/bin/aws-nuke \ + && ln -s /usr/local/bin/aws-nuke-${AWSNUKE_LEGACY_VERSION}-linux-amd64 /usr/local/bin/aws-nuke-legacy \ ############################## # Rush ############################## diff --git a/cmd/sandbox-api/main.go b/cmd/sandbox-api/main.go index f6410ca7..481f7538 100644 --- a/cmd/sandbox-api/main.go +++ b/cmd/sandbox-api/main.go @@ -5,15 +5,11 @@ import ( _ "embed" "log/slog" "net/http" + "net/http/pprof" "os" "strings" "time" - "github.com/rhpds/sandbox/internal/config" - sandboxdb "github.com/rhpds/sandbox/internal/dynamodb" - "github.com/rhpds/sandbox/internal/log" - "github.com/rhpds/sandbox/internal/models" - "github.com/getkin/kin-openapi/openapi3" gorillamux "github.com/getkin/kin-openapi/routers/gorillamux" "github.com/go-chi/chi/v5" @@ -21,6 +17,11 @@ import ( "github.com/go-chi/httplog/v2" "github.com/go-chi/jwtauth/v5" "github.com/jackc/pgx/v4/pgxpool" + + "github.com/rhpds/sandbox/internal/config" + sandboxdb "github.com/rhpds/sandbox/internal/dynamodb" + "github.com/rhpds/sandbox/internal/log" + 
"github.com/rhpds/sandbox/internal/models" ) //go:embed assets/swagger.yaml @@ -267,6 +268,7 @@ func main() { r.Post("/api/v1/ocp-shared-cluster-configurations", baseHandler.CreateOcpSharedClusterConfigurationHandler) r.Get("/api/v1/ocp-shared-cluster-configurations", baseHandler.GetOcpSharedClusterConfigurationsHandler) r.Get("/api/v1/ocp-shared-cluster-configurations/{name}", baseHandler.GetOcpSharedClusterConfigurationHandler) + r.Get("/api/v1/ocp-shared-cluster-configurations/{name}/health", baseHandler.HealthOcpSharedClusterConfigurationHandler) r.Put("/api/v1/ocp-shared-cluster-configurations/{name}/disable", baseHandler.DisableOcpSharedClusterConfigurationHandler) r.Put("/api/v1/ocp-shared-cluster-configurations/{name}/enable", baseHandler.EnableOcpSharedClusterConfigurationHandler) r.Put("/api/v1/ocp-shared-cluster-configurations/{name}/update", baseHandler.UpdateOcpSharedClusterConfigurationHandler) @@ -278,6 +280,23 @@ func main() { r.Delete("/api/v1/reservations/{name}", baseHandler.DeleteReservationHandler) }) + // --------------------------------------------------------------------- + // Profiling + // --------------------------------------------------------------------- + router.Group(func(r chi.Router) { + // --------------------------------- + // Admin auth but no OpenAPI validation + // --------------------------------- + r.Use(jwtauth.Verifier(tokenAuth)) + r.Use(AuthenticatorAdmin) + // Profiling + r.Get("/debug/pprof/", pprof.Index) + r.Get("/debug/pprof/profile", pprof.Profile) + r.Get("/debug/pprof/trace", pprof.Trace) + r.Get("/debug/pprof/cmdline", pprof.Cmdline) + r.Get("/debug/pprof/symbol", pprof.Symbol) + }) + // --------------------------------------------------------------------- // Login Routes // --------------------------------------------------------------------- diff --git a/cmd/sandbox-api/ocp_shared_cluster_configuration_handlers.go b/cmd/sandbox-api/ocp_shared_cluster_configuration_handlers.go index 2710ae32..6231b4f9 100644 --- a/cmd/sandbox-api/ocp_shared_cluster_configuration_handlers.go +++ b/cmd/sandbox-api/ocp_shared_cluster_configuration_handlers.go @@ -86,6 +86,43 @@ func (h *BaseHandler) DisableOcpSharedClusterConfigurationHandler(w http.Respons }) } +func (h *BaseHandler) HealthOcpSharedClusterConfigurationHandler(w http.ResponseWriter, r *http.Request) { + // Get the name of the OCP shared cluster configuration from the URL + name := chi.URLParam(r, "name") + + // Get the OCP shared cluster configuration from the database + cluster, err := h.OcpSandboxProvider.GetOcpSharedClusterConfigurationByName(name) + if err != nil { + if err == pgx.ErrNoRows { + w.WriteHeader(http.StatusNotFound) + render.Render(w, r, &v1.Error{ + HTTPStatusCode: http.StatusNotFound, + Message: "OCP shared cluster configuration not found", + }) + return + } + + w.WriteHeader(http.StatusInternalServerError) + render.Render(w, r, &v1.Error{ + HTTPStatusCode: http.StatusInternalServerError, + Message: "Failed to get OCP shared cluster configuration", + ErrorMultiline: []string{err.Error()}, + }) + return + } + + err = cluster.TestConnection() + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + render.Render(w, r, &v1.Error{ + HTTPStatusCode: http.StatusInternalServerError, + Message: "Error connecting to OpenShift Cluster", + ErrorMultiline: []string{err.Error()}, + }) + } + + w.WriteHeader(http.StatusOK) +} func (h *BaseHandler) EnableOcpSharedClusterConfigurationHandler(w http.ResponseWriter, r *http.Request) { // Get the name of the OCP shared cluster 
configuration from the URL name := chi.URLParam(r, "name") diff --git a/conan/conan.sh b/conan/conan.sh index da22c64e..46a9c91e 100755 --- a/conan/conan.sh +++ b/conan/conan.sh @@ -64,6 +64,13 @@ fi # the conan process owning the lock. lock_timeout=${lock_timeout:-2} + +# Variable to manage output loglevel +debug=${debug:-false} + +# Control weither to run the legacy aws-nuke or not, in addition to the active fork +run_aws_nuke_legacy=${run_aws_nuke_legacy:-false} + ############## export AWSCLI @@ -87,6 +94,8 @@ export threads export vault_file export workdir export sandbox_filter +export debug +export run_aws_nuke_legacy ORIG="$(cd "$(dirname "$0")" || exit; pwd)" diff --git a/conan/readme.adoc b/conan/readme.adoc index 72a14223..7d2410d1 100644 --- a/conan/readme.adoc +++ b/conan/readme.adoc @@ -6,11 +6,11 @@ image::conan.webp[Conan the destroyer of Sandboxes,300,300,float="left"] It watches the pool of sandboxes, and look for those marked as `to_cleanup`. -Then it runs link:https://github.com/rebuy-de/aws-nuke[aws-nuke] to wipe them, and put them back in the pool of available sandboxes. +Then it runs link:https://github.com/ekristen/aws-nuke[aws-nuke] to wipe them, and put them back in the pool of available sandboxes. == Dependencies -* link:https://github.com/rebuy-de/aws-nuke[`aws-nuke`] binary +* link:https://github.com/ekristen/aws-nuke[`aws-nuke`] binary * IPA client * kerberos, `kinit` * link:../readme.adoc[`sandbox-list`] binary @@ -94,3 +94,9 @@ podman run -e sandbox_filter="^sandbox2345 " ... $ podman secret rm vault_file aws_credentials ---- + +== Install via helm + +---- +helm install -f .dev.conan.yaml sandbox-conan deploy/helm-conan/ +---- diff --git a/conan/requirements.txt b/conan/requirements.txt index 4320ce26..18d09c41 100644 --- a/conan/requirements.txt +++ b/conan/requirements.txt @@ -1,37 +1,37 @@ -ansible-core==2.15.6 -boto3==1.29.5 -botocore==1.32.5 -cffi==1.16.0 -colorama==0.4.6 -cryptography==41.0.5 -decorator==5.1.1 -distro==1.8.0 -dnspython==2.4.2 -docutils==0.20.1 -gssapi==1.8.3 -importlib-resources==5.0.7 -ipa==4.10.2 -ipaclient==4.10.2 -ipalib==4.10.2 -ipaplatform==4.10.2 -ipapython==4.10.2 -Jinja2==3.1.2 -jmespath==1.0.1 -MarkupSafe==2.1.3 -netaddr==0.9.0 -packaging==23.2 -psutil==5.9.6 -pyasn1==0.5.1 -pyasn1-modules==0.3.0 -pycparser==2.21 -pypng==0.20220715.0 -python-dateutil==2.8.2 -PyYAML==6.0.1 -qrcode==7.4.2 -resolvelib==1.0.1 -rsa==4.9 -s3transfer==0.7.0 -selinux==0.3.0 -six==1.16.0 -typing_extensions==4.8.0 -urllib3==1.26.18 +ansible-core +boto3 +botocore +cffi +colorama +cryptography +decorator +distro +dnspython +docutils +gssapi +importlib-resources +ipa +ipaclient +ipalib +ipaplatform +ipapython +Jinja2 +jmespath +MarkupSafe +netaddr +packaging +psutil +pyasn1 +pyasn1-modules +pycparser +pypng +python-dateutil +PyYAML +qrcode +resolvelib +rsa +s3transfer +selinux +six +typing_extensions +urllib3 diff --git a/conan/wipe_sandbox.sh b/conan/wipe_sandbox.sh index 49205196..4fc773c4 100755 --- a/conan/wipe_sandbox.sh +++ b/conan/wipe_sandbox.sh @@ -7,6 +7,7 @@ max_retries=${max_retries:-2} aws_nuke_retries=${aws_nuke_retries:-0} # retry after 48h TTL_EVENTLOG=$((3600*24)) +debug=${debug:-false} # Mandatory ENV variables @@ -74,7 +75,7 @@ get_conan_cleanup_count() { exit 1 fi - if [ "${conan_cleanup_count}" = "null" ]; then + if [ "${conan_cleanup_count}" = "null" ] || [ -z "${conan_cleanup_count}" ]; then conan_cleanup_count=0 fi @@ -119,13 +120,19 @@ EOM # check if max_retries is reached if [ "$(get_conan_cleanup_count "${sandbox}")" -ge 
"${max_retries}" ]; then - echo "$(date -uIs) ${sandbox} max_retries reached, skipping for now, will retry after 24h" + # print info only once. + if [ ! -e "/tmp/${sandbox}_max_retries" ]; then + echo "$(date -uIs) ${sandbox} max_retries reached, skipping for now, will retry after 24h" + touch "/tmp/${sandbox}_max_retries" + fi rm "${errlog}" return 1 fi if grep -q ConditionalCheckFailedException "${errlog}"; then - echo "$(date -uIs) Another process is already cleaning up ${sandbox}: skipping" + if [ "${debug}" = "true" ]; then + echo "$(date -uIs) Another process is already cleaning up ${sandbox}: skipping" + fi rm "${errlog}" return 1 else @@ -136,7 +143,6 @@ EOM fi fi - # If anything happens, unlock the sandbox trap "_on_exit" EXIT @@ -201,6 +207,7 @@ sandbox_reset() { echo "$(date -uIs) reset sandbox${s}" >> "${eventlog}" echo "$(date -uIs) ${sandbox} reset starting..." + start_time=$(date +%s) export ANSIBLE_NO_TARGET_SYSLOG=True @@ -232,10 +239,28 @@ sandbox_reset() { -e kerberos_keytab="${kerberos_keytab:-}" \ -e kerberos_user="${kerberos_user}" \ -e kerberos_password="${kerberos_password:-}" \ + -e run_aws_nuke_legacy="${run_aws_nuke_legacy:-false}" \ reset_single.yml > "${logfile}"; then echo "$(date -uIs) ${sandbox} reset OK" + end_time=$(date +%s) + duration=$((end_time - start_time)) + # Calculate the time it took + echo "$(date -uIs) ${sandbox} reset took $((duration / 60))m$((duration % 60))s" + echo "$(date -uIs) ${sandbox} $(grep -Eo 'Nuke complete: [^"]+' "${logfile}")" + + if [ "${debug}" = "true" ]; then + echo "$(date -uIs) =========BEGIN========== ${logfile}" + cat "${logfile}" + echo "$(date -uIs) =========END============ ${logfile}" + fi + rm "${eventlog}" else + end_time=$(date +%s) + duration=$((end_time - start_time)) + # Calculate the time it took + echo "$(date -uIs) ${sandbox} reset took $((duration / 60))m$((duration % 60))s" + echo "$(date -uIs) ${sandbox} reset FAILED." 
>&2
        echo "$(date -uIs) =========BEGIN========== ${logfile}" >&2
        cat "${logfile}" >&2
diff --git a/deploy/helm-conan/templates/namespace.yaml b/deploy/helm-conan/templates/namespace.yaml
deleted file mode 100644
index 3939ae22..00000000
--- a/deploy/helm-conan/templates/namespace.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
----
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: {{ .Values.namespace }}
-  labels:
-    app.kubernetes.io/version: {{ .Values.deployment.tag | default .Chart.AppVersion | quote }}
-    meta.helm.sh/release-name: {{ .Release.Name }}
-    app.kubernetes.io/managed-by: {{ .Release.Service }}
-    meta.helm.sh/release-namespace: {{ .Release.Namespace }}
-    {{- if .Values.labels }}
-    {{- toYaml .Values.labels | nindent 4 }}
-    {{- end }}
diff --git a/deploy/helm-conan/values.yaml b/deploy/helm-conan/values.yaml
index 49a7e0b1..dfabec0d 100644
--- a/deploy/helm-conan/values.yaml
+++ b/deploy/helm-conan/values.yaml
@@ -1,4 +1,5 @@
 ---
+namespace: babylon-sandbox-conan
 deployment:
   image: quay.io/rhpds/sandbox-conan
   tag: latest
diff --git a/docs/api-reference/swagger.yaml b/docs/api-reference/swagger.yaml
index 334f02c7..7c7c2ec7 100644
--- a/docs/api-reference/swagger.yaml
+++ b/docs/api-reference/swagger.yaml
@@ -1487,6 +1487,40 @@ paths:
             application/json:
               schema:
                 $ref: "#/components/schemas/Error"
+  /ocp-shared-cluster-configurations/{name}/health:
+    get:
+      summary: Get the health status of an OcpSharedClusterConfiguration endpoint
+      operationId: healthOcpSharedClusterConfiguration
+      tags:
+        - admin
+      parameters:
+        - name: name
+          in: path
+          required: true
+          description: The name of the OcpSharedClusterConfiguration
+          schema:
+            type: string
+            example: ocp-cluster-1
+      responses:
+        '200':
+          description: Validation successful
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/OcpSharedClusterConfiguration"
+        '404':
+          description: healthOcpSharedClusterConfiguration OcpSharedClusterConfiguration not found
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        default:
+          description: healthOcpSharedClusterConfiguration unexpected error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
   /ocp-shared-cluster-configurations/{name}/enable:
     put:
       summary: Enable an OcpSharedClusterConfiguration
diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go
index debeabe3..d82c3e0a 100644
--- a/internal/models/ocp_sandbox.go
+++ b/internal/models/ocp_sandbox.go
@@ -107,7 +107,7 @@ type OcpSandbox struct {
 type OcpSandboxWithCreds struct {
 	OcpSandbox

-	Credentials []any `json:"credentials"`
+	Credentials []any `json:"credentials,omitempty"`
 	Provider    *OcpSandboxProvider `json:"-"`
 }
@@ -862,6 +862,31 @@ func (a *OcpSharedClusterConfiguration) CreateRestConfig() (*rest.Config, error)
 	return clientcmd.RESTConfigFromKubeConfig([]byte(a.Kubeconfig))
 }
+
+func (a *OcpSharedClusterConfiguration) TestConnection() error {
+	// Build a rest.Config from the kubeconfig stored in the configuration
+	config, err := a.CreateRestConfig()
+	if err != nil {
+		log.Logger.Error("Error creating OCP config", "error", err)
+		return errors.New("Error creating OCP config: " + err.Error())
+	}
+
+	// Create an OpenShift client
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		log.Logger.Error("Error creating OCP client", "error", err)
+		return errors.New("Error creating OCP client: " + err.Error())
+	}
+
+	// Check that the "default" namespace is accessible
+	_, err = clientset.CoreV1().Namespaces().Get(context.TODO(), "default", metav1.GetOptions{})
+	if err != nil {
+		log.Logger.Error("Error accessing default namespace", "error", err)
+		return errors.New("Error accessing default namespace: " + err.Error())
+	}
+	return nil
+}
+
 func includeNodeInUsageCalculation(node v1.Node) (bool, string) {
 	if node.Spec.Unschedulable {
 		return false, "unschedulable"
@@ -1420,21 +1445,47 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri
 			}
 		}
 	}
-	secrets, err := clientset.CoreV1().Secrets(namespaceName).List(context.TODO(), metav1.ListOptions{})
+
+	// Create a token secret explicitly: needed on clusters without the internal image registry and on newer OCP versions, where token secrets are no longer auto-generated
+	secret := &v1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      serviceAccountName + "-token",
+			Namespace: namespaceName,
+			Annotations: map[string]string{
+				"kubernetes.io/service-account.name": serviceAccountName,
+			},
+		},
+		Type: v1.SecretTypeServiceAccountToken,
+	}
+	_, err = clientset.CoreV1().Secrets(namespaceName).Create(context.TODO(), secret, metav1.CreateOptions{})
 	if err != nil {
-		log.Logger.Error("Error listing OCP secrets", "error", err)
+		log.Logger.Error("Error creating secret for SA", "error", err)
 		// Delete the namespace
 		if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil {
-			log.Logger.Error("Error creating OCP service account", "error", err)
+			log.Logger.Error("Error cleaning up the namespace", "error", err)
 		}
 		rnew.SetStatus("error")
 		return
 	}

+	maxRetries := 5
+	retryCount := 0
+	sleepDuration := time.Second * 5
 	var saSecret *v1.Secret
 	// Loop until the token secret is populated
 	for {
+		secrets, err := clientset.CoreV1().Secrets(namespaceName).List(context.TODO(), metav1.ListOptions{})
+		if err != nil {
+			log.Logger.Error("Error listing OCP secrets", "error", err)
+			// Delete the namespace
+			if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil {
+				log.Logger.Error("Error cleaning up the namespace", "error", err)
+			}
+			rnew.SetStatus("error")
+			return
+		}
+
 		for _, secret := range secrets.Items {
 			if val, exists := secret.ObjectMeta.Annotations["kubernetes.io/service-account.name"]; exists {
 				if _, exists := secret.Data["token"]; exists {
@@ -1448,6 +1499,16 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri
 		if saSecret != nil {
 			break
 		}
+		// Retry logic
+		retryCount++
+		if retryCount >= maxRetries {
+			log.Logger.Error("Max retries reached, service account secret not found")
+			rnew.SetStatus("error")
+			return
+		}
+
+		// Sleep before retrying
+		time.Sleep(sleepDuration)
 	}

 	creds = append(creds,
 		OcpServiceAccount{
diff --git a/playbooks/roles/infra-aws-sandbox/defaults/main.yml b/playbooks/roles/infra-aws-sandbox/defaults/main.yml
index 044d0f32..4ff74118 100644
--- a/playbooks/roles/infra-aws-sandbox/defaults/main.yml
+++ b/playbooks/roles/infra-aws-sandbox/defaults/main.yml
@@ -36,7 +36,8 @@ ocpkey: ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC8c3m39CoyA3bbgQUui3MGlJOryfg98NwI
 ####################################
 nuke_sandbox: true
-aws_nuke_binary_path: /usr/bin/aws-nuke
+aws_nuke_binary_path: aws-nuke
+aws_nuke_legacy_binary_path: aws-nuke-legacy

 aws_nuke_account_blacklist:
   - 017310218799 # Master account
diff --git a/playbooks/roles/infra-aws-sandbox/files/manual_cleanup.py b/playbooks/roles/infra-aws-sandbox/files/manual_cleanup.py
index 11281064..83c5ed36 100644
--- a/playbooks/roles/infra-aws-sandbox/files/manual_cleanup.py
+++ b/playbooks/roles/infra-aws-sandbox/files/manual_cleanup.py
@@ -15,6 +15,8 @@
 with open('/tmp/aws_nuke_filters.json', 'r') as f:
     aws_nuke_filter.update(json.load(f))

+clientlattice = boto3.client('vpc-lattice')
+
 # Delete all EC2VPC
 client = boto3.client('ec2')

@@ -23,6 +25,8 @@
 response = client.describe_vpcs()

 for vpc in response['Vpcs']:
+
+    print("Deleting VPC: " + vpc['VpcId'])
     # Delete all subnets
     response2 = client.describe_subnets(
         Filters=[
@@ -105,6 +109,33 @@
             print("Disassociated route table: " + association['RouteTableAssociationId'])
             changed = True

+    # Deregister targets and delete all VPC Lattice target groups
+
+    response5 = clientlattice.list_target_groups(
+        vpcIdentifier=vpc['VpcId']
+    )
+
+    for target_group in response5['items']:
+        # remove all targets from the target group
+
+        response6 = clientlattice.list_targets(
+            targetGroupIdentifier=target_group['arn']
+        )
+
+        if len(response6['items']) != 0:
+            clientlattice.deregister_targets(
+                targetGroupIdentifier=target_group['arn'],
+                targets=[
+                    { 'id': y['id'], 'port': y['port'] } for y in response6['items']
+                ]
+            )
+            print("Deregistered targets: " + str(response6['items']))
+
+        clientlattice.delete_target_group(
+            targetGroupIdentifier=target_group['arn']
+        )
+        print("Deleted target group: " + target_group['arn'])
+        changed = True

     # Delete VPC

@@ -113,12 +144,38 @@
     )
     print("Deleted VPC: " + vpc['VpcId'])
-
     changed = True

 except botocore.exceptions.ClientError as e:
     print(e)

+try:
+    response = client.describe_images(Owners=['self'], IncludeDeprecated=True, IncludeDisabled=True)
+
+    for image in response['Images']:
+        print("Deregistering AMI: " + image['ImageId'])
+        client.deregister_image(
+            ImageId=image['ImageId']
+        )
+        print("Deregistered AMI: " + image['ImageId'])
+        for device in image.get('BlockDeviceMappings', []):
+            snapshot_id = device.get('Ebs', {}).get('SnapshotId')
+            if snapshot_id:
+                print("Deleting snapshot: %s associated with AMI: %s" % (snapshot_id, image['ImageId']))
+                client.delete_snapshot(SnapshotId=snapshot_id)
+                print("Successfully deleted snapshot: %s" % (snapshot_id))
+        changed = True
+    # Delete all snapshots
+    response = client.describe_snapshots(OwnerIds=['self'])
+
+    for snapshot in response['Snapshots']:
+        client.delete_snapshot(
+            SnapshotId=snapshot['SnapshotId']
+        )
+        print("Deleted snapshot: " + snapshot['SnapshotId'])
+        changed = True
+except botocore.exceptions.ClientError as e:
+    print(e)

 # Delete all Cognito User Pools

@@ -280,10 +337,35 @@
 except botocore.exceptions.ClientError as e:
     print(e)

-# Cleanup Public ECR
-client = boto3.client('ecr-public')
-if os.environ.get('AWS_REGION') == 'us-east-1':
+
+# Release all Elastic IPs
+
+try:
+    response = client.describe_addresses()
+
+    for address in response['Addresses']:
+        # Disassociate address
+        if address.get('AssociationId'):
+            client.disassociate_address(
+                AssociationId=address['AssociationId']
+            )
+            print("Disassociated Elastic IP: " + address['AllocationId'])
+
+        client.release_address(
+            AllocationId=address['AllocationId'],
+            NetworkBorderGroup=address.get('NetworkBorderGroup', '')
+        )
+        print("Released Elastic IP: " + address['AllocationId'])
+        changed = True
+except botocore.exceptions.ClientError as e:
+    print(e)
+
+
+
+if os.environ.get('AWS_DEFAULT_REGION') == 'us-east-1':
+    # Cleanup Public ECR
+    client = boto3.client('ecr-public')
     try:
         response = client.describe_repositories()

@@ -361,8 +443,26 @@
         changed = True
 # UninitializedAccountException
 except client.exceptions.UninitializedAccountException:
-    print("MGNSourceServer is not supported in 
this region") + pass + #print("MGNSourceServer is not supported in this region") + +# Delete cloudformation stack +client = boto3.client('cloudformation') +try: + response = client.describe_stacks() + + for stack in response['Stacks']: + # Check if stack is in DELETE_FAILED state + if stack['StackStatus'] == 'DELETE_FAILED': + client.delete_stack( + StackName=stack['StackName'], + DeletionMode='FORCE_DELETE_STACK' + ) + print("Deleted stack: " + stack['StackName']) + changed = True +except botocore.exceptions.ClientError as e: + print(e) diff --git a/playbooks/roles/infra-aws-sandbox/tasks/iam.yml b/playbooks/roles/infra-aws-sandbox/tasks/iam.yml index 1ddc2f60..999d83d7 100644 --- a/playbooks/roles/infra-aws-sandbox/tasks/iam.yml +++ b/playbooks/roles/infra-aws-sandbox/tasks/iam.yml @@ -1,16 +1,31 @@ --- -- name: Copy IAM template to output_dir - copy: - dest: "{{ output_dir }}/CF-IAM.json" - src: files/CF-IAM.json - - name: Create IAM role using Cloudformation cloudformation: profile: "{{ account_profile }}" - template: "{{ output_dir }}/CF-IAM.json" + template_body: "{{ lookup('file', 'CF-IAM.json') }}" region: "{{ aws_region }}" stack_name: roles - register: _cfiamrole - until: _cfiamrole is succeeded - delay: 60 - retries: 5 + register: r_cf + ignore_errors: yes + +- when: r_cf is failed + block: + - name: Delete IAM role Cloudformation stack + cloudformation: + profile: "{{ account_profile }}" + region: "{{ aws_region }}" + stack_name: roles + state: absent + + - name: Delete the config-rule-role role + iam_role: + profile: "{{ account_profile }}" + name: config-rule-role + state: absent + + - name: Retry create IAM role using Cloudformation + cloudformation: + profile: "{{ account_profile }}" + template_body: "{{ lookup('file', 'CF-IAM.json') }}" + region: "{{ aws_region }}" + stack_name: roles diff --git a/playbooks/roles/infra-aws-sandbox/tasks/keypair.yml b/playbooks/roles/infra-aws-sandbox/tasks/keypair.yml index bebba8a6..ea821cbb 100644 --- a/playbooks/roles/infra-aws-sandbox/tasks/keypair.yml +++ b/playbooks/roles/infra-aws-sandbox/tasks/keypair.yml @@ -7,7 +7,7 @@ key_material: "{{ opentlc_admin_backdoor }}" aws_access_key: "{{ assumed_role.sts_creds.access_key }}" aws_secret_key: "{{ assumed_role.sts_creds.secret_key }}" - security_token: "{{ assumed_role.sts_creds.session_token }}" + session_token: "{{ assumed_role.sts_creds.session_token }}" loop: "{{ all_regions }}" loop_control: loop_var: _region @@ -23,7 +23,7 @@ key_material: "{{ ocpkey }}" aws_access_key: "{{ assumed_role.sts_creds.access_key }}" aws_secret_key: "{{ assumed_role.sts_creds.secret_key }}" - security_token: "{{ assumed_role.sts_creds.session_token }}" + session_token: "{{ assumed_role.sts_creds.session_token }}" loop: "{{ all_regions }}" loop_control: loop_var: _region diff --git a/playbooks/roles/infra-aws-sandbox/tasks/manual_cleanup.yml b/playbooks/roles/infra-aws-sandbox/tasks/manual_cleanup.yml index c0fbb8da..5de04b7b 100644 --- a/playbooks/roles/infra-aws-sandbox/tasks/manual_cleanup.yml +++ b/playbooks/roles/infra-aws-sandbox/tasks/manual_cleanup.yml @@ -22,6 +22,10 @@ debug: var: r_manual_cleanup + - when: r_manual_cleanup is changed + set_fact: + run_aws_nuke_again: true + # Reject all VPC connections - name: Get all VPC endpoint connections @@ -65,87 +69,6 @@ - set_fact: run_aws_nuke_again: true - # Instance - - - name: Get all instances - ec2_instance_info: - register: r_all_instances - - - when: r_all_instances.instances | length > 0 - block: - - name: Disable termination protection on all 
instances - command: >- - {{ aws_cli }} ec2 - --region "{{ _region }}" - modify-instance-attribute - --instance-id {{ _instance.instance_id }} - --no-disable-api-termination - when: - - '"state" in _instance' - - _instance.state.name != "terminated" - loop: "{{ r_all_instances.instances }}" - loop_control: - loop_var: _instance - - - set_fact: - run_aws_nuke_again: true - - # RDS DB Instances - - - name: Get all RDS DB instances - command: >- - {{ aws_cli }} - --region {{ _region | quote }} - rds describe-db-instances - --output json - --query 'DBInstances[*].DBInstanceIdentifier' - register: r_all_db_instances - changed_when: false - - - name: Save list as fact - set_fact: - db_instances: "{{ r_all_db_instances.stdout | from_json | list }}" - - - name: Disable termination protection on all DBInstances - loop: "{{ db_instances }}" - command: >- - {{ aws_cli }} - --region {{ _region | quote }} - rds modify-db-instance - --db-instance-identifier {{ item }} - --no-deletion-protection - # EIP - - - ec2_eip_info: - register: r_all_eips - - - when: r_all_eips.addresses | length > 0 - block: - # The following does not seem to work with aws profile - # Thus use the aws CLI instead. - # - name: Disassociate and release EIP - # ec2_eip: - # state: absent - # release_on_disassociation: true - # public_ip: "{{ _eip.public_ip }}" - # profile: "{{ account_profile }}" - # loop: "{{ r_all_eips.addresses }}" - # loop_control: - # loop_var: _eip - - - name: Disassociate EIP - command: >- - {{ aws_cli }} ec2 - --region "{{ _region }}" - disassociate-address - --public-ip "{{ _eip.public_ip }}" - loop: "{{ r_all_eips.addresses }}" - loop_control: - loop_var: _eip - - - set_fact: - run_aws_nuke_again: true - # Access Points - name: List all Access points command: >- @@ -199,32 +122,6 @@ - set_fact: run_aws_nuke_again: true - # Target groups - - name: List all target groups - command: >- - {{ aws_cli }} --region "{{ _region }}" - vpc-lattice list-target-groups - --query 'items[*].arn' --output json - - register: r_target_groups - changed_when: false - failed_when: >- - r_target_groups.rc != 0 - and 'Could not connect to the endpoint URL' not in r_target_groups.stderr - - - when: >- - r_target_groups.rc == 0 and r_target_groups.stdout != "" - block: - - name: Delete all target groups - loop: "{{ r_target_groups.stdout | default('[]', true) | from_json | default([]) }}" - command: >- - {{ aws_cli }} --region "{{ _region }}" - vpc-lattice delete-target-group - --target-group-identifier {{ item | quote }} - - - set_fact: - run_aws_nuke_again: true - # FSx filesystem cleanup - name: List all fsx volumes command: >- diff --git a/playbooks/roles/infra-aws-sandbox/tasks/reset.yml b/playbooks/roles/infra-aws-sandbox/tasks/reset.yml index e5374585..416a12af 100644 --- a/playbooks/roles/infra-aws-sandbox/tasks/reset.yml +++ b/playbooks/roles/infra-aws-sandbox/tasks/reset.yml @@ -17,27 +17,6 @@ _hostedzoneid: "{{ _route53zone.zone_id }}" aws_public_zone: "{{ account_name }}{{subdomain_base}}." 
-# Get a new token as the current one may have timed out (1h)
-- include_tasks: assume.yml
-
-- loop: "{{ all_regions }}"
-  loop_control:
-    loop_var: _region
-  environment:
-    AWS_REGION: "{{ _region }}"
-    AWS_DEFAULT_REGION: "{{ _region }}"
-    AWS_ACCESS_KEY_ID: "{{ assumed_role.sts_creds.access_key }}"
-    AWS_SECRET_ACCESS_KEY: "{{ assumed_role.sts_creds.secret_key }}"
-    AWS_SESSION_TOKEN: "{{ assumed_role.sts_creds.session_token }}"
-  ignore_errors: true
-  name: Run files/manual_cleanup.py script
-  script: files/manual_cleanup.py
-  register: r_manual_cleanup
-  # timeout after 2 minutes
-  timeout: 120
-  changed_when: >-
-    'Changes were made' in r_manual_cleanup.stdout
-
 - tags: nuke
   when: nuke_sandbox | bool
   block:
@@ -65,9 +44,14 @@
         src: "{{ role_path }}/templates/nuke-config.yml.j2"
         dest: "{{ output_dir }}/{{ account_name }}_nuke-config.yml"

+    - name: Generate config file for aws-nuke-legacy
+      template:
+        src: "{{ role_path }}/templates/nuke-config-legacy.yml.j2"
+        dest: "{{ output_dir }}/{{ account_name }}_nuke-config-legacy.yml"
+
     - name: Run aws-nuke on sandbox account
       command: >-
-        {{ aws_nuke_binary_path }} --profile {{ account_name }}
+        {{ aws_nuke_binary_path }} nuke --profile {{ account_name }}
         -c "{{ output_dir }}/{{ account_name }}_nuke-config.yml"
         --no-dry-run
         --force
@@ -78,12 +62,20 @@
       retries: "{{ aws_nuke_retries }}"
       until: _awsnuke is succeeded
       no_log: true
-      async: 1800
+      async: 3600
       poll: 30
       delay: 30
+      changed_when: >-
+        'No resource to delete.' not in _awsnuke.stdout_lines

     - debug:
-        var: _awsnuke
+        # stdout and stderr are really not human friendly. keep stdout_lines and stderr_lines
+        var: >-
+          _awsnuke
+          | dict2items
+          | selectattr('key', 'ne', 'stdout')
+          | selectattr('key', 'ne', 'stderr')
+          | items2dict

     - when: _awsnuke is failed
       block:
@@ -100,7 +92,7 @@
     - name: Run aws-nuke again
       when: run_aws_nuke_again | default(false)
       command: >-
-        {{ aws_nuke_binary_path }} --profile {{ account_name }}
+        {{ aws_nuke_binary_path }} nuke --profile {{ account_name }}
         -c "{{ output_dir }}/{{ account_name }}_nuke-config.yml"
         --no-dry-run
         --force
@@ -111,13 +103,51 @@
       retries: 0
       until: _awsnuke2 is succeeded
       no_log: true
+      # second time shouldn't take too long
       async: 1800
       poll: 30
       delay: 30
+      changed_when: >-
+        'No resource to delete.' not in _awsnuke2.stdout_lines

     - debug:
-        var: _awsnuke2
+        var: >-
+          _awsnuke2
+          | dict2items
+          | selectattr('key', 'ne', 'stdout')
+          | selectattr('key', 'ne', 'stderr')
+          | items2dict
+
+    - name: Run aws-nuke legacy on sandbox account
+      when: run_aws_nuke_legacy | default(false) | bool
+      # be on the safe side, run the official (unmaintained) binary
+      command: >-
+        {{ aws_nuke_legacy_binary_path }}
+        --profile {{ account_name }}
+        -c "{{ output_dir }}/{{ account_name }}_nuke-config-legacy.yml"
+        --no-dry-run
+        --force
+      args:
+        stdin: "{{ account_name }}{{ alias_suffix }}"
+      register: _awsnuke_legacy
+      ignore_errors: true
+      retries: "{{ aws_nuke_legacy_retries | default(0) }}"
+      until: _awsnuke_legacy is succeeded
+      changed_when: >-
+        'No resource to delete.' 
not in _awsnuke_legacy.stdout_lines + no_log: true + async: 1800 + poll: 30 + delay: 30 + - debug: + var: >- + _awsnuke_legacy + | dict2items + | selectattr('key', 'ne', 'stdout') + | selectattr('key', 'ne', 'stderr') + | items2dict + when: run_aws_nuke_legacy | default(false) | bool - name: Report aws-nuke error fail: @@ -125,3 +155,13 @@ when: - _awsnuke is failed - _awsnuke2 is failed or _awsnuke2 is skipped + + - name: Report aws-nuke-legacy error + fail: + msg: aws-nuke-legacy failed + when: _awsnuke_legacy is failed + + - name: Report aws-nuke-legacy deleted resource(s) + fail: + msg: aws-nuke-legacy deleted resource(s). That should be investigated and reported. + when: _awsnuke_legacy is changed diff --git a/playbooks/roles/infra-aws-sandbox/templates/nuke-config-legacy.yml.j2 b/playbooks/roles/infra-aws-sandbox/templates/nuke-config-legacy.yml.j2 new file mode 100644 index 00000000..379329ca --- /dev/null +++ b/playbooks/roles/infra-aws-sandbox/templates/nuke-config-legacy.yml.j2 @@ -0,0 +1,21 @@ +--- +regions: {{ ( all_regions + ['global'] ) | to_json }} + +account-blocklist: {{ aws_nuke_account_blacklist | to_json }} + +feature-flags: + disable-deletion-protection: + RDSInstance: true + EC2Instance: true + CloudformationStack: true + ELBv2: true + QLDBLedger: true + +accounts: + "{{ account_id }}": + filters: {{ aws_nuke_filters_default | combine(aws_nuke_filters) | to_json }} + +resource-types: + excludes: + # don't nuke OpenSearch Packages, see https://github.com/rebuy-de/aws-nuke/issues/1123 + - OSPackage diff --git a/playbooks/roles/infra-aws-sandbox/templates/nuke-config.yml.j2 b/playbooks/roles/infra-aws-sandbox/templates/nuke-config.yml.j2 index 379329ca..1f8f8f9d 100644 --- a/playbooks/roles/infra-aws-sandbox/templates/nuke-config.yml.j2 +++ b/playbooks/roles/infra-aws-sandbox/templates/nuke-config.yml.j2 @@ -1,15 +1,24 @@ --- regions: {{ ( all_regions + ['global'] ) | to_json }} -account-blocklist: {{ aws_nuke_account_blacklist | to_json }} +blocklist: {{ aws_nuke_account_blacklist | to_json }} -feature-flags: - disable-deletion-protection: - RDSInstance: true - EC2Instance: true - CloudformationStack: true - ELBv2: true - QLDBLedger: true +settings: + EC2Image: + IncludeDisabled: true + IncludeDeprecated: true + DisableDeregistrationProtection: true + EC2Instance: + DisableDeletionProtection: true + DisableStopProtection: true + RDSInstance: + DisableDeletionProtection: true + CloudformationStack: + DisableDeletionProtection: true + ELBv2: + DisableDeletionProtection: true + QLDBLedger: + DisableDeletionProtection: true accounts: "{{ account_id }}": @@ -18,4 +27,24 @@ accounts: resource-types: excludes: # don't nuke OpenSearch Packages, see https://github.com/rebuy-de/aws-nuke/issues/1123 + - AmazonML + - Cloud9Environment + - CloudSearchDomain + - CodeStarProject + - FMSNotificationChannel + - FMSPolicy + - MachineLearningBranchPrediction + - MachineLearningDataSource + - MachineLearningEvaluation + - MachineLearningMLModel - OSPackage + - OpsWorksApp + - OpsWorksCMBackup + - OpsWorksCMServer + - OpsWorksCMServerState + - OpsWorksInstance + - OpsWorksLayer + - OpsWorksUserProfile + - RoboMakerRobotApplication + - RoboMakerSimulationApplication + - RoboMakerSimulationJob diff --git a/tests/001.hurl b/tests/001.hurl index 45ff45b6..9ed78663 100644 --- a/tests/001.hurl +++ b/tests/001.hurl @@ -240,7 +240,7 @@ jsonpath "$.reservation.status" == "updating" GET {{host}}/api/v1/reservations/summit Authorization: Bearer {{access_token}} [Options] -retry: 15 +retry: 25 HTTP 200 
[Asserts]
jsonpath "$.reservation.status" == "success"
diff --git a/tools/sandbox_mark_for_cleanup.hurl b/tools/sandbox_mark_for_cleanup.hurl
index e5223995..58ba5c25 100644
--- a/tools/sandbox_mark_for_cleanup.hurl
+++ b/tools/sandbox_mark_for_cleanup.hurl
@@ -1,3 +1,16 @@
+#################################################################################
+# Get an access token using the login token
+#################################################################################
+
+GET {{host}}/api/v1/login
+Authorization: Bearer {{login_token}}
+HTTP 200
+[Captures]
+access_token: jsonpath "$.access_token"
+[Asserts]
+jsonpath "$.access_token" isString
+jsonpath "$.access_token_exp" isString
+
 #################################################################################
 # Stop a placement
 #################################################################################

From 0fee1103a5197140c2187c6f014d136abfd26546 Mon Sep 17 00:00:00 2001
From: Alberto Gonzalez
Date: Sat, 2 Nov 2024 07:12:13 +0100
Subject: [PATCH 13/13] Create RoleBinding for Keycloak user to the namespace

---
 internal/models/ocp_sandbox.go | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/internal/models/ocp_sandbox.go b/internal/models/ocp_sandbox.go
index d82c3e0a..fcbe02ee 100644
--- a/internal/models/ocp_sandbox.go
+++ b/internal/models/ocp_sandbox.go
@@ -1367,6 +1367,40 @@ func (a *OcpSandboxProvider) Request(serviceUuid string, cloud_selector map[stri
 			Username: userAccountName,
 			Password: password,
 		})
+
+		// Create RoleBinding granting the 'admin' ClusterRole to the Keycloak user in the namespace
+		_, err = clientset.RbacV1().RoleBindings(namespaceName).Create(context.TODO(), &rbacv1.RoleBinding{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: userAccountName,
+				Labels: map[string]string{
+					"serviceUuid": serviceUuid,
+					"guid":        annotations["guid"],
+				},
+			},
+			RoleRef: rbacv1.RoleRef{
+				APIGroup: rbacv1.GroupName,
+				Kind:     "ClusterRole",
+				Name:     "admin",
+			},
+			Subjects: []rbacv1.Subject{
+				{
+					Kind:     "User",
+					APIGroup: rbacv1.GroupName,
+					Name:     userAccountName,
+				},
+			},
+		}, metav1.CreateOptions{})
+
+		if err != nil {
+			log.Logger.Error("Error creating OCP RoleBinding", "error", err)
+			if err := clientset.CoreV1().Namespaces().Delete(context.TODO(), namespaceName, metav1.DeleteOptions{}); err != nil {
+				log.Logger.Error("Error cleaning up the namespace", "error", err)
+			}
+			rnew.SetStatus("error")
+			return
+		}
+
 	}

 	// Assign ClusterRole sandbox-hcp (created with gitops) to the SA if hcp option was selected
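
For context on how the new /health route in swagger.yaml and the TestConnection method from PATCH 02 could fit together, here is a minimal handler sketch. The AdminHandler type, the LookupCluster helper, and the chi router usage are assumptions for illustration; the repository's actual admin wiring may differ.

package admin

import (
	"encoding/json"
	"net/http"

	"github.com/go-chi/chi/v5"
)

// HealthChecker abstracts the TestConnection method added in PATCH 02.
type HealthChecker interface {
	TestConnection() error
}

// AdminHandler and LookupCluster are hypothetical stand-ins for the real
// handler type and database lookup.
type AdminHandler struct {
	LookupCluster func(name string) (HealthChecker, error)
}

func (h *AdminHandler) HealthOcpSharedClusterConfiguration(w http.ResponseWriter, r *http.Request) {
	name := chi.URLParam(r, "name")
	cluster, err := h.LookupCluster(name)
	if err != nil {
		w.WriteHeader(http.StatusNotFound)
		json.NewEncoder(w).Encode(map[string]string{"message": "OcpSharedClusterConfiguration not found"})
		return
	}
	// TestConnection builds a client from the stored kubeconfig and reads the
	// "default" namespace; any error is reported as an unhealthy cluster.
	if err := cluster.TestConnection(); err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		json.NewEncoder(w).Encode(map[string]string{"message": err.Error()})
		return
	}
	// On success, return the configuration, matching the swagger 200 response.
	w.WriteHeader(http.StatusOK)
	json.NewEncoder(w).Encode(cluster)
}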
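
The token-wait loop in PATCH 02 lists every secret in the namespace on each pass. Because the patch now creates the token secret under a deterministic name (serviceAccountName + "-token"), the loop could instead Get that one secret and poll with client-go's helper. This is a sketch, assuming k8s.io/apimachinery v0.27 or later for wait.PollUntilContextTimeout; the 5s interval and 25s cap mirror the patch's five retries of five seconds.

package main

import (
	"context"
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// waitForSAToken polls until the token controller has populated the "token"
// key of the named secret, or the timeout elapses.
func waitForSAToken(clientset kubernetes.Interface, namespace, secretName string) ([]byte, error) {
	var token []byte
	err := wait.PollUntilContextTimeout(context.TODO(), 5*time.Second, 25*time.Second, true,
		func(ctx context.Context) (bool, error) {
			secret, err := clientset.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{})
			if err != nil {
				return false, nil // secret not visible yet, keep polling
			}
			if data, ok := secret.Data["token"]; ok && len(data) > 0 {
				token = data
				return true, nil
			}
			return false, nil // secret exists but the token is not populated yet
		})
	if err != nil {
		return nil, fmt.Errorf("service account token not found: %w", err)
	}
	return token, nil
}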
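
The comment in PATCH 02 notes that token secrets are no longer auto-generated on newer OCP versions. An alternative worth noting is the TokenRequest API, which mints a bounded-lifetime token for the service account without any secret at all. This sketch shows the idea only; it is not what the patch implements, and the 24h expiration is an arbitrary example value.

package main

import (
	"context"
	"fmt"

	authenticationv1 "k8s.io/api/authentication/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// requestToken asks the API server to mint a short-lived token for the
// service account, instead of waiting for a token secret to be populated.
func requestToken(clientset kubernetes.Interface, namespace, serviceAccountName string) (string, error) {
	expiration := int64(3600 * 24) // 24h, arbitrary example value
	tr, err := clientset.CoreV1().ServiceAccounts(namespace).CreateToken(
		context.TODO(),
		serviceAccountName,
		&authenticationv1.TokenRequest{
			Spec: authenticationv1.TokenRequestSpec{
				ExpirationSeconds: &expiration,
			},
		},
		metav1.CreateOptions{},
	)
	if err != nil {
		return "", fmt.Errorf("TokenRequest for %s/%s failed: %w", namespace, serviceAccountName, err)
	}
	return tr.Status.Token, nil
}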