Skip to content

Commit

Permalink
Modified the zombie cluster resurce
Browse files Browse the repository at this point in the history
  • Loading branch information
athiruma committed Oct 8, 2024
1 parent 21c5e34 commit e590893
Show file tree
Hide file tree
Showing 24 changed files with 491 additions and 1,140 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ This tool support the following policies:
* [instance_run](cloud_governance/policy/aws/cleanup/instance_run.py): List the running ec2 instances.
* [unattached_volume](cloud_governance/policy/aws/cleanup/unattached_volume.py): Identify and remove the available EBS
volumes.
* [zombie_cluster_resource](cloud_governance/policy/aws/zombie_cluster_resource.py): Identify the non-live cluster
* [zombie_cluster_resource](cloud_governance/policy/aws/zombie_cluster_resource_test.py): Identify the non-live cluster
resource and delete those resources by resolving dependency. We are deleting more than 20 cluster resources.
* Ebs, Snapshots, AMI, Load Balancer
* VPC, Subnets, Route tables, DHCP, Internet Gateway, NatGateway, Network Interface, ElasticIp, Network ACL,
Expand Down
24 changes: 21 additions & 3 deletions cloud_governance/common/clouds/aws/ec2/ec2_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,9 +624,12 @@ def get_active_instances(self, tag_name: str, tag_value: str, skip_full_scan: bo
active_instances = {}
active_regions = self.get_active_regions()
for region_name in active_regions[::-1]:
filters = [{'Name': f'tag:{tag_name}', 'Values': [tag_value, tag_value.upper(), tag_value.lower(), tag_value.title()]}]
filters = [{'Name': f'tag:{tag_name}',
'Values': [tag_value, tag_value.upper(), tag_value.lower(), tag_value.title()]}]
self.get_ec2_instance_list()
active_instances_in_region = self.get_ec2_instance_list(Filters=filters, ec2_client=boto3.client('ec2', region_name=region_name), ignore_tag=ignore_tag)
active_instances_in_region = self.get_ec2_instance_list(Filters=filters, ec2_client=boto3.client('ec2',
region_name=region_name),
ignore_tag=ignore_tag)
if active_instances_in_region:
if skip_full_scan:
return True
Expand All @@ -642,7 +645,8 @@ def verify_active_instances(self, tag_name: str, tag_value: str):
:return:
"""
ignore_tag = 'TicketId'
return self.get_active_instances(tag_name=tag_name, tag_value=tag_value, skip_full_scan=True, ignore_tag=ignore_tag)
return self.get_active_instances(tag_name=tag_name, tag_value=tag_value, skip_full_scan=True,
ignore_tag=ignore_tag)

def describe_tags(self, **kwargs):
"""
Expand All @@ -665,3 +669,17 @@ def get_running_instance(self):
:rtype:
"""
return self.get_ec2_instance_list(Filters=[{'Name': 'instance-state-name', 'Values': ['running']}])

# Delete Operations

def delete_volumes(self, resource_ids: list) -> bool:
"""
This method delete volume
:param resource_ids:
:return:
"""
try:
[self.ec2_client.delete_volume(VolumeId=resource_id) for resource_id in resource_ids]
return True
except Exception as err:
raise err
11 changes: 11 additions & 0 deletions cloud_governance/common/utils/policy_response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
class PolicyResponse:

def __init__(self, deleted, **kwargs):
self.deleted = deleted
self.message = "running on dry_run=True mode, no resource will get harm"
self.error = "No Error"
for key, value in kwargs.items():
setattr(self, key, value)

def set_value(self, key, value):
setattr(self, key, value)
23 changes: 7 additions & 16 deletions cloud_governance/main/environment_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def load_from_yaml(self, ):
if isinstance(yaml_data, dict):
for key, value in yaml_data.items():
if key not in os.environ: # Prefer existing env variables
setattr(self, key, value)
os.environ[key] = str(value)
except FileNotFoundError:
pass
Expand All @@ -60,24 +61,14 @@ def __init__(self):

self.load_from_env()
self.load_from_yaml()
if not hasattr(self, "CLUSTER_PREFIX"):
self.CLUSTER_PREFIX = "kubernetes.io/cluster"

# env files override true ENV. Not best order, but easier to write :/
# .env.generated can be auto-generated (by an external tool) based on the local cluster's configuration.
for env in ".env", ".env.generated":
try:
file_path = os.path.join(os.path.dirname(__file__), env)
with open(file_path) as f:
for line in f.readlines():
key, found, value = line.strip().partition("=")
if not found:
print("ERROR: invalid line in {env}: {line.strip()}")
continue
if key in os.environ:
continue # prefer env to env file
os.environ[key] = value
if not hasattr(self, "DAYS_TO_TAKE_ACTION"):
self.DAYS_TO_TAKE_ACTION = 7

except FileNotFoundError:
pass # ignore
if not hasattr(self, "FORCE_DELETE"):
self.FORCE_DELETE = False

##################################################################################################
# dynamic parameters - configure for local run
Expand Down
39 changes: 14 additions & 25 deletions cloud_governance/main/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,22 @@
from cloud_governance.common.logger.logger_time_stamp import logger_time_stamp, logger
from cloud_governance.policy.policy_operations.aws.tag_cluster.run_tag_cluster_resouces import tag_cluster_resource, \
remove_cluster_resources_tags
from cloud_governance.policy.policy_operations.aws.tag_non_cluster.run_tag_non_cluster_resources import tag_non_cluster_resource, \
from cloud_governance.policy.policy_operations.aws.tag_non_cluster.run_tag_non_cluster_resources import \
tag_non_cluster_resource, \
remove_tag_non_cluster_resource, tag_na_resources
from cloud_governance.policy.policy_operations.aws.tag_user.run_tag_iam_user import tag_iam_user, run_validate_iam_user_tags
from cloud_governance.policy.policy_operations.aws.zombie_cluster.run_zombie_cluster_resources import zombie_cluster_resource
from cloud_governance.policy.policy_operations.aws.tag_user.run_tag_iam_user import tag_iam_user, \
run_validate_iam_user_tags
from cloud_governance.policy.policy_operations.aws.zombie_cluster.run_zombie_cluster_resources import \
zombie_cluster_resource
from cloud_governance.policy.policy_operations.gcp.gcp_policy_runner import GcpPolicyRunner
from cloud_governance.policy.policy_operations.gitleaks.gitleaks import GitLeaks
from cloud_governance.policy.policy_operations.ibm.ibm_operations.ibm_policy_runner import IBMPolicyRunner
from cloud_governance.main.environment_variables import environment_variables
from cloud_governance.main.es_uploader import ESUploader
from cloud_governance.common.clouds.aws.s3.s3_operations import S3Operations
from cloud_governance.policy.policy_operations.aws.zombie_cluster.validate_zombies import ValidateZombies
from cloud_governance.policy.policy_operations.aws.zombie_non_cluster.zombie_non_cluster_polices import ZombieNonClusterPolicies

from cloud_governance.policy.policy_operations.aws.zombie_non_cluster.zombie_non_cluster_polices import \
ZombieNonClusterPolicies

environment_variables_dict = environment_variables.environment_variables_dict
log_level = environment_variables_dict.get('log_level', 'INFO').upper()
Expand Down Expand Up @@ -110,23 +113,6 @@ def run_policy(account: str, policy: str, region: str, dry_run: str):
remove_keys = literal_eval(remove_keys)
tag_iam_user(user_tag_operation=user_tag_operation, file_name=file_name, remove_keys=remove_keys,
username=username)
elif policy == 'zombie_cluster_resource':
policy_output = environment_variables_dict.get('policy_output', '')
resource = environment_variables_dict.get('resource', '')
resource_name = environment_variables_dict.get('resource_name', '')
cluster_tag = environment_variables_dict.get('cluster_tag', '')
service_type = environment_variables_dict.get('service_type', '')
if dry_run == 'no': # delete
zombie_result = zombie_cluster_resource(delete=True, region=region, resource=resource,
cluster_tag=cluster_tag, resource_name=resource_name,
service_type=service_type)
else: # default: yes or other
zombie_result = zombie_cluster_resource(region=region, resource=resource, cluster_tag=cluster_tag,
resource_name=resource_name, service_type=service_type)
if policy_output:
s3operations = S3Operations(region_name=region)
logger.info(s3operations.save_results_to_s3(policy=policy.replace('_', '-'), policy_output=policy_output,
policy_result=zombie_result))
elif policy == 'tag_non_cluster':
# instance_name = environment_variables_dict['resource_name']
mandatory_tags = environment_variables_dict.get('mandatory_tags', {})
Expand Down Expand Up @@ -224,7 +210,8 @@ def main():
ibm_classic_infrastructure_policy_runner = None
is_tag_ibm_classic_infrastructure_runner = policy in environment_variables_dict.get('ibm_policies')
if not is_tag_ibm_classic_infrastructure_runner:
if environment_variables_dict.get('PUBLIC_CLOUD_NAME') and environment_variables_dict.get('PUBLIC_CLOUD_NAME').upper() == 'IBM':
if environment_variables_dict.get('PUBLIC_CLOUD_NAME') and environment_variables_dict.get(
'PUBLIC_CLOUD_NAME').upper() == 'IBM':
is_tag_ibm_classic_infrastructure_runner = policy in environment_variables_dict.get('cost_policies')
if is_tag_ibm_classic_infrastructure_runner:
ibm_classic_infrastructure_policy_runner = IBMPolicyRunner()
Expand All @@ -237,14 +224,16 @@ def main():
cost_explorer_policies_runner = CostReportPolicies()

is_azure_policy_runner = ''
if environment_variables_dict.get('PUBLIC_CLOUD_NAME') and environment_variables_dict.get('PUBLIC_CLOUD_NAME').upper() == 'AZURE':
if environment_variables_dict.get('PUBLIC_CLOUD_NAME') and environment_variables_dict.get(
'PUBLIC_CLOUD_NAME').upper() == 'AZURE':
azure_cost_policy_runner = None
is_azure_policy_runner = policy in environment_variables_dict.get('cost_policies')
if is_azure_policy_runner:
azure_cost_policy_runner = AzurePolicyRunner()

is_gcp_policy_runner = ''
if environment_variables_dict.get('PUBLIC_CLOUD_NAME') and environment_variables_dict.get('PUBLIC_CLOUD_NAME').upper() == 'GCP':
if environment_variables_dict.get('PUBLIC_CLOUD_NAME') and environment_variables_dict.get(
'PUBLIC_CLOUD_NAME').upper() == 'GCP':
gcp_cost_policy_runner = None
is_gcp_policy_runner = policy in environment_variables_dict.get('cost_policies')
if is_gcp_policy_runner:
Expand Down
2 changes: 1 addition & 1 deletion cloud_governance/main/main_oerations/main_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def run(self):
if self._policy in policies and self._policy in ["instance_run", "unattached_volume", "cluster_run",
"ip_unattached", "unused_nat_gateway", "instance_idle",
"zombie_snapshots", "database_idle", "s3_inactive",
"empty_roles"]:
"empty_roles", "zombie_cluster_resource"]:
source = policy_type
if Utils.equal_ignore_case(policy_type, self._public_cloud_name):
source = ''
Expand Down
112 changes: 112 additions & 0 deletions cloud_governance/policy/aws/cleanup/zombie_cluster_resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from abc import ABC

from cloud_governance.policy.helpers.aws.policy.zombie_cluster_operations import ZombieClusterOperations


class ZombieClusterResource(ZombieClusterOperations, ABC):

def __init__(self):
super().__init__()
self.zombie_cluster_resource_name = self._environment_variables_dict.get('ZOMBIE_CLUSTER_RESOURCE_NAME')

def get_zombie_cluster_resources(self, resource_list: list, resource_key_id: str,
zombie_cluster_id: str, create_date: str, tags_name: str = 'Tags',
resource_type: str = 'ec2_service'):
"""
This method returns the zombie cluster resources for a given resource type.
:param create_date:
:param tags_name:
:param zombie_cluster_id:
:param resource_list:
:param resource_key_id:
:param resource_type:
:return:
"""
cluster_resources = self._get_cluster_resources(resources_list=resource_list,
zombie_cluster_id=zombie_cluster_id, tags_name=tags_name)
zombie_cluster_resources = self.get_zombie_resources(cluster_resources=cluster_resources.copy())
zombie_cluster_resources = self.process_and_delete_resources(
zombie_cluster_resources=zombie_cluster_resources.copy(),
resource_id_key=resource_key_id,
resource_type=resource_type,
tags_name=tags_name,
create_date=create_date)
return zombie_cluster_resources

def zombie_cluster_volume(self, zombie_cluster_id: str = None):
"""
This method returns the list of zombie cluster volumes, and delete them once they reach the delete days
:param zombie_cluster_id:
:return:
"""
volume_id_key = 'VolumeId'
volume_filters = [{'Name': 'status', 'Values': ['available']}]
if zombie_cluster_id:
volume_filters.append([{'Name': f'tag:{zombie_cluster_id}', 'Values': 'owned'}])

available_volumes = self._ec2_operations.get_volumes(Filters=volume_filters)
return self.get_zombie_cluster_resources(resource_list=available_volumes,
resource_key_id=volume_id_key,
zombie_cluster_id=zombie_cluster_id,
create_date='CreateTime')

def zombie_cluster_snapshot(self, zombie_cluster_id: str = None):
"""
This method returns list of zombie cluster's snapshot according to cluster tag name and cluster name data
:param zombie_cluster_id:
:return:
"""
snapshot_id_key = 'SnapshotId'
snapshot_filters = []
if zombie_cluster_id:
snapshot_filters.append([{'Name': f'tag:{zombie_cluster_id}', 'Values': 'owned'}])
snapshots_data = self._ec2_operations.get_snapshots(Filters=snapshot_filters)
return self.get_zombie_cluster_resources(resource_list=snapshots_data,
resource_key_id=snapshot_id_key,
zombie_cluster_id=zombie_cluster_id,
create_date='StartTime'
)

def zombie_cluster_ami(self, zombie_cluster_id: str = None):
"""
This method returns list of cluster's ami according to cluster tag name and cluster name data
"""
image_id_key = 'ImageId'
image_filters = []
if zombie_cluster_id:
image_filters.append([{'Name': f'tag:{zombie_cluster_id}', 'Values': 'owned'}])
images_data = self._ec2_operations.get_images(Filters=image_filters)
return self.get_zombie_cluster_resources(resource_list=images_data,
resource_key_id=image_id_key,
zombie_cluster_id=zombie_cluster_id,
create_date='CreationDate')

def run_zombie_cluster_pruner(self):
"""
This method run zombie cluster pruner operations
:return:
"""
if self.zombie_cluster_resource_name:
zombie_clusters_resources = [self.zombie_cluster_resource_name]
else:
zombie_clusters_resources = [
self.zombie_cluster_volume,
self.zombie_cluster_snapshot,
self.zombie_cluster_ami
]
zombie_cluster_response = {}
for zombie_cluster in zombie_clusters_resources:
zombie_items = zombie_cluster()
for zombie_tag, zombie_resources in zombie_items.items():
if zombie_tag in zombie_cluster_response:
zombie_cluster_response[zombie_tag]['ResourceIds'].extend(zombie_resources['ResourceIds'])
resource_names = list(set(zombie_cluster_response[zombie_tag]['ResourceNames']))
resource_names.append(zombie_cluster.__name__)
zombie_cluster_response[zombie_tag]['ResourceNames'] = list(resource_names)
else:
zombie_cluster_response.setdefault(zombie_tag, {}).update(zombie_resources)
zombie_cluster_response[zombie_tag].setdefault('ResourceNames', []).append(zombie_cluster.__name__)
return zombie_cluster_response

def run_policy_operations(self):
return self.run_zombie_cluster_pruner()
Loading

0 comments on commit e590893

Please sign in to comment.