Skip to content

Audit and cleanup consistencies #372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 145 additions & 4 deletions tools/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
# Author: Komal Thareja ([email protected])
import argparse
import logging
import os
import re
import traceback
from datetime import datetime, timezone, timedelta
from logging.handlers import RotatingFileHandler
Expand All @@ -50,10 +52,15 @@ class MainClass:
- Remove/Delete slices older than specified number of days
- Remove/Delete dangling network services which connect the ports to deleted/closed VMs
"""
def __init__(self, config_file: str):
def __init__(self, config_file: str, am_config_file: str):
self.am_config_dict = None
with open(config_file) as f:
config_dict = yaml.safe_load(f)

if am_config_file is not None and os.path.exists(am_config_file):
with open(am_config_file) as f:
self.am_config_dict = yaml.safe_load(f)

# Load the config file
self.log_config = config_dict[Constants.CONFIG_LOGGING_SECTION]

Expand Down Expand Up @@ -215,6 +222,132 @@ def delete_dead_closing_slice(self, *, days: int):
self.logger.error(f"Failed to delete slice: {s.get_slice_id()}: e: {e}")
self.logger.error(traceback.format_exc())

def execute_ansible(self, *, inventory_path: str, playbook_path: str, extra_vars: dict,
                    ansible_python_interpreter: str, sources: str = None, private_key_file: str = None,
                    host_vars: dict = None, host: str = None, user: str = None):
    """
    Run one Ansible playbook through fabric_am's AnsibleHelper and return its result callback.

    :param inventory_path: inventory handed to AnsibleHelper
    :param playbook_path: playbook to execute
    :param extra_vars: variables installed with AnsibleHelper.set_extra_vars
    :param ansible_python_interpreter: interpreter setting forwarded to AnsibleHelper
    :param sources: forwarded unchanged to AnsibleHelper
    :param private_key_file: forwarded unchanged to run_playbook
    :param host_vars: per-host variables; only applied when host is given as well
    :param host: host the host_vars are attached to
    :param user: forwarded unchanged to run_playbook
    :return: the object returned by AnsibleHelper.get_result_callback() after the run
    """
    # Function-scope import: fabric_am is only touched when a playbook is actually executed.
    from fabric_am.util.ansible_helper import AnsibleHelper

    runner = AnsibleHelper(inventory_path=inventory_path, logger=self.logger,
                           ansible_python_interpreter=ansible_python_interpreter,
                           sources=sources)
    runner.set_extra_vars(extra_vars=extra_vars)

    # Host variables are meaningful only when both the host and a mapping are supplied;
    # an empty mapping simply yields no add_vars calls.
    per_host_vars = host_vars if (host is not None and host_vars is not None) else {}
    for var_name, var_value in per_host_vars.items():
        runner.add_vars(host=host, var_name=var_name, value=var_value)

    self.logger.info(f"Executing playbook {playbook_path} extra_vars: {extra_vars} host_vars: {host_vars}")
    runner.run_playbook(playbook_path=playbook_path, private_key_file=private_key_file, user=user)
    return runner.get_result_callback()

def clean_sliver_inconsistencies(self):
    """
    Reconcile VM slivers known to the Control Framework (CF) with OpenStack and virsh.

    Does nothing unless the actor type is Authority, an AM config was loaded, and that
    config provides the playbook inventory, playbook location and the "VM" playbook name.

    Pass 1 (CF vs OpenStack): every OpenStack server whose name contains a UUID that is
    not the id of a VM reservation in state Active, ActiveTicketed, Ticketed or Nascent is
    deleted through the VM playbook. The pass is skipped when CF reports no such reservation.

    Pass 2 (OpenStack vs virsh): for every host, each name listed by the
    worker_libvirt_operations.yml "listall" operation that is not an OpenStack instance
    name triggers a delete through the VM playbook.

    All failures are logged; nothing is raised to the caller.
    """
    try:
        # Only an Authority actor with a loaded AM config can run the playbooks.
        if self.actor_config[Constants.TYPE].lower() != ActorType.Authority.name.lower():
            return
        if self.am_config_dict is None:
            return

        from fabric_am.util.am_constants import AmConstants

        playbook_cfg = self.am_config_dict.get(AmConstants.PLAYBOOK_SECTION)
        if playbook_cfg is None:
            return
        inventory_location = playbook_cfg.get(AmConstants.PB_INVENTORY)
        pb_dir = playbook_cfg.get(AmConstants.PB_LOCATION)
        vm_playbook_name = playbook_cfg.get("VM")
        if any(setting is None for setting in (inventory_location, pb_dir, vm_playbook_name)):
            return
        vm_playbook_path = f"{pb_dir}/{vm_playbook_name}"

        ansible_cfg = self.am_config_dict.get(AmConstants.ANSIBLE_SECTION)
        interpreter = ansible_cfg.get(AmConstants.ANSIBLE_PYTHON_INTERPRETER) if ansible_cfg else None

        # Arguments shared by every playbook invocation below.
        common_args = {"inventory_path": inventory_location,
                       "ansible_python_interpreter": interpreter}

        db_cfg = self.database_config
        actor_db = ActorDatabase(user=db_cfg[Constants.PROPERTY_CONF_DB_USER],
                                 password=db_cfg[Constants.PROPERTY_CONF_DB_PASSWORD],
                                 database=db_cfg[Constants.PROPERTY_CONF_DB_NAME],
                                 db_host=db_cfg[Constants.PROPERTY_CONF_DB_HOST],
                                 logger=self.logger)

        # Reservation states whose VMs must be left alone.
        live_states = [ReservationStates.Active.value,
                       ReservationStates.ActiveTicketed.value,
                       ReservationStates.Ticketed.value,
                       ReservationStates.Nascent.value]

        # Reservation ids of the VM slivers CF still considers live.
        reservations = actor_db.get_reservations(states=live_states, rsv_type=["VM"])
        cf_active_sliver_ids = [str(r.get_reservation_id()) for r in reservations] if reservations else []
        self.logger.info(f"Active Slivers: {cf_active_sliver_ids}")
        # The list is kept for the log line above; lookups go through a set.
        live_ids = frozenset(cf_active_sliver_ids)

        # OpenStack view: instance name -> server name, skipping servers missing either field.
        listing = self.execute_ansible(playbook_path=vm_playbook_path,
                                       extra_vars={"operation": "list"},
                                       **common_args).get_json_result_ok()
        servers = listing.get('openstack_servers') if listing else None
        os_vms = {}
        for server in servers or ():
            instance_name = server.get('OS-EXT-SRV-ATTR:instance_name')
            server_name = server.get('name')
            if instance_name and server_name:
                os_vms[instance_name] = server_name

        uuid_re = re.compile(r'([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})')

        # Pass 1: CF vs OpenStack. Skipped entirely when CF reports no live VM sliver.
        if cf_active_sliver_ids:
            for instance, vm_name in os_vms.items():
                try:
                    # The sliver id is the first UUID embedded in the server name.
                    found = uuid_re.search(vm_name)
                    if found is None:
                        self.logger.error(f"Sliver Id not found in the input string: {vm_name}")
                        continue
                    if found.group(1) in live_ids:
                        continue
                    result_2 = self.execute_ansible(playbook_path=vm_playbook_path,
                                                    extra_vars={"operation": "delete", "vmname": vm_name},
                                                    **common_args)
                    self.logger.info(f"Deleted instance: {vm_name}; result: {result_2.get_json_result_ok()}")
                except Exception as e:
                    self.logger.error(f"Failed to cleanup CF and openstack inconsistencies instance: {instance} vm: {vm_name}: {e}")
                    self.logger.error(traceback.format_exc())

        # Pass 2: OpenStack vs virsh.
        # NOTE(review): when the OpenStack listing above comes back empty (including on failure),
        # os_vms is empty and every virsh entry is treated as dangling - confirm this is intended.
        virsh_listing = self.execute_ansible(playbook_path=f"{pb_dir}/worker_libvirt_operations.yml",
                                             extra_vars={"operation": "listall"},
                                             **common_args)

        for host, ok_result in virsh_listing.host_ok.items():
            try:
                if not (ok_result and ok_result._result):
                    continue
                virsh_vms = ok_result._result.get('stdout_lines', [])
                self.logger.info(f"Host: {host} has VMs: {virsh_vms}")
                for instance in virsh_vms:
                    if instance in os_vms:
                        continue
                    # NOTE(review): the delete request carries only the host, not the instance
                    # name, and goes to the VM playbook - verify against the playbook's contract.
                    results_4 = self.execute_ansible(playbook_path=vm_playbook_path,
                                                     extra_vars={"operation": "delete", "host": str(host)},
                                                     **common_args)
                    self.logger.info(f"Deleted instance: {instance}; result: {results_4.get_json_result_ok()}")
            except Exception as e:
                self.logger.error(f"Failed to cleanup openstack and virsh inconsistencies on {host}: {e}")
                self.logger.error(traceback.format_exc())
    except Exception as e:
        self.logger.error(f"Failed to cleanup inconsistencies: {e}")
        self.logger.error(traceback.format_exc())

def handle_command(self, args):
"""
Command Handler
Expand All @@ -230,8 +363,15 @@ def handle_command(self, args):
# Slivers
elif args.command == "slivers":
# Close operation
if args.operation is not None and args.operation == "close":
self.delete_dangling_network_slivers()
if args.operation is not None and args.operation == "cleanup":
self.clean_sliver_inconsistencies()
else:
print(f"Unsupported operation: {args.operation}")
elif args.command == "audit":
# Close operation
if args.operation is not None and args.operation == "audit":
self.delete_dead_closing_slice(days=args.days)
self.clean_sliver_inconsistencies()
else:
print(f"Unsupported operation: {args.operation}")
else:
Expand All @@ -240,12 +380,13 @@ def handle_command(self, args):

if __name__ == '__main__':
    # Command-line entry point: parse the options, build MainClass and dispatch the command.
    parser = argparse.ArgumentParser()
    # -a: AM handler config file. Optional, because MainClass.__init__ accepts None and
    #     clean_sliver_inconsistencies() returns early when no AM config was loaded.
    parser.add_argument("-a", dest='amconfig', required=False, type=str, default=None)
    # -f: actor config file (YAML) read by MainClass.__init__.
    parser.add_argument("-f", dest='config', required=True, type=str)
    # -d: age threshold in days, passed to delete_dead_closing_slice by the audit command.
    parser.add_argument("-d", dest='days', required=False, type=int, default=30)
    # -c / -o: command and operation dispatched by MainClass.handle_command.
    parser.add_argument("-c", dest='command', required=True, type=str)
    parser.add_argument("-o", dest='operation', required=True, type=str)
    args = parser.parse_args()

    # Construct exactly once, with both config files; a call without am_config_file
    # raises TypeError because __init__ declares that parameter without a default.
    mc = MainClass(config_file=args.config, am_config_file=args.amconfig)
    mc.handle_command(args)

3 changes: 2 additions & 1 deletion tools/install.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/sh

# Register the audit job in the system crontab, then make cron pick it up.

# Hourly (minute 0): run audit.py's "audit" command with both the actor config (-f) and
# the VM handler config (-a); -d 30 is the slice age threshold in days.
echo "0 * * * * root /usr/local/bin/python3.11 /usr/src/app/audit.py -f /etc/fabric/actor/config/config.yaml -a /etc/fabric/actor/config/vm_handler_config.yml -d 30 -c audit -o audit" >> /etc/crontab

# Disabled entries, kept for reference. Neither of them passes -a.
#echo "0 2 * * * root /usr/local/bin/python3.11 /usr/src/app/audit.py -f /etc/fabric/actor/config/config.yaml -d 30 -c slices -o remove" >> /etc/crontab
#echo "*/15 * * * * root /usr/local/bin/python3.11 /usr/src/app/audit.py -f /etc/fabric/actor/config/config.yaml -c slivers -o close" >> /etc/crontab
service cron reload
service cron restart