feat: clean up expired task statistics TencentBlueKing#7668
guohelu committed Jan 15, 2025
1 parent 21bbf35 commit 1ac5d15
Showing 4 changed files with 61 additions and 1 deletion.
6 changes: 6 additions & 0 deletions config/default.py
@@ -883,3 +883,9 @@ def check_engine_admin_permission(request, *args, **kwargs):
if "BKAPP_SOPS_BROKER_URL" in os.environ:
BROKER_URL = os.getenv("BKAPP_SOPS_BROKER_URL")
print(f"BROKER_URL: {BROKER_URL}")

# Expired statistics cleanup configuration
ENABLE_CLEAN_EXPIRED_STATISTICS = env.ENABLE_CLEAN_EXPIRED_STATISTICS
STATISTICS_VALIDITY_DAY = env.STATISTICS_VALIDITY_DAY
CLEAN_EXPIRED_STATISTICS_BATCH_NUM = env.CLEAN_EXPIRED_STATISTICS_BATCH_NUM
CLEAN_EXPIRED_STATISTICS_CRON = env.CLEAN_EXPIRED_STATISTICS_CRON
6 changes: 6 additions & 0 deletions env.py
@@ -153,3 +153,9 @@
# bk_audit
BK_AUDIT_ENDPOINT = os.getenv("BK_AUDIT_ENDPOINT", None)
BK_AUDIT_DATA_TOKEN = os.getenv("BK_AUDIT_DATA_TOKEN", None)

# Expired statistics cleanup configuration
# Parse the switch as a boolean string so values such as "false" or "0" actually disable the cleanup
ENABLE_CLEAN_EXPIRED_STATISTICS = os.getenv("BKAPP_ENABLE_CLEAN_EXPIRED_STATISTICS", "false").lower() in ("true", "1")
STATISTICS_VALIDITY_DAY = int(os.getenv("BKAPP_STATISTICS_VALIDITY_DAY", 730))
CLEAN_EXPIRED_STATISTICS_BATCH_NUM = int(os.getenv("BKAPP_CLEAN_EXPIRED_STATISTICS_BATCH_NUM", 100))
CLEAN_EXPIRED_STATISTICS_CRON = tuple(os.getenv("BKAPP_CLEAN_EXPIRED_STATISTICS_CRON", "30 0 * * *").split())
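As a side note on the cron default: splitting "30 0 * * *" yields five string fields that are later splatted into Celery's crontab schedule in tasks.py. A minimal sketch of that expansion, illustrative only and not part of the commit, assuming a standard Celery installation:

# Hypothetical illustration of how the CRON default is parsed; not part of the commit.
from celery.schedules import crontab

cron_fields = tuple("30 0 * * *".split())  # -> ("30", "0", "*", "*", "*")
schedule = crontab(*cron_fields)           # minute="30", hour="0", remaining fields "*"
# Note: crontab's positional order is (minute, hour, day_of_week, day_of_month, month_of_year),
# which differs from classic cron field order, so non-default values in the last
# three fields of BKAPP_CLEAN_EXPIRED_STATISTICS_CRON should be set with that in mind.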
15 changes: 15 additions & 0 deletions gcloud/contrib/cleaner/pipeline/bamboo_engine_tasks.py
@@ -15,6 +15,7 @@
from django.db.models import QuerySet

from gcloud.taskflow3.models import AutoRetryNodeStrategy, TimeoutNodeConfig
from gcloud.analysis_statistics.models import TaskflowStatistics, TaskflowExecutedNodeStatistics
from pipeline.contrib.periodic_task.models import PeriodicTaskHistory
from pipeline.eri.models import (
    ContextValue,
@@ -84,3 +85,17 @@ def get_clean_pipeline_instance_data(instance_ids: List[str]) -> Dict[str, QuerySet]:
"periodic_task_history": periodic_task_history,
"pipeline_instances": pipeline_instances,
}


def get_clean_statistics_data(expire_time):
    """
    Get the expired statistics records older than the given expiration time
    :param expire_time: expiration time; records created before it are treated as expired
    """
    taskflow_ids = TaskflowStatistics.objects.filter(create_time__lt=expire_time).values_list("id", flat=True)
    taskflow_node_ids = TaskflowExecutedNodeStatistics.objects.filter(instance_create_time__lt=expire_time).values_list(
        "id", flat=True
    )
    statistics_data = {TaskflowStatistics: taskflow_ids, TaskflowExecutedNodeStatistics: taskflow_node_ids}

    return statistics_data
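For orientation, the helper returns a mapping from statistics model class to a lazy queryset of expired primary keys; nothing is deleted here. A minimal usage sketch, assuming a configured Django environment for the project and using the 730-day default from env.py purely for illustration:

# Illustrative only; not part of the commit.
from django.utils import timezone

expire_time = timezone.now() - timezone.timedelta(days=730)
data_to_clean = get_clean_statistics_data(expire_time)
for model, ids in data_to_clean.items():
    # Each value is a lazy values_list queryset; no rows are fetched
    # until the caller slices or iterates it.
    print(model.__name__, ids.count())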
35 changes: 34 additions & 1 deletion gcloud/contrib/cleaner/tasks.py
@@ -18,7 +18,10 @@
from django.db import transaction
from django.utils import timezone

from gcloud.contrib.cleaner.pipeline.bamboo_engine_tasks import get_clean_pipeline_instance_data
from gcloud.contrib.cleaner.pipeline.bamboo_engine_tasks import (
    get_clean_pipeline_instance_data,
    get_clean_statistics_data,
)
from gcloud.contrib.cleaner.signals import pre_delete_pipeline_instance_data
from gcloud.taskflow3.models import TaskFlowInstance
from gcloud.utils.decorators import time_record
@@ -69,3 +72,33 @@ def clean_expired_v2_task_data():
logger.info(f"[clean_expired_v2_task_data] success clean tasks: {task_ids}")
except Exception as e:
logger.exception(f"[clean_expired_v2_task_data] error: {e}")


@periodic_task(
    run_every=(crontab(*settings.CLEAN_EXPIRED_STATISTICS_CRON)), ignore_result=True, queue="task_data_clean"
)
@time_record(logger)
def clear_statistics_info():
    """
    Clean up expired statistics data
    """
    if not settings.ENABLE_CLEAN_EXPIRED_STATISTICS:
        logger.info("Skip clean expired statistics data")
        return

    logger.info("Start clean expired statistics data...")
    try:
        validity_day = settings.STATISTICS_VALIDITY_DAY
        expire_time = timezone.now() - timezone.timedelta(days=validity_day)
        batch_num = settings.CLEAN_EXPIRED_STATISTICS_BATCH_NUM

        data_to_clean = get_clean_statistics_data(expire_time)

        for model, ids in data_to_clean.items():
            # Materialize the id slice so the delete does not rely on a sliced subquery
            # and the log reflects the ids that were actually removed.
            ids_to_delete = list(ids[:batch_num])
            if ids_to_delete:
                model.objects.filter(id__in=ids_to_delete).delete()
                logger.info(f"[clear_statistics_info] clean model: {model}, deleted ids: {ids_to_delete}")
        logger.info("[clear_statistics_info] success clean statistics")
    except Exception as e:
        logger.error(f"Failed to clear expired statistics data: {e}")
