forked from TencentBlueKing/bk-nodeman
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: 检查Agent、bkmonitorbeat异常状态并发送邮件告知运维 (closed TencentBlueKing#2512)
- Loading branch information
Showing
4 changed files
with
128 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
116 changes: 116 additions & 0 deletions
116
apps/node_man/periodic_tasks/send_mail_to_maintainer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-节点管理(BlueKing-BK-NODEMAN) available. | ||
Copyright (C) 2017-2022 THL A29 Limited, a Tencent company. All rights reserved. | ||
Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at https://opensource.org/licenses/MIT | ||
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on | ||
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations under the License. | ||
""" | ||
import hashlib | ||
import time | ||
from collections import defaultdict | ||
from typing import Any, Dict, List, Set | ||
from uuid import uuid4 | ||
|
||
import requests | ||
from celery.schedules import crontab | ||
from celery.task import periodic_task | ||
from django.conf import settings | ||
from django.db.models import QuerySet | ||
|
||
from apps.node_man import constants, models | ||
from common.api import CCApi | ||
from common.log import logger | ||
|
||
|
||
def _build_taihu_headers(passid: str, pass_token: str) -> Dict[str, str]:
    """Build the signed ``x-rio-*`` headers for a single request to the mail API.

    A new timestamp and nonce are generated on every call, so each request carries its own
    signature instead of sharing one computed before the send loop.

    :param passid: value sent in the ``x-rio-paasid`` header
    :param pass_token: secret token mixed into the signature
    :return: header dict with paasid, nonce, timestamp and signature
    """
    # Timestamp: the server clock must not differ from standard time by more than 180 seconds.
    timestamp = str(int(time.time()))
    # Random string: it only has to be unique within ten minutes.
    nonce = str(uuid4())
    # Signature algorithm:
    # x-rio-signature = sha256(x-rio-timestamp + Token + x-rio-nonce + x-rio-timestamp).upper()
    signature = hashlib.sha256((timestamp + pass_token + nonce + timestamp).encode()).hexdigest().upper()
    return {"x-rio-paasid": passid, "x-rio-nonce": nonce, "x-rio-timestamp": timestamp, "x-rio-signature": signature}


def send_mail_to_maintainer(task_id):
    """Mail each business's maintainers the IPs whose Agent or bkmonitorbeat process is TERMINATED.

    Steps:
      1. Collect host IDs whose GSE Agent / bkmonitorbeat ``ProcessStatus`` is TERMINATED.
      2. Fetch business id, name and maintainer from CC; businesses without a maintainer are dropped.
      3. Group the abnormal hosts' inner IPs by business.
      4. POST one mail per business to the mail API configured in settings.

    The function does nothing when ``settings.TAIHU_TOKEN`` is missing or empty. A failure to
    send for one business is logged and does not stop the remaining businesses.

    :param task_id: identifier of the triggering task; only used in log lines
    :return: None
    """
    # Without a token no valid signature can be produced, so skip the whole job.
    if not getattr(settings, "TAIHU_TOKEN", None):
        return
    logger.info(f"start send_mail_to_maintainer, task_id -> {task_id}")
    # Host IDs with an abnormal Agent (lazy queryset, used below as an ``__in`` filter)
    terminated_agent: QuerySet = models.ProcessStatus.objects.filter(
        status=constants.ProcStateType.TERMINATED, name=models.ProcessStatus.GSE_AGENT_PROCESS_NAME
    ).values_list("bk_host_id", flat=True)
    # Host IDs with an abnormal bkmonitorbeat
    terminated_plugin: QuerySet = models.ProcessStatus.objects.filter(
        status=constants.ProcStateType.TERMINATED, name="bkmonitorbeat"
    ).values_list("bk_host_id", flat=True)
    query_kwargs = {"fields": ["bk_biz_id", "bk_biz_name", "bk_biz_maintainer"]}
    try:
        biz_infos: List[Dict[str, Any]] = CCApi.search_business(query_kwargs)["info"]
        # Map business ID -> business info, skipping businesses whose maintainer is empty
        biz_id_biz_info_map: Dict[int, Dict[str, Any]] = {
            biz_info["bk_biz_id"]: biz_info for biz_info in biz_infos if biz_info["bk_biz_maintainer"]
        }
    except Exception as e:
        logger.exception(f"get business info error: {str(e)}")
        return

    terminated_agent_qs: QuerySet = models.Host.objects.filter(bk_host_id__in=terminated_agent).values(
        "bk_biz_id", "inner_ip"
    )
    terminated_plugin_qs: QuerySet = models.Host.objects.filter(bk_host_id__in=terminated_plugin).values(
        "bk_biz_id", "inner_ip"
    )
    # Group abnormal Agent / bkmonitorbeat IPs by business
    terminated_agent_ips_gby_biz_id: Dict[int, List[str]] = defaultdict(list)
    terminated_plugin_ips_gby_biz_id: Dict[int, List[str]] = defaultdict(list)
    for host_info in terminated_agent_qs:
        terminated_agent_ips_gby_biz_id[host_info["bk_biz_id"]].append(host_info["inner_ip"])
    for plugin_info in terminated_plugin_qs:
        terminated_plugin_ips_gby_biz_id[plugin_info["bk_biz_id"]].append(plugin_info["inner_ip"])
    # A business is handled when it has an abnormal Agent OR an abnormal bkmonitorbeat.
    total_handle_biz_ids: Set[int] = set(terminated_agent_ips_gby_biz_id) | set(terminated_plugin_ips_gby_biz_id)

    passid = settings.APP_CODE
    # Mail sender: configured as BlueKing; to change it, add the sender to the whitelist of the
    # API applied for on Taihu.
    sender = settings.TAIHU_MAIL_SENDER
    pass_token = settings.TAIHU_TOKEN
    url_path = settings.TAIHU_SEND_MAIL_API
    # Templates stay untouched between iterations; each mail is rendered into a fresh dict.
    title_template = "业务-{}-ID:{}:Agent-bkmonitorbeat状态异常告警"
    content_template = "Agent异常IP : {}, bkmonitorbeat异常IP : {}"
    # Seconds to wait for the mail API, so a stuck connection cannot block the worker forever.
    request_timeout = 30

    failed_biz_ids: List[int] = []
    with requests.Session() as req_obj:
        for bk_biz_id in total_handle_biz_ids:
            biz_info = biz_id_biz_info_map.get(bk_biz_id)
            # No mail for businesses without maintainer information
            if not biz_info:
                continue
            # Required parameters of the send-mail API
            data = {
                "From": sender,
                "To": biz_info["bk_biz_maintainer"],
                "Title": title_template.format(biz_info["bk_biz_name"], bk_biz_id),
                # ``.get`` with a default avoids inserting empty entries into the defaultdicts
                "Content": content_template.format(
                    terminated_agent_ips_gby_biz_id.get(bk_biz_id, []),
                    terminated_plugin_ips_gby_biz_id.get(bk_biz_id, []),
                ),
            }
            try:
                response = req_obj.post(
                    url=url_path,
                    headers=_build_taihu_headers(passid, pass_token),
                    json=data,
                    timeout=request_timeout,
                )
                # Treat non-2xx answers as failures instead of silently ignoring them
                response.raise_for_status()
            except Exception as e:
                # Best effort: one business failing must not prevent the others from being notified
                logger.exception(f"bk_biz_id -> {bk_biz_id} send mail to maintainer error: {str(e)}")
                failed_biz_ids.append(bk_biz_id)

    if failed_biz_ids:
        logger.error(f"send mail to maintainer failed, task_id -> {task_id}, bk_biz_ids -> {failed_biz_ids}")
        return
    logger.info(f"send mail to maintainer success, task_id -> {task_id}")
|
||
|
||
@periodic_task(
    run_every=crontab(minute="0", hour="9", day_of_month="*", month_of_year="*", day_of_week="*"),
    queue="default",
    options={"queue": "default"},
)
def send_mail_to_maintainer_periodic_task():
    """Periodic entry point: send the abnormal-status mail to maintainers.

    Scheduled by the crontab above (hour 9, minute 0, every day) on the ``default`` queue;
    passes this task's own request id to ``send_mail_to_maintainer`` for logging.
    """
    send_mail_to_maintainer(send_mail_to_maintainer_periodic_task.request.id)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters