Skip to content

Commit 9d5734b

Browse files
authored
feat: support clearing offline agent instances (#165)
* feat: support clearing offline agent instances * chore: update release notes
1 parent a0d28fa commit 9d5734b

File tree

6 files changed

+212
-28
lines changed

6 files changed

+212
-28
lines changed

docs/content.en/docs/release-notes/_index.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Information about release notes of INFINI Console is provided here.
3030
- Enhance LDAP authentication logging (#156)
3131
- Optimize UI for copying metric requests (#155)
3232
- Enhance deletion tips by adding cluster info for indices (#162)
33-
- Retain a single instance when registering duplicate endpoints (#163)
33+
- Support clearing offline agent instances (#165)
3434

3535
## 1.28.2 (2025-02-15)
3636

docs/content.zh/docs/release-notes/_index.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@ title: "版本历史"
3030
- 增强 LDAP 身份验证的日志记录 (#156)
3131
- 优化监控报表里拷贝指标请求的 UI (#155)
3232
- 删除索引提示增加集群信息 (#162)
33-
- 自动注册实例时相同 endpoint 的实例不再重复注册 (#163)
3433

3534
## 1.28.2 (2025-02-15)
3635

3736
### Features
3837
- 告警功能支持根据桶之间文档数差值和内容差异告警 (#119)
3938
- 当使用 Easysearch 存储指标时,增加 Rollup 索引生命周期 (#128)
4039
- 增加集群指标采集模式变更事件 (#152)
40+
- 支持清理离线 Agent 实例 (#165)
4141

4242
### Bug fix
4343
- 修复 Insight API 处理多时间序列数据时数据丢失的问题 (#127)

plugin/managed/server/instance.go

+169-25
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ package server
3030
import (
3131
"context"
3232
"fmt"
33+
"infini.sh/framework/core/event"
34+
"infini.sh/framework/core/global"
35+
"infini.sh/framework/core/task"
3336
"net/http"
3437
"strconv"
3538
"strings"
@@ -76,6 +79,8 @@ func init() {
7679

7780
//try to connect to instance
7881
api.HandleAPIMethod(api.POST, "/instance/try_connect", handler.RequireLogin(handler.tryConnect))
82+
//clear instance that is not alive in 7 days
83+
api.HandleAPIMethod(api.POST, "/instance/_clear", handler.RequirePermission(handler.clearInstance, enum.PermissionGatewayInstanceWrite))
7984

8085
}
8186

@@ -96,30 +101,7 @@ func (h APIHandler) registerInstance(w http.ResponseWriter, req *http.Request, p
96101
oldInst.ID = obj.ID
97102
exists, err := orm.Get(oldInst)
98103
if exists {
99-
errMsg := fmt.Sprintf("agent [%s] already exists", obj.ID)
100-
h.WriteError(w, errMsg, http.StatusInternalServerError)
101-
return
102-
}
103-
err, result := orm.GetBy("endpoint", obj.Endpoint, oldInst)
104-
if err != nil {
105-
log.Error(err)
106-
h.WriteError(w, err.Error(), http.StatusInternalServerError)
107-
return
108-
}
109-
if len(result.Result) > 0 {
110-
buf := util.MustToJSONBytes(result.Result[0])
111-
util.MustFromJSONBytes(buf, &oldInst)
112-
if oldInst.ID != "" {
113-
//keep old created time
114-
obj.Created = oldInst.Created
115-
log.Infof("remove old instance [%s] with the same endpoint %s", oldInst.ID, oldInst.Endpoint)
116-
err = orm.Delete(nil, oldInst)
117-
if err != nil {
118-
log.Error(err)
119-
h.WriteError(w, err.Error(), http.StatusInternalServerError)
120-
return
121-
}
122-
}
104+
obj.Created = oldInst.Created
123105
}
124106
err = orm.Save(nil, obj)
125107
if err != nil {
@@ -394,6 +376,168 @@ func (h *APIHandler) getInstanceStatus(w http.ResponseWriter, req *http.Request,
394376
}
395377
h.WriteJSON(w, result, http.StatusOK)
396378
}
379+
func (h *APIHandler) clearInstance(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
380+
appName := h.GetParameterOrDefault(req, "app_name", "")
381+
task.RunWithinGroup("clear_instance", func(ctx context.Context) error {
382+
err := h.clearInstanceByAppName(appName)
383+
if err != nil {
384+
log.Error(err)
385+
}
386+
return err
387+
})
388+
h.WriteAckOKJSON(w)
389+
}
390+
391+
func (h *APIHandler) clearInstanceByAppName(appName string) error {
392+
var (
393+
size = 100
394+
from = 0
395+
)
396+
// Paginated query for all running instances
397+
q := orm.Query{
398+
Size: size,
399+
From: from,
400+
}
401+
if appName != "" {
402+
q.Conds = orm.And(
403+
orm.Eq("application.name", appName),
404+
)
405+
}
406+
q.AddSort("created", orm.ASC)
407+
insts := []model.Instance{}
408+
var (
409+
instanceIDs []string
410+
toRemoveIDs []string
411+
instsCache = map[string]*model.Instance{}
412+
)
413+
client := elastic2.GetClient(global.MustLookupString(elastic2.GlobalSystemElasticsearchID))
414+
for {
415+
err, _ := orm.SearchWithJSONMapper(&insts, &q)
416+
if err != nil {
417+
return err
418+
}
419+
for _, inst := range insts {
420+
instanceIDs = append(instanceIDs, inst.ID)
421+
instsCache[inst.ID] = &inst
422+
}
423+
if len(instanceIDs) == 0 {
424+
break
425+
}
426+
aliveInstanceIDs, err := getAliveInstanceIDs(client, instanceIDs)
427+
if err != nil {
428+
return err
429+
}
430+
for _, instanceID := range instanceIDs {
431+
if _, ok := aliveInstanceIDs[instanceID]; !ok {
432+
toRemoveIDs = append(toRemoveIDs, instanceID)
433+
}
434+
}
435+
if len(toRemoveIDs) > 0 {
436+
// Use the same slice to avoid extra allocation
437+
filteredIDs := toRemoveIDs[:0]
438+
// check whether the instance is still online
439+
for _, instanceID := range toRemoveIDs {
440+
if inst, ok := instsCache[instanceID]; ok {
441+
_, err = h.getInstanceInfo(inst.Endpoint, inst.BasicAuth)
442+
if err == nil {
443+
// Skip online instance, do not append to filtered list
444+
continue
445+
}
446+
}
447+
// Keep only offline instances
448+
filteredIDs = append(filteredIDs, instanceID)
449+
}
450+
451+
// Assign back after filtering
452+
toRemoveIDs = filteredIDs
453+
query := util.MapStr{
454+
"query": util.MapStr{
455+
"terms": util.MapStr{
456+
"id": toRemoveIDs,
457+
},
458+
},
459+
}
460+
// remove instances
461+
err = orm.DeleteBy(model.Instance{}, util.MustToJSONBytes(query))
462+
if err != nil {
463+
return fmt.Errorf("failed to delete instance: %w", err)
464+
}
465+
// remove instance related data
466+
query = util.MapStr{
467+
"query": util.MapStr{
468+
"terms": util.MapStr{
469+
"metadata.labels.agent_id": toRemoveIDs,
470+
},
471+
},
472+
}
473+
err = orm.DeleteBy(model.Setting{}, util.MustToJSONBytes(query))
474+
}
475+
476+
// Exit loop when the number of returned records is less than the page size
477+
if len(insts) <= size {
478+
break
479+
}
480+
// Reset instance state for the next iteration
481+
insts = []model.Instance{}
482+
toRemoveIDs = nil
483+
instsCache = make(map[string]*model.Instance)
484+
q.From += size
485+
}
486+
return nil
487+
}
488+
489+
func getAliveInstanceIDs(client elastic2.API, instanceIDs []string) (map[string]struct{}, error) {
490+
query := util.MapStr{
491+
"size": 0,
492+
"query": util.MapStr{
493+
"bool": util.MapStr{
494+
"must": []util.MapStr{
495+
{
496+
"terms": util.MapStr{
497+
"agent.id": instanceIDs,
498+
},
499+
},
500+
{
501+
"range": util.MapStr{
502+
"timestamp": util.MapStr{
503+
"gt": "now-7d",
504+
},
505+
},
506+
},
507+
},
508+
},
509+
},
510+
"aggs": util.MapStr{
511+
"grp_agent_id": util.MapStr{
512+
"terms": util.MapStr{
513+
"field": "agent.id",
514+
},
515+
"aggs": util.MapStr{
516+
"count": util.MapStr{
517+
"value_count": util.MapStr{
518+
"field": "agent.id",
519+
},
520+
},
521+
},
522+
},
523+
},
524+
}
525+
queryDSL := util.MustToJSONBytes(query)
526+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
527+
defer cancel()
528+
response, err := client.QueryDSL(ctx, orm.GetWildcardIndexName(event.Event{}), nil, queryDSL)
529+
if err != nil {
530+
return nil, err
531+
}
532+
ret := map[string]struct{}{}
533+
for _, bk := range response.Aggregations["grp_agent_id"].Buckets {
534+
key := bk["key"].(string)
535+
if bk["doc_count"].(float64) > 0 {
536+
ret[key] = struct{}{}
537+
}
538+
}
539+
return ret, nil
540+
}
397541

398542
func (h *APIHandler) proxy(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
399543
var (
@@ -442,7 +586,7 @@ func (h *APIHandler) getInstanceInfo(endpoint string, basicAuth *model.BasicAuth
442586
obj := &model.Instance{}
443587
_, err := ProxyAgentRequest("runtime", endpoint, req1, obj)
444588
if err != nil {
445-
panic(err)
589+
return nil, err
446590
}
447591
return obj, err
448592

web/src/locales/en-US/agent.js

+3
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,7 @@ export default {
4343

4444
"agent.label.agent_credential": "Agent Credential",
4545
"agent.credential.tip": "No credential required",
46+
"agent.instance.clear.title": "Clear Offline Instances",
47+
"agent.instance.clear.modal.title": "Are you sure you want to clear offline instances?",
48+
"agent.instance.clear.modal.desc": "This operation will delete offline instances that have not reported metrics for 7 days."
4649
};

web/src/locales/zh-CN/agent.js

+3
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,7 @@ export default {
4040

4141
"agent.label.agent_credential": "代理凭据",
4242
"agent.credential.tip": "不需要凭据",
43+
"agent.instance.clear.title": "清理离线实例",
44+
"agent.instance.clear.modal.title": "您确定要清理离线实例?",
45+
"agent.instance.clear.modal.desc": "该操作将会删除离线并且 7 天没有上报指标的实例"
4346
};

web/src/pages/Agent/Instance/index.jsx

+35-1
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,37 @@ const AgentList = (props) => {
379379
}
380380
};
381381

382+
const [clearLoading, setClearLoading] = useState(false)
383+
// Ask the server to clear offline instances of the "agent" application.
// A success toast is shown when the response is acknowledged.
const onClearClick = async () => {
  setClearLoading(true);
  try {
    const clearRes = await request(`/instance/_clear`, {
      method: "POST",
      queryParams: {
        "app_name": "agent",
      },
    });
    if (clearRes && clearRes.acknowledged) {
      message.success("submit successfully");
    }
  } finally {
    // Always release the loading state, even if the request rejects;
    // otherwise the clear button would stay in its loading state.
    setClearLoading(false);
  }
}
396+
const showClearConfirm = useCallback(() => {
397+
Modal.confirm({
398+
title: formatMessage({ id: "agent.instance.clear.modal.title" }),
399+
content: (
400+
<>
401+
<div>{formatMessage({ id: "agent.instance.clear.modal.desc" })}</div>
402+
</>
403+
),
404+
okText: "Yes",
405+
okType: "danger",
406+
cancelText: "No",
407+
onOk() {
408+
onClearClick();
409+
},
410+
});
411+
}, []);
412+
382413
return (
383414
<PageHeaderWrapper>
384415
<Card>
@@ -390,7 +421,7 @@ const AgentList = (props) => {
390421
marginBottom: 15,
391422
}}
392423
>
393-
<div style={{ maxWidth: 500, flex: "1 1 auto" }}>
424+
<div style={{ maxWidth: 450, flex: "1 1 auto" }}>
394425
<Search
395426
allowClear
396427
placeholder="Type keyword to search"
@@ -413,6 +444,9 @@ const AgentList = (props) => {
413444
{
414445
hasAuthority("agent.instance:all") && (
415446
<>
447+
<Button loading={clearLoading} onClick={showClearConfirm}>
448+
{formatMessage({ id: "agent.instance.clear.title" })}
449+
</Button>
416450
<Button
417451
type="primary"
418452
onClick={() => {

0 commit comments

Comments
 (0)