Skip to content

Commit

Permalink
1. Move the default ql to the configuration (ccfos#764)
Browse files Browse the repository at this point in the history
2. Add slowLogRecordSecond to log slow queries
3. Create a slice with a specified length to avoid dynamic expansion
4. Slow-query log now prints the time taken to fetch series and the number of result series
  • Loading branch information
ning1875 authored Aug 10, 2021
1 parent 8b508fc commit 42fc052
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 17 deletions.
2 changes: 2 additions & 0 deletions backend/prome/prome.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ type PromeSection struct {
MaxConcurrentQuery int `yaml:"maxConcurrentQuery"`
MaxSamples int `yaml:"maxSamples"`
MaxFetchAllSeriesLimitMinute int64 `yaml:"maxFetchAllSeriesLimitMinute"`
SlowLogRecordSecond float64 `yaml:"slowLogRecordSecond"`
DefaultFetchSeriesQl string `yaml:"defaultFetchSeriesQl"`
RemoteWrite []RemoteConfig `yaml:"remoteWrite"`
RemoteRead []RemoteConfig `yaml:"remoteRead"`
}
Expand Down
82 changes: 65 additions & 17 deletions backend/prome/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
const (
LABEL_IDENT = "ident"
LABEL_NAME = "__name__"
DEFAULT_QL = `{__name__=~".*a.*|.*e.*"}`
DEFAULT_STEP = 15
)

Expand Down Expand Up @@ -323,7 +322,7 @@ func (pd *PromeDataSource) CommonQuerySeries(cj *commonQueryObj) storage.SeriesS
qlStrFinal := convertToPromql(cj)

if qlStrFinal == "{}" {
qlStrFinal = DEFAULT_QL
qlStrFinal = pd.Section.DefaultFetchSeriesQl
reqMinute := (cj.End - cj.Start) / 60
// 如果前端啥都没传,要限制下查询series的时间范围,防止高基础查询
if reqMinute > pd.Section.MaxFetchAllSeriesLimitMinute {
Expand Down Expand Up @@ -379,7 +378,19 @@ func (pd *PromeDataSource) CommonQuerySeries(cj *commonQueryObj) storage.SeriesS
}

// Get all series which match matchers.
startTs := time.Now()
s := q.Select(true, hints, matcherSets[0]...)
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log_CommonQuerySeries_select][threshold:%v][timeTookSecond:%v][from:%v][args:%+v][promql:%v]",
pd.Section.SlowLogRecordSecond,
timeTookSecond,
cj.From,
cj,
qlStrFinal,
)
}

return s

}
Expand All @@ -389,6 +400,7 @@ func (pd *PromeDataSource) CommonQuerySeries(cj *commonQueryObj) storage.SeriesS
// TODO 等待prometheus官方对 remote_read label_values 的支持
// Implement: https://github.com/prometheus/prometheus/issues/3351
func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKeyQueryResp {
startTs := time.Now()
respD := &vos.TagKeyQueryResp{
Keys: make([]string, 0),
}
Expand All @@ -400,7 +412,7 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
Metric: "",
})
}

resultSeries := ""
for _, x := range recv.Params {
cj := &commonQueryObj{
Idents: x.Idents,
Expand All @@ -421,8 +433,10 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
continue
}
thisSeriesNum := 0
for s.Next() {
series := s.At()
thisSeriesNum++
for _, lb := range series.Labels() {
if lb.Name == LABEL_NAME {
continue
Expand All @@ -436,12 +450,14 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
labelNamesSet[lb.Name] = struct{}{}
}
}
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)

}
names := make([]string, 0)
names := make([]string, len(labelNamesSet))
i := 0
for key := range labelNamesSet {

names = append(names, key)
names[i] = key
i++
}
sort.Strings(names)
// 因为map中的key是无序的,必须这样才能稳定输出
Expand All @@ -450,12 +466,17 @@ func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKe
}

respD.Keys = names
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagKeys][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD

}

// 对应prometheus 中的 /api/v1/label/<label_name>/values
func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.TagValueQueryResp {
startTs := time.Now()
labelValuesSet := make(map[string]struct{})

if len(recv.Params) == 0 {
Expand All @@ -464,7 +485,7 @@ func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.Tag
Metric: "",
})
}

resultSeries := ""
for _, x := range recv.Params {
cj := &commonQueryObj{
Idents: x.Idents,
Expand All @@ -485,9 +506,10 @@ func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.Tag
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
continue
}

thisSeriesNum := 0
for s.Next() {
series := s.At()
thisSeriesNum++
for _, lb := range series.Labels() {
if lb.Name == recv.TagKey {
if recv.TagValue != "" {
Expand All @@ -500,24 +522,31 @@ func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.Tag
}
}
}
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
}
vals := make([]string, 0)
vals := make([]string, len(labelValuesSet))
i := 0
for val := range labelValuesSet {

vals = append(vals, val)
vals[i] = val
i++
}
sort.Strings(vals)
if recv.Limit > 0 && len(vals) > recv.Limit {
vals = vals[:recv.Limit]
}
respD := &vos.TagValueQueryResp{}
respD.Values = vals
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagValues][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD

}

// 对应prometheus 中的 /api/v1/label/<label_name>/values label_name == __name__
func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQueryResp {
startTs := time.Now()
cj := &commonQueryObj{
Idents: recv.Idents,
Metric: recv.Metric,
Expand All @@ -544,18 +573,23 @@ func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQu
sets = append(sets, s)
set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
labelValuesSet := make(map[string]struct{})
//for s.Next() {
resultSeries := ""
thisSeriesNum := 0
for set.Next() {
series := set.At()
thisSeriesNum++
for _, lb := range series.Labels() {
if lb.Name == LABEL_NAME {
labelValuesSet[lb.Value] = struct{}{}
}
}
}
vals := make([]string, 0)
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
vals := make([]string, len(labelValuesSet))
i := 0
for val := range labelValuesSet {
vals = append(vals, val)
vals[i] = val
i++
}

sort.Strings(vals)
Expand All @@ -564,11 +598,16 @@ func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQu
vals = vals[:recv.Limit]
}
respD.Metrics = vals
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryMetrics][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD
}

// 对应prometheus 中的 /api/v1/series
func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagPairQueryResp {
startTs := time.Now()
respD := &vos.TagPairQueryResp{
TagPairs: make([]string, 0),
Idents: make([]string, 0),
Expand All @@ -580,6 +619,7 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
Metric: "",
})
}
resultSeries := ""
for _, x := range recv.Params {
cj := &commonQueryObj{
Idents: x.Idents,
Expand All @@ -606,8 +646,10 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)

labelIdents := make([]string, 0)
thisSeriesNum := 0
for set.Next() {
series := s.At()
thisSeriesNum++
labelsS := series.Labels()
for _, i := range labelsS {

Expand All @@ -628,13 +670,15 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
}

}
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)

}

newTags := make([]string, 0)
newTags := make([]string, len(tps))
i := 0
for k := range tps {

newTags = append(newTags, k)
newTags[i] = k
i++
}

sort.Strings(newTags)
Expand All @@ -643,6 +687,10 @@ func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagP
}

respD.TagPairs = newTags
timeTookSecond := time.Since(startTs).Seconds()
if timeTookSecond > pd.Section.SlowLogRecordSecond {
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagPairs][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
}
return respD
}

Expand Down
2 changes: 2 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ func Parse() error {
viper.SetDefault("trans.backend.prometheus.maxConcurrentQuery", 30)
viper.SetDefault("trans.backend.prometheus.maxSamples", 50000000)
viper.SetDefault("trans.backend.prometheus.maxFetchAllSeriesLimitMinute", 5)
viper.SetDefault("trans.backend.prometheus.slowLogRecordSecond", 3)
viper.SetDefault("trans.backend.prometheus.defaultFetchSeriesQl", `{__name__=~"system.*"}`)
viper.SetDefault("tpl.alertRulePath", "./etc/alert_rule")
viper.SetDefault("tpl.dashboardPath", "./etc/dashboard")

Expand Down
5 changes: 5 additions & 0 deletions etc/server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ trans:
lookbackDeltaMinute: 2
# 查询全量索引时时间窗口限制,降低高基数
maxFetchAllSeriesLimitMinute: 5
# 查询接口耗时超过多少秒就打印warning日志记录
slowLogRecordSecond: 3
# remote_read时,如果没有查询条件则用这条默认的ql查询
# 注意! ql匹配series越多,造成的oom或者慢查询可能越大
defaultFetchSeriesQl: '{__name__=~"system.*"}'
remoteWrite:
# m3db的配置
#- name: m3db01
Expand Down

0 comments on commit 42fc052

Please sign in to comment.