初始化
cmd/prometheus/main.go(调用 rules 包的 NewManager)
// Construct the rule manager from its options; the logger is tagged with
// component="rule manager".
ruleManager := rules.NewManager(&rules.ManagerOptions{
	Appendable:  fanoutStorage,
	Notifier:    notifier,
	QueryEngine: queryEngine,
	Context:     ctx,
	ExternalURL: cfg.web.ExternalURL,
	Logger:      log.With(logger, "component", "rule manager"),
})

// Run the manager in its own goroutine so the caller is not blocked.
go ruleManager.Run()
配置
rules/manager.go
func (m *Manager) ApplyConfig(conf *config.Config) error { var files []string for _, pat := range conf.RuleFiles { fs, err := filepath.Glob(pat) files = append(files, fs...) } groups, errs := m.loadGroups(time.Duration(conf.GlobalConfig.EvaluationInterval), files...) for _, newg := range groups { go func(newg *Group) { go func() { newg.run() } } }}
主要做了如下几件事情:
- 列出配置的所有rules文件
- 解析rules文件
- 按groupName分组
- 为每个group启动goroutine执行group.run(),此处会启动定时任务,按照配置的频率evaluation_interval周期性地执行告警规则(alerting rule)或者记录规则(recording rule)
运行规则
运行group
rules/manager.go中的group.run()负责运行group:先立即执行一次Eval,之后按g.interval周期性地重复执行
// iter performs one evaluation pass of the group, stamped with the current time.
iter := func() {
	start := time.Now()
	g.Eval(start)
}

// Evaluate once immediately, then once more on every tick of g.interval.
iter()
tick := time.NewTicker(g.interval)
// NOTE(review): no exit path is visible in this excerpt, so the loop runs
// forever and the ticker is never stopped — presumably the shutdown handling
// is elided; confirm against the full run() implementation.
for {
	select {
	default:
		select {
		case <-tick.C:
			// Re-run the group at the configured frequency.
			iter()
		}
	}
}
运行rule
rules/manager.go中的Group.Eval方法会遍历当前group的所有rule,逐个调用rule.Eval;如果该rule是告警规则(*AlertingRule),随后调用sendAlerts发送告警
func (g *Group) Eval(ts time.Time) { for i, rule := range g.rules { func(i int, rule Rule) { vector, err := rule.Eval(g.opts.Context, ts, g.opts.QueryEngine, g.opts.ExternalURL) if ar, ok := rule.(*AlertingRule); ok { g.sendAlerts(ar) } }(i, rule) }}
对于告警规则,rule.Eval对应rules/alerting.go中AlertingRule的Eval()方法:执行查询,并根据查询结果更新每个alert的状态(Pending转为Firing,或者转为Inactive),以此判断是否需要进行alert:
func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, engine *promql.Engine, externalURL *url.URL) (promql.Vector, error) { res, err := query.Exec(ctx).Vector() //查询 for _, smpl := range res { r.active[h] = &Alert{ //缓存alert ActiveAt: ts, State: StatePending, } } // 调整alert的告警状态 for fp, a := range r.active { if _, ok := resultFPs[fp]; !ok { if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) { delete(r.active, fp) } if a.State != StateInactive { a.State = StateInactive a.ResolvedAt = ts } continue } if a.State == StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration { a.State = StateFiring } }}
发送告警
rules/manager.go中的sendAlerts方法会跳过仍处于Pending状态的alert,最终调用Notifier.Send()方法将其余alert发送给alertmanager
func (g *Group) sendAlerts(rule *AlertingRule) error { var alerts []*notifier.Alert for _, alert := range rule.currentAlerts() { if alert.State == StatePending { continue } } if len(alerts) > 0 { g.opts.Notifier.Send(alerts...) }}