diff --git a/pkg/query-service/integrations/alertManager/model.go b/pkg/query-service/integrations/alertManager/model.go index bb709e430f..19371a9bfd 100644 --- a/pkg/query-service/integrations/alertManager/model.go +++ b/pkg/query-service/integrations/alertManager/model.go @@ -40,6 +40,8 @@ type Alert struct { StartsAt time.Time `json:"startsAt,omitempty"` EndsAt time.Time `json:"endsAt,omitempty"` GeneratorURL string `json:"generatorURL,omitempty"` + + Receivers []string `json:"receivers,omitempty"` } // Name returns the name of the alert. It is equivalent to the "alertname" label. @@ -53,7 +55,7 @@ func (a *Alert) Hash() uint64 { } func (a *Alert) String() string { - s := fmt.Sprintf("%s[%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7]) + s := fmt.Sprintf("%s[%s][%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7], a.Receivers) if a.Resolved() { return s + "[resolved]" } diff --git a/pkg/query-service/rules/alerting.go b/pkg/query-service/rules/alerting.go index 65ee1f60a9..ecd7205557 100644 --- a/pkg/query-service/rules/alerting.go +++ b/pkg/query-service/rules/alerting.go @@ -72,6 +72,9 @@ type Alert struct { GeneratorURL string + // list of preferred receivers, e.g. slack + Receivers []string + Value float64 ActiveAt time.Time FiredAt time.Time @@ -80,7 +83,6 @@ type Alert struct { ValidUntil time.Time } -// todo(amol): need to review this with ankit func (a *Alert) needsSending(ts time.Time, resendDelay time.Duration) bool { if a.State == StatePending { return false diff --git a/pkg/query-service/rules/apiParams.go b/pkg/query-service/rules/apiParams.go index 346b515824..b88fa98fb6 100644 --- a/pkg/query-service/rules/apiParams.go +++ b/pkg/query-service/rules/apiParams.go @@ -35,6 +35,8 @@ type PostableRule struct { // Source captures the source url where rule has been created Source string `json:"source,omitempty"` + PreferredChannels []string `json:"preferredChannels,omitempty"` + // legacy Expr string `yaml:"expr,omitempty" json:"expr,omitempty"` OldYaml string `json:"yaml,omitempty"` diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go index a20ec05941..6494320043 100644 --- a/pkg/query-service/rules/manager.go +++ b/pkg/query-service/rules/manager.go @@ -381,12 +381,7 @@ func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string // create a threshold rule tr, err := NewThresholdRule( ruleId, - r.Alert, - r.RuleCondition, - time.Duration(r.EvalWindow), - r.Labels, - r.Annotations, - r.Source, + r, ) if err != nil { @@ -406,14 +401,8 @@ func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string // create promql rule pr, err := NewPromRule( ruleId, - r.Alert, - r.RuleCondition, - time.Duration(r.EvalWindow), - r.Labels, - r.Annotations, - // required as promql engine works with logger and not zap + r, log.With(m.logger, "alert", r.Alert), - r.Source, ) if err != nil { @@ -521,6 +510,7 @@ func (m *Manager) prepareNotifyFunc() NotifyFunc { Labels: alert.Labels, Annotations: alert.Annotations, GeneratorURL: generatorURL, + Receivers: alert.Receivers, } if !alert.ResolvedAt.IsZero() { a.EndsAt = alert.ResolvedAt diff --git a/pkg/query-service/rules/promRule.go b/pkg/query-service/rules/promRule.go index f2ceff1a1a..bb995de73a 100644 --- a/pkg/query-service/rules/promRule.go +++ b/pkg/query-service/rules/promRule.go @@ -29,6 +29,8 @@ type PromRule struct { labels plabels.Labels annotations plabels.Labels + preferredChannels []string + mtx sync.Mutex evaluationDuration time.Duration evaluationTimestamp time.Time @@ -45,38 +47,37 @@ type PromRule struct { func NewPromRule( id string, - name string, - ruleCondition *RuleCondition, - evalWindow time.Duration, - labels, annotations map[string]string, + postableRule *PostableRule, logger log.Logger, - source string, ) (*PromRule, error) { - if int64(evalWindow) == 0 { - evalWindow = 5 * time.Minute - } - - if ruleCondition == nil { + if postableRule.RuleCondition == nil { return nil, fmt.Errorf("no rule condition") - } else if !ruleCondition.IsValid() { + } else if !postableRule.RuleCondition.IsValid() { return nil, fmt.Errorf("invalid rule condition") } - zap.S().Info("msg:", "creating new alerting rule", "\t name:", name, "\t condition:", ruleCondition.String()) + p := PromRule{ + id: id, + name: postableRule.Alert, + source: postableRule.Source, + ruleCondition: postableRule.RuleCondition, + evalWindow: time.Duration(postableRule.EvalWindow), + labels: plabels.FromMap(postableRule.Labels), + annotations: plabels.FromMap(postableRule.Annotations), + preferredChannels: postableRule.PreferredChannels, + health: HealthUnknown, + active: map[uint64]*Alert{}, + logger: logger, + } - return &PromRule{ - id: id, - name: name, - source: source, - ruleCondition: ruleCondition, - evalWindow: evalWindow, - labels: plabels.FromMap(labels), - annotations: plabels.FromMap(annotations), - health: HealthUnknown, - active: map[uint64]*Alert{}, - logger: logger, - }, nil + if int64(p.evalWindow) == 0 { + p.evalWindow = 5 * time.Minute + } + + zap.S().Info("msg:", "creating new alerting rule", "\t name:", p.name, "\t condition:", p.ruleCondition.String()) + + return &p, nil } func (r *PromRule) Name() string { @@ -99,6 +100,10 @@ func (r *PromRule) GeneratorURL() string { return prepareRuleGeneratorURL(r.ID(), r.source) } +func (r *PromRule) PreferredChannels() []string { + return r.preferredChannels +} + func (r *PromRule) SetLastError(err error) { r.mtx.Lock() defer r.mtx.Unlock() @@ -382,6 +387,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) ( State: StatePending, Value: smpl.V, GeneratorURL: r.GeneratorURL(), + Receivers: r.preferredChannels, } } @@ -392,6 +398,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) ( if alert, ok := r.active[h]; ok && alert.State != StateInactive { alert.Value = a.Value alert.Annotations = a.Annotations + alert.Receivers = r.preferredChannels continue } @@ -429,11 +436,12 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) ( func (r *PromRule) String() string { ar := PostableRule{ - Alert: r.name, - RuleCondition: r.ruleCondition, - EvalWindow: Duration(r.evalWindow), - Labels: r.labels.Map(), - Annotations: r.annotations.Map(), + Alert: r.name, + RuleCondition: r.ruleCondition, + EvalWindow: Duration(r.evalWindow), + Labels: r.labels.Map(), + Annotations: r.annotations.Map(), + PreferredChannels: r.preferredChannels, } byt, err := yaml.Marshal(ar) diff --git a/pkg/query-service/rules/rule.go b/pkg/query-service/rules/rule.go index ba5c934172..9a2ac1bad0 100644 --- a/pkg/query-service/rules/rule.go +++ b/pkg/query-service/rules/rule.go @@ -19,6 +19,8 @@ type Rule interface { State() AlertState ActiveAlerts() []*Alert + PreferredChannels() []string + Eval(context.Context, time.Time, *Queriers) (interface{}, error) String() string // Query() string diff --git a/pkg/query-service/rules/thresholdRule.go b/pkg/query-service/rules/thresholdRule.go index 3358842051..dc0c29be97 100644 --- a/pkg/query-service/rules/thresholdRule.go +++ b/pkg/query-service/rules/thresholdRule.go @@ -32,6 +32,7 @@ type ThresholdRule struct { labels labels.Labels annotations labels.Labels + preferredChannels []string mtx sync.Mutex evaluationDuration time.Duration evaluationTimestamp time.Time @@ -46,39 +47,35 @@ type ThresholdRule struct { func NewThresholdRule( id string, - name string, - ruleCondition *RuleCondition, - evalWindow time.Duration, - l, a map[string]string, - source string, + p *PostableRule, ) (*ThresholdRule, error) { - if int64(evalWindow) == 0 { - evalWindow = 5 * time.Minute - } - - if ruleCondition == nil { + if p.RuleCondition == nil { return nil, fmt.Errorf("no rule condition") - } else if !ruleCondition.IsValid() { + } else if !p.RuleCondition.IsValid() { return nil, fmt.Errorf("invalid rule condition") } - thresholdRule := &ThresholdRule{ - id: id, - name: name, - source: source, - ruleCondition: ruleCondition, - evalWindow: evalWindow, - labels: labels.FromMap(l), - annotations: labels.FromMap(a), - - health: HealthUnknown, - active: map[uint64]*Alert{}, + t := ThresholdRule{ + id: id, + name: p.Alert, + source: p.Source, + ruleCondition: p.RuleCondition, + evalWindow: time.Duration(p.EvalWindow), + labels: labels.FromMap(p.Labels), + annotations: labels.FromMap(p.Annotations), + preferredChannels: p.PreferredChannels, + health: HealthUnknown, + active: map[uint64]*Alert{}, } - zap.S().Info("msg:", "creating new alerting rule", "\t name:", name, "\t condition:", ruleCondition.String(), "\t generatorURL:", thresholdRule.GeneratorURL()) + if int64(t.evalWindow) == 0 { + t.evalWindow = 5 * time.Minute + } - return thresholdRule, nil + zap.S().Info("msg:", "creating new alerting rule", "\t name:", t.name, "\t condition:", t.ruleCondition.String(), "\t generatorURL:", t.GeneratorURL()) + + return &t, nil } func (r *ThresholdRule) Name() string { @@ -97,6 +94,10 @@ func (r *ThresholdRule) GeneratorURL() string { return prepareRuleGeneratorURL(r.ID(), r.source) } +func (r *ThresholdRule) PreferredChannels() []string { + return r.preferredChannels +} + func (r *ThresholdRule) target() *float64 { if r.ruleCondition == nil { return nil @@ -479,6 +480,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer } } } + zap.S().Debugf("ruleid:", r.ID(), "\t resultmap(potential alerts):", len(resultMap)) for _, sample := range resultMap { // check alert rule condition before dumping results @@ -486,7 +488,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer result = append(result, sample) } } - + zap.S().Debugf("ruleid:", r.ID(), "\t result (found alerts):", len(result)) return result, nil } @@ -615,6 +617,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie State: StatePending, Value: smpl.V, GeneratorURL: r.GeneratorURL(), + Receivers: r.preferredChannels, } } @@ -628,6 +631,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie alert.Value = a.Value alert.Annotations = a.Annotations + alert.Receivers = r.preferredChannels continue } @@ -665,11 +669,12 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie func (r *ThresholdRule) String() string { ar := PostableRule{ - Alert: r.name, - RuleCondition: r.ruleCondition, - EvalWindow: Duration(r.evalWindow), - Labels: r.labels.Map(), - Annotations: r.annotations.Map(), + Alert: r.name, + RuleCondition: r.ruleCondition, + EvalWindow: Duration(r.evalWindow), + Labels: r.labels.Map(), + Annotations: r.annotations.Map(), + PreferredChannels: r.preferredChannels, } byt, err := yaml.Marshal(ar)