Alerts: Test Notifications in Rules

This commit is contained in:
Amol Umbark 2022-08-04 17:24:15 +05:30 committed by GitHub
parent 80c96af5a4
commit 1ebf64589f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 202 additions and 130 deletions

View File

@ -304,12 +304,14 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) {
router.HandleFunc("/api/v1/channels/{id}", AdminAccess(aH.deleteChannel)).Methods(http.MethodDelete) router.HandleFunc("/api/v1/channels/{id}", AdminAccess(aH.deleteChannel)).Methods(http.MethodDelete)
router.HandleFunc("/api/v1/channels", EditAccess(aH.createChannel)).Methods(http.MethodPost) router.HandleFunc("/api/v1/channels", EditAccess(aH.createChannel)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/testChannel", EditAccess(aH.testChannel)).Methods(http.MethodPost) router.HandleFunc("/api/v1/testChannel", EditAccess(aH.testChannel)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/rules", ViewAccess(aH.listRules)).Methods(http.MethodGet) router.HandleFunc("/api/v1/rules", ViewAccess(aH.listRules)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/rules/{id}", ViewAccess(aH.getRule)).Methods(http.MethodGet) router.HandleFunc("/api/v1/rules/{id}", ViewAccess(aH.getRule)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/rules", EditAccess(aH.createRule)).Methods(http.MethodPost) router.HandleFunc("/api/v1/rules", EditAccess(aH.createRule)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.editRule)).Methods(http.MethodPut) router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.editRule)).Methods(http.MethodPut)
router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.deleteRule)).Methods(http.MethodDelete) router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.deleteRule)).Methods(http.MethodDelete)
router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.patchRule)).Methods(http.MethodPatch) router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.patchRule)).Methods(http.MethodPatch)
router.HandleFunc("/api/v1/testRule", EditAccess(aH.testRule)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/dashboards", ViewAccess(aH.getDashboards)).Methods(http.MethodGet) router.HandleFunc("/api/v1/dashboards", ViewAccess(aH.getDashboards)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/dashboards", EditAccess(aH.createDashboards)).Methods(http.MethodPost) router.HandleFunc("/api/v1/dashboards", EditAccess(aH.createDashboards)).Methods(http.MethodPost)
@ -771,6 +773,32 @@ func (aH *APIHandler) createDashboards(w http.ResponseWriter, r *http.Request) {
} }
func (aH *APIHandler) testRule(w http.ResponseWriter, r *http.Request) {
defer r.Body.Close()
body, err := ioutil.ReadAll(r.Body)
if err != nil {
zap.S().Errorf("Error in getting req body in test rule API\n", err)
respondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil)
return
}
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
alertCount, apiRrr := aH.ruleManager.TestNotification(ctx, string(body))
if apiRrr != nil {
respondError(w, apiRrr, nil)
return
}
response := map[string]interface{}{
"alertCount": alertCount,
"message": "notification sent",
}
aH.respond(w, response)
}
func (aH *APIHandler) deleteRule(w http.ResponseWriter, r *http.Request) { func (aH *APIHandler) deleteRule(w http.ResponseWriter, r *http.Request) {
id := mux.Vars(r)["id"] id := mux.Vars(r)["id"]
@ -963,6 +991,7 @@ func (aH *APIHandler) createRule(w http.ResponseWriter, r *http.Request) {
aH.respond(w, "rule successfully added") aH.respond(w, "rule successfully added")
} }
func (aH *APIHandler) queryRangeMetricsFromClickhouse(w http.ResponseWriter, r *http.Request) { func (aH *APIHandler) queryRangeMetricsFromClickhouse(w http.ResponseWriter, r *http.Request) {
} }

View File

@ -23,6 +23,8 @@ const (
// AlertForStateMetricName is the metric name for 'for' state of alert. // AlertForStateMetricName is the metric name for 'for' state of alert.
alertForStateMetricName = "ALERTS_FOR_STATE" alertForStateMetricName = "ALERTS_FOR_STATE"
TestAlertPostFix = "_TEST_ALERT"
) )
type RuleType string type RuleType string

View File

@ -18,6 +18,16 @@ import (
// this file contains api request and responses to be // this file contains api request and responses to be
// served over http // served over http
// newApiErrorInternal returns a new api error object of type internal
func newApiErrorInternal(err error) *model.ApiError {
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
// newApiErrorBadData returns a new api error object of bad request type
func newApiErrorBadData(err error) *model.ApiError {
return &model.ApiError{Typ: model.ErrorBadData, Err: err}
}
// PostableRule is used to create alerting rule from HTTP api // PostableRule is used to create alerting rule from HTTP api
type PostableRule struct { type PostableRule struct {
Alert string `yaml:"alert,omitempty" json:"alert,omitempty"` Alert string `yaml:"alert,omitempty" json:"alert,omitempty"`

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/google/uuid"
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
@ -19,6 +20,8 @@ import (
// opentracing "github.com/opentracing/opentracing-go" // opentracing "github.com/opentracing/opentracing-go"
am "go.signoz.io/query-service/integrations/alertManager" am "go.signoz.io/query-service/integrations/alertManager"
"go.signoz.io/query-service/model"
"go.signoz.io/query-service/utils/labels"
) )
// namespace for prom metrics // namespace for prom metrics
@ -38,7 +41,6 @@ func prepareTaskName(ruleId interface{}) string {
default: default:
return fmt.Sprintf("%v-groupname", ruleId) return fmt.Sprintf("%v-groupname", ruleId)
} }
} }
// ManagerOptions bundles options for the Manager. // ManagerOptions bundles options for the Manager.
@ -382,6 +384,7 @@ func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string
tr, err := NewThresholdRule( tr, err := NewThresholdRule(
ruleId, ruleId,
r, r,
ThresholdRuleOpts{},
) )
if err != nil { if err != nil {
@ -403,6 +406,7 @@ func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string
ruleId, ruleId,
r, r,
log.With(m.logger, "alert", r.Alert), log.With(m.logger, "alert", r.Alert),
PromRuleOpts{},
) )
if err != nil { if err != nil {
@ -683,3 +687,84 @@ func (m *Manager) PatchRule(ruleStr string, ruleId string) (*GettableRule, error
return &response, nil return &response, nil
} }
// TestNotification prepares a dummy rule for given rule parameters and
// sends a test notification. returns alert count and error (if any)
func (m *Manager) TestNotification(ctx context.Context, ruleStr string) (int, *model.ApiError) {
parsedRule, errs := ParsePostableRule([]byte(ruleStr))
if len(errs) > 0 {
zap.S().Errorf("msg: failed to parse rule from request:", "\t error: ", errs)
return 0, newApiErrorBadData(errs[0])
}
var alertname = parsedRule.Alert
if alertname == "" {
// alertname is not mandatory for testing, so picking
// a random string here
alertname = uuid.New().String()
}
// append name to indicate this is test alert
parsedRule.Alert = fmt.Sprintf("%s%s", alertname, TestAlertPostFix)
var rule Rule
var err error
if parsedRule.RuleType == RuleTypeThreshold {
// add special labels for test alerts
parsedRule.Labels[labels.AlertAdditionalInfoLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
// create a threshold rule
rule, err = NewThresholdRule(
alertname,
parsedRule,
ThresholdRuleOpts{
SendUnmatched: true,
SendAlways: true,
},
)
if err != nil {
zap.S().Errorf("msg: failed to prepare a new threshold rule for test:", "\t error: ", err)
return 0, newApiErrorBadData(err)
}
} else if parsedRule.RuleType == RuleTypeProm {
// create promql rule
rule, err = NewPromRule(
alertname,
parsedRule,
log.With(m.logger, "alert", alertname),
PromRuleOpts{
SendAlways: true,
},
)
if err != nil {
zap.S().Errorf("msg: failed to prepare a new promql rule for test:", "\t error: ", err)
return 0, newApiErrorBadData(err)
}
} else {
return 0, newApiErrorBadData(fmt.Errorf("failed to derive ruletype with given information"))
}
// set timestamp to current utc time
ts := time.Now().UTC()
count, err := rule.Eval(ctx, ts, m.opts.Queriers)
if err != nil {
zap.S().Warn("msg:", "Evaluating rule failed", "\t rule:", rule, "\t err: ", err)
return 0, newApiErrorInternal(fmt.Errorf("rule evaluation failed"))
}
alertsFound := count.(int)
rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), m.prepareNotifyFunc())
return alertsFound, nil
}

View File

@ -18,6 +18,12 @@ import (
yaml "gopkg.in/yaml.v2" yaml "gopkg.in/yaml.v2"
) )
type PromRuleOpts struct {
// SendAlways will send alert irresepective of resendDelay
// or other params
SendAlways bool
}
type PromRule struct { type PromRule struct {
id string id string
name string name string
@ -43,12 +49,14 @@ type PromRule struct {
active map[uint64]*Alert active map[uint64]*Alert
logger log.Logger logger log.Logger
opts PromRuleOpts
} }
func NewPromRule( func NewPromRule(
id string, id string,
postableRule *PostableRule, postableRule *PostableRule,
logger log.Logger, logger log.Logger,
opts PromRuleOpts,
) (*PromRule, error) { ) (*PromRule, error) {
if postableRule.RuleCondition == nil { if postableRule.RuleCondition == nil {
@ -69,13 +77,20 @@ func NewPromRule(
health: HealthUnknown, health: HealthUnknown,
active: map[uint64]*Alert{}, active: map[uint64]*Alert{},
logger: logger, logger: logger,
opts: opts,
} }
if int64(p.evalWindow) == 0 { if int64(p.evalWindow) == 0 {
p.evalWindow = 5 * time.Minute p.evalWindow = 5 * time.Minute
} }
query, err := p.getPqlQuery()
zap.S().Info("msg:", "creating new alerting rule", "\t name:", p.name, "\t condition:", p.ruleCondition.String()) if err != nil {
// can not generate a valid prom QL query
return nil, err
}
zap.S().Info("msg:", "creating new alerting rule", "\t name:", p.name, "\t condition:", p.ruleCondition.String(), "\t query:", query)
return &p, nil return &p, nil
} }
@ -172,24 +187,6 @@ func (r *PromRule) sample(alert *Alert, ts time.Time) pql.Sample {
return s return s
} }
// forStateSample returns the sample for ALERTS_FOR_STATE.
func (r *PromRule) forStateSample(alert *Alert, ts time.Time, v float64) pql.Sample {
lb := plabels.NewBuilder(r.labels)
alertLabels := alert.Labels.(plabels.Labels)
for _, l := range alertLabels {
lb.Set(l.Name, l.Value)
}
lb.Set(plabels.MetricName, alertForStateMetricName)
lb.Set(plabels.AlertName, r.name)
s := pql.Sample{
Metric: lb.Labels(),
Point: pql.Point{T: timestamp.FromTime(ts), V: v},
}
return s
}
// GetEvaluationDuration returns the time in seconds it took to evaluate the alerting rule. // GetEvaluationDuration returns the time in seconds it took to evaluate the alerting rule.
func (r *PromRule) GetEvaluationDuration() time.Duration { func (r *PromRule) GetEvaluationDuration() time.Duration {
r.mtx.Lock() r.mtx.Lock()
@ -265,7 +262,7 @@ func (r *PromRule) ForEachActiveAlert(f func(*Alert)) {
func (r *PromRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) { func (r *PromRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) {
alerts := []*Alert{} alerts := []*Alert{}
r.ForEachActiveAlert(func(alert *Alert) { r.ForEachActiveAlert(func(alert *Alert) {
if alert.needsSending(ts, resendDelay) { if r.opts.SendAlways || alert.needsSending(ts, resendDelay) {
alert.LastSentAt = ts alert.LastSentAt = ts
// Allow for two Eval or Alertmanager send failures. // Allow for two Eval or Alertmanager send failures.
delta := resendDelay delta := resendDelay
@ -289,7 +286,6 @@ func (r *PromRule) getPqlQuery() (string, error) {
if query == "" { if query == "" {
return query, fmt.Errorf("a promquery needs to be set for this rule to function") return query, fmt.Errorf("a promquery needs to be set for this rule to function")
} }
if r.ruleCondition.Target != nil && r.ruleCondition.CompareOp != CompareOpNone { if r.ruleCondition.Target != nil && r.ruleCondition.CompareOp != CompareOpNone {
query = fmt.Sprintf("%s %s %f", query, ResolveCompareOp(r.ruleCondition.CompareOp), *r.ruleCondition.Target) query = fmt.Sprintf("%s %s %f", query, ResolveCompareOp(r.ruleCondition.CompareOp), *r.ruleCondition.Target)
return query, nil return query, nil
@ -321,7 +317,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) (
defer r.mtx.Unlock() defer r.mtx.Unlock()
resultFPs := map[uint64]struct{}{} resultFPs := map[uint64]struct{}{}
var vec pql.Vector
var alerts = make(map[uint64]*Alert, len(res)) var alerts = make(map[uint64]*Alert, len(res))
for _, smpl := range res { for _, smpl := range res {
@ -358,6 +354,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) (
for _, l := range r.labels { for _, l := range r.labels {
lb.Set(l.Name, expand(l.Value)) lb.Set(l.Name, expand(l.Value))
} }
lb.Set(qslabels.AlertNameLabel, r.Name()) lb.Set(qslabels.AlertNameLabel, r.Name())
lb.Set(qslabels.AlertRuleIdLabel, r.ID()) lb.Set(qslabels.AlertRuleIdLabel, r.ID())
lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL()) lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL())
@ -429,8 +426,8 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) (
} }
r.health = HealthGood r.health = HealthGood
r.lastError = err r.lastError = err
return vec, nil
return len(r.active), nil
} }
func (r *PromRule) String() string { func (r *PromRule) String() string {

View File

@ -6,7 +6,6 @@ import (
"github.com/go-kit/log" "github.com/go-kit/log"
opentracing "github.com/opentracing/opentracing-go" opentracing "github.com/opentracing/opentracing-go"
plabels "github.com/prometheus/prometheus/pkg/labels" plabels "github.com/prometheus/prometheus/pkg/labels"
pql "github.com/prometheus/prometheus/promql"
"go.uber.org/zap" "go.uber.org/zap"
"sort" "sort"
"sync" "sync"
@ -313,7 +312,6 @@ func (g *PromRuleTask) CopyState(fromTask Task) error {
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially. // Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) { func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) {
zap.S().Info("promql rule task:", g.name, "\t eval started at:", ts) zap.S().Info("promql rule task:", g.name, "\t eval started at:", ts)
var samplesTotal float64
for i, rule := range g.rules { for i, rule := range g.rules {
if rule == nil { if rule == nil {
continue continue
@ -336,7 +334,7 @@ func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) {
rule.SetEvaluationTimestamp(t) rule.SetEvaluationTimestamp(t)
}(time.Now()) }(time.Now())
data, err := rule.Eval(ctx, ts, g.opts.Queriers) _, err := rule.Eval(ctx, ts, g.opts.Queriers)
if err != nil { if err != nil {
rule.SetHealth(HealthBad) rule.SetHealth(HealthBad)
rule.SetLastError(err) rule.SetLastError(err)
@ -350,21 +348,8 @@ func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) {
//} //}
return return
} }
vector := data.(pql.Vector)
samplesTotal += float64(len(vector))
rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify) rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify)
seriesReturned := make(map[string]plabels.Labels, len(g.seriesInPreviousEval[i]))
defer func() {
g.seriesInPreviousEval[i] = seriesReturned
}()
for _, s := range vector {
seriesReturned[s.Metric.String()] = s.Metric
}
}(i, rule) }(i, rule)
} }
} }

View File

@ -14,17 +14,15 @@ import (
// RuleTask holds a rule (with composite queries) // RuleTask holds a rule (with composite queries)
// and evaluates the rule at a given frequency // and evaluates the rule at a given frequency
type RuleTask struct { type RuleTask struct {
name string name string
file string file string
frequency time.Duration frequency time.Duration
rules []Rule rules []Rule
seriesInPreviousEval []map[string]labels.Labels // One per Rule. opts *ManagerOptions
staleSeries []labels.Labels mtx sync.Mutex
opts *ManagerOptions evaluationDuration time.Duration
mtx sync.Mutex evaluationTime time.Duration
evaluationDuration time.Duration lastEvaluation time.Time
evaluationTime time.Duration
lastEvaluation time.Time
markStale bool markStale bool
done chan struct{} done chan struct{}
@ -46,16 +44,15 @@ func newRuleTask(name, file string, frequency time.Duration, rules []Rule, opts
zap.S().Info("msg:", "initiating a new rule task", "\t name:", name, "\t frequency:", frequency) zap.S().Info("msg:", "initiating a new rule task", "\t name:", name, "\t frequency:", frequency)
return &RuleTask{ return &RuleTask{
name: name, name: name,
file: file, file: file,
pause: false, pause: false,
frequency: frequency, frequency: frequency,
rules: rules, rules: rules,
opts: opts, opts: opts,
seriesInPreviousEval: make([]map[string]labels.Labels, len(rules)), done: make(chan struct{}),
done: make(chan struct{}), terminated: make(chan struct{}),
terminated: make(chan struct{}), notify: notify,
notify: notify,
} }
} }
@ -126,24 +123,6 @@ func (g *RuleTask) Run(ctx context.Context) {
tick := time.NewTicker(g.frequency) tick := time.NewTicker(g.frequency)
defer tick.Stop() defer tick.Stop()
// defer cleanup
defer func() {
if !g.markStale {
return
}
go func(now time.Time) {
for _, rule := range g.seriesInPreviousEval {
for _, r := range rule {
g.staleSeries = append(g.staleSeries, r)
}
}
// That can be garbage collected at this point.
g.seriesInPreviousEval = nil
}(time.Now())
}()
iter() iter()
// let the group iterate and run // let the group iterate and run
@ -285,17 +264,15 @@ func (g *RuleTask) CopyState(fromTask Task) error {
ruleMap[nameAndLabels] = append(l, fi) ruleMap[nameAndLabels] = append(l, fi)
} }
for i, rule := range g.rules { for _, rule := range g.rules {
nameAndLabels := nameAndLabels(rule) nameAndLabels := nameAndLabels(rule)
indexes := ruleMap[nameAndLabels] indexes := ruleMap[nameAndLabels]
if len(indexes) == 0 { if len(indexes) == 0 {
continue continue
} }
fi := indexes[0] fi := indexes[0]
g.seriesInPreviousEval[i] = from.seriesInPreviousEval[fi]
ruleMap[nameAndLabels] = indexes[1:] ruleMap[nameAndLabels] = indexes[1:]
// todo(amol): support other rules too here
ar, ok := rule.(*ThresholdRule) ar, ok := rule.(*ThresholdRule)
if !ok { if !ok {
continue continue
@ -310,18 +287,6 @@ func (g *RuleTask) CopyState(fromTask Task) error {
} }
} }
// Handle deleted and unmatched duplicate rules.
// todo(amol): possibly not needed any more
g.staleSeries = from.staleSeries
for fi, fromRule := range from.rules {
nameAndLabels := nameAndLabels(fromRule)
l := ruleMap[nameAndLabels]
if len(l) != 0 {
for _, series := range from.seriesInPreviousEval[fi] {
g.staleSeries = append(g.staleSeries, series)
}
}
}
return nil return nil
} }
@ -330,7 +295,6 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
zap.S().Debugf("msg:", "rule task eval started", "\t name:", g.name, "\t start time:", ts) zap.S().Debugf("msg:", "rule task eval started", "\t name:", g.name, "\t start time:", ts)
var samplesTotal float64
for i, rule := range g.rules { for i, rule := range g.rules {
if rule == nil { if rule == nil {
continue continue
@ -353,7 +317,7 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
rule.SetEvaluationTimestamp(t) rule.SetEvaluationTimestamp(t)
}(time.Now()) }(time.Now())
data, err := rule.Eval(ctx, ts, g.opts.Queriers) _, err := rule.Eval(ctx, ts, g.opts.Queriers)
if err != nil { if err != nil {
rule.SetHealth(HealthBad) rule.SetHealth(HealthBad)
rule.SetLastError(err) rule.SetLastError(err)
@ -368,18 +332,8 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
return return
} }
vector := data.(Vector)
samplesTotal += float64(len(vector))
rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify) rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify)
seriesReturned := make(map[string]labels.Labels, len(g.seriesInPreviousEval[i]))
for _, s := range vector {
seriesReturned[s.Metric.String()] = s.Metric
}
g.seriesInPreviousEval[i] = seriesReturned
}(i, rule) }(i, rule)
} }
} }

View File

@ -43,11 +43,25 @@ type ThresholdRule struct {
// map of active alerts // map of active alerts
active map[uint64]*Alert active map[uint64]*Alert
opts ThresholdRuleOpts
}
type ThresholdRuleOpts struct {
// sendUnmatched sends observed metric values
// even if they dont match the rule condition. this is
// useful in testing the rule
SendUnmatched bool
// sendAlways will send alert irresepective of resendDelay
// or other params
SendAlways bool
} }
func NewThresholdRule( func NewThresholdRule(
id string, id string,
p *PostableRule, p *PostableRule,
opts ThresholdRuleOpts,
) (*ThresholdRule, error) { ) (*ThresholdRule, error) {
if p.RuleCondition == nil { if p.RuleCondition == nil {
@ -67,6 +81,7 @@ func NewThresholdRule(
preferredChannels: p.PreferredChannels, preferredChannels: p.PreferredChannels,
health: HealthUnknown, health: HealthUnknown,
active: map[uint64]*Alert{}, active: map[uint64]*Alert{},
opts: opts,
} }
if int64(t.evalWindow) == 0 { if int64(t.evalWindow) == 0 {
@ -105,6 +120,14 @@ func (r *ThresholdRule) target() *float64 {
return r.ruleCondition.Target return r.ruleCondition.Target
} }
func (r *ThresholdRule) targetVal() float64 {
if r.ruleCondition == nil || r.ruleCondition.Target == nil {
return 0
}
return *r.ruleCondition.Target
}
func (r *ThresholdRule) matchType() MatchType { func (r *ThresholdRule) matchType() MatchType {
if r.ruleCondition == nil { if r.ruleCondition == nil {
return AtleastOnce return AtleastOnce
@ -188,25 +211,7 @@ func (r *ThresholdRule) sample(alert *Alert, ts time.Time) Sample {
Metric: lb.Labels(), Metric: lb.Labels(),
Point: Point{T: timestamp.FromTime(ts), V: 1}, Point: Point{T: timestamp.FromTime(ts), V: 1},
} }
return s
}
// forStateSample returns the sample for ALERTS_FOR_STATE.
func (r *ThresholdRule) forStateSample(alert *Alert, ts time.Time, v float64) Sample {
lb := labels.NewBuilder(r.labels)
alertLabels := alert.Labels.(labels.Labels)
for _, l := range alertLabels {
lb.Set(l.Name, l.Value)
}
lb.Set(labels.MetricNameLabel, alertForStateMetricName)
lb.Set(labels.AlertNameLabel, r.name)
s := Sample{
Metric: lb.Labels(),
Point: Point{T: timestamp.FromTime(ts), V: v},
}
return s return s
} }
@ -283,10 +288,10 @@ func (r *ThresholdRule) ForEachActiveAlert(f func(*Alert)) {
} }
func (r *ThresholdRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) { func (r *ThresholdRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) {
zap.S().Info("msg:", "initiating send alerts (if any)", "\t rule:", r.Name()) zap.S().Info("msg:", "sending alerts", "\t rule:", r.Name())
alerts := []*Alert{} alerts := []*Alert{}
r.ForEachActiveAlert(func(alert *Alert) { r.ForEachActiveAlert(func(alert *Alert) {
if alert.needsSending(ts, resendDelay) { if r.opts.SendAlways || alert.needsSending(ts, resendDelay) {
alert.LastSentAt = ts alert.LastSentAt = ts
// Allow for two Eval or Alertmanager send failures. // Allow for two Eval or Alertmanager send failures.
delta := resendDelay delta := resendDelay
@ -483,8 +488,9 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
zap.S().Debugf("ruleid:", r.ID(), "\t resultmap(potential alerts):", len(resultMap)) zap.S().Debugf("ruleid:", r.ID(), "\t resultmap(potential alerts):", len(resultMap))
for _, sample := range resultMap { for _, sample := range resultMap {
// check alert rule condition before dumping results // check alert rule condition before dumping results, if sendUnmatchedResults
if r.CheckCondition(sample.Point.V) { // is set then add results irrespective of condition
if r.opts.SendUnmatched || r.CheckCondition(sample.Point.V) {
result = append(result, sample) result = append(result, sample)
} }
} }
@ -549,7 +555,6 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie
defer r.mtx.Unlock() defer r.mtx.Unlock()
resultFPs := map[uint64]struct{}{} resultFPs := map[uint64]struct{}{}
var vec Vector
var alerts = make(map[uint64]*Alert, len(res)) var alerts = make(map[uint64]*Alert, len(res))
for _, smpl := range res { for _, smpl := range res {
@ -563,6 +568,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie
// who are not used to Go's templating system. // who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}" defs := "{{$labels := .Labels}}{{$value := .Value}}"
// utility function to apply go template on labels and annots
expand := func(text string) string { expand := func(text string) string {
tmpl := NewTemplateExpander( tmpl := NewTemplateExpander(
@ -662,8 +668,8 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie
} }
r.health = HealthGood r.health = HealthGood
r.lastError = err r.lastError = err
return vec, nil
return len(r.active), nil
} }
func (r *ThresholdRule) String() string { func (r *ThresholdRule) String() string {

View File

@ -24,6 +24,10 @@ const (
AlertRuleIdLabel = "ruleId" AlertRuleIdLabel = "ruleId"
RuleSourceLabel = "ruleSource" RuleSourceLabel = "ruleSource"
RuleThresholdLabel = "threshold"
AlertAdditionalInfoLabel = "additionalInfo"
AlertSummaryLabel = "summary"
) )
// Label is a key/value pair of strings. // Label is a key/value pair of strings.