fix: send alert default annotations for missing data alert (#5315)

This commit is contained in:
Srikanth Chekuri 2024-08-09 15:31:39 +05:30 committed by GitHub
parent a4878f6430
commit 156905afc7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 133 additions and 16 deletions

View File

@ -44,14 +44,25 @@ type ThresholdRule struct {
name string name string
source string source string
ruleCondition *RuleCondition ruleCondition *RuleCondition
evalWindow time.Duration // evalWindow is the time window used for evaluating the rule
holdDuration time.Duration // i.e each time we lookback from the current time, we look at data for the last
labels labels.Labels // evalWindow duration
annotations labels.Labels evalWindow time.Duration
// holdDuration is the duration for which the alert waits before firing
holdDuration time.Duration
// holds the static set of labels and annotations for the rule
// these are the same for all alerts created for this rule
labels labels.Labels
annotations labels.Labels
preferredChannels []string // preferredChannels is the list of channels to send the alert to
mtx sync.Mutex // if the rule is triggered
evaluationDuration time.Duration preferredChannels []string
mtx sync.Mutex
// the time it took to evaluate the rule
evaluationDuration time.Duration
// the timestamp of the last evaluation
evaluationTimestamp time.Time evaluationTimestamp time.Time
health RuleHealth health RuleHealth
@ -61,6 +72,10 @@ type ThresholdRule struct {
// map of active alerts // map of active alerts
active map[uint64]*Alert active map[uint64]*Alert
// Ever since we introduced the new metrics query builder, the version is "v4"
// for all the rules
// if the version is "v3", then we use the old querier
// if the version is "v4", then we use the new querierV2
version string version string
// temporalityMap is a map of metric name to temporality // temporalityMap is a map of metric name to temporality
// to avoid fetching temporality for the same metric multiple times // to avoid fetching temporality for the same metric multiple times
@ -70,10 +85,18 @@ type ThresholdRule struct {
opts ThresholdRuleOpts opts ThresholdRuleOpts
// lastTimestampWithDatapoints is the timestamp of the last datapoint we observed
// for this rule
// this is used for missing data alerts
lastTimestampWithDatapoints time.Time lastTimestampWithDatapoints time.Time
typ string
querier interfaces.Querier // Type of the rule
// One of ["LOGS_BASED_ALERT", "TRACES_BASED_ALERT", "METRIC_BASED_ALERT", "EXCEPTIONS_BASED_ALERT"]
typ string
// querier is used for alerts created before the introduction of new metrics query builder
querier interfaces.Querier
// querierV2 is used for alerts created after the introduction of new metrics query builder
querierV2 interfaces.Querier querierV2 interfaces.Querier
reader interfaces.Reader reader interfaces.Reader
@ -942,13 +965,11 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie
annotations := make(labels.Labels, 0, len(r.annotations)) annotations := make(labels.Labels, 0, len(r.annotations))
for _, a := range r.annotations { for _, a := range r.annotations {
if smpl.IsMissing {
if a.Name == labels.AlertDescriptionLabel || a.Name == labels.AlertSummaryLabel {
a.Value = labels.AlertMissingData
}
}
annotations = append(annotations, labels.Label{Name: normalizeLabelName(a.Name), Value: expand(a.Value)}) annotations = append(annotations, labels.Label{Name: normalizeLabelName(a.Name), Value: expand(a.Value)})
} }
if smpl.IsMissing {
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
}
// Links with timestamps should go in annotations since labels // Links with timestamps should go in annotations since labels
// is used alert grouping, and we want to group alerts with the same // is used alert grouping, and we want to group alerts with the same

View File

@ -1099,3 +1099,101 @@ func TestThresholdRuleUnitCombinations(t *testing.T) {
} }
} }
} }
// TestThresholdRuleNoData checks the missing-data path of a threshold rule:
// when the rule has AlertOnAbsent enabled and the query returns no rows, the
// resulting active alert must carry the "[No data]" marker in its alert-name
// label.
func TestThresholdRuleNoData(t *testing.T) {
	postableRule := PostableRule{
		AlertName:  "Units test",
		AlertType:  "METRIC_BASED_ALERT",
		RuleType:   RuleTypeThreshold,
		EvalWindow: Duration(5 * time.Minute),
		Frequency:  Duration(1 * time.Minute),
		RuleCondition: &RuleCondition{
			CompositeQuery: &v3.CompositeQuery{
				QueryType: v3.QueryTypeBuilder,
				BuilderQueries: map[string]*v3.BuilderQuery{
					"A": {
						QueryName:    "A",
						StepInterval: 60,
						AggregateAttribute: v3.AttributeKey{
							Key: "signoz_calls_total",
						},
						AggregateOperator: v3.AggregateOperatorSumRate,
						DataSource:        v3.DataSourceMetrics,
						Expression:        "A",
					},
				},
			},
			AlertOnAbsent: true,
		},
	}

	// The threshold condition and annotations are identical for every case,
	// so they are configured once instead of on each loop iteration.
	var target float64 = 0
	postableRule.RuleCondition.CompareOp = ValueIsEq
	postableRule.RuleCondition.MatchType = AtleastOnce
	postableRule.RuleCondition.Target = &target
	postableRule.Annotations = map[string]string{
		"description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})",
		"summary":     "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}",
	}

	fm := featureManager.StartManager()
	mock, err := cmock.NewClickHouseWithQueryMatcher(nil, &queryMatcherAny{})
	if err != nil {
		// Nothing below can work without the mock connection, so stop here.
		t.Fatalf("an error '%s' was not expected when opening a stub database connection", err)
	}

	cols := []cmock.ColumnType{
		{Name: "value", Type: "Float64"},
		{Name: "attr", Type: "String"},
		{Name: "timestamp", Type: "String"},
	}

	cases := []struct {
		values       [][]interface{}
		expectNoData bool
	}{
		{
			values:       [][]interface{}{},
			expectNoData: true,
		},
	}

	for idx, c := range cases {
		rows := cmock.NewRows(cols, c.values)

		// We are testing the eval logic after the query is run,
		// so we don't care about the query string here.
		queryString := "SELECT any"
		mock.
			ExpectQuery(queryString).
			WillReturnRows(rows)

		options := clickhouseReader.NewOptions("", 0, 0, 0, "", "archiveNamespace")
		reader := clickhouseReader.NewReaderFromClickhouseConnection(mock, options, nil, "", fm, "")

		rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{}, fm, reader)
		if err != nil {
			// Check the constructor error before touching the rule; on
			// failure the returned rule must not be dereferenced.
			t.Fatalf("case %d: creating threshold rule: %v", idx, err)
		}
		rule.temporalityMap = map[string]map[v3.Temporality]bool{
			"signoz_calls_total": {
				v3.Delta: true,
			},
		}

		queriers := Queriers{
			Ch: mock,
		}

		retVal, err := rule.Eval(context.Background(), time.Now(), &queriers)
		if err != nil {
			t.Fatalf("case %d: evaluating rule: %v", idx, err)
		}

		// Guard the type assertion so an unexpected result type is reported
		// as a test failure rather than a panic.
		alertCount, ok := retVal.(int)
		if !ok {
			t.Fatalf("case %d: expected Eval to return an int, got %T", idx, retVal)
		}
		assert.Equal(t, 1, alertCount, "case %d", idx)

		for _, item := range rule.active {
			hasNoDataMarker := strings.Contains(item.Labels.Get(labels.AlertNameLabel), "[No data]")
			assert.Equal(t, c.expectNoData, hasNoDataMarker, "case %d", idx)
		}
	}
}

View File

@ -27,8 +27,6 @@ const (
RuleThresholdLabel = "threshold" RuleThresholdLabel = "threshold"
AlertSummaryLabel = "summary" AlertSummaryLabel = "summary"
AlertDescriptionLabel = "description" AlertDescriptionLabel = "description"
AlertMissingData = "Missing data"
) )
// Label is a key/value pair of strings. // Label is a key/value pair of strings.