fix: test notification missing for anomaly alert (#6391)

This commit is contained in:
Srikanth Chekuri 2024-11-08 21:10:09 +05:30 committed by GitHub
parent 22c10f9479
commit 831540eaf0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 293 additions and 112 deletions

View File

@ -31,7 +31,6 @@ import (
"go.signoz.io/signoz/ee/query-service/rules"
baseauth "go.signoz.io/signoz/pkg/query-service/auth"
"go.signoz.io/signoz/pkg/query-service/migrate"
"go.signoz.io/signoz/pkg/query-service/model"
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
licensepkg "go.signoz.io/signoz/ee/query-service/license"
@ -348,7 +347,7 @@ func (s *Server) createPublicServer(apiHandler *api.APIHandler) (*http.Server, e
}
if user.User.OrgId == "" {
return nil, model.UnauthorizedError(errors.New("orgId is missing in the claims"))
return nil, basemodel.UnauthorizedError(errors.New("orgId is missing in the claims"))
}
return user, nil
@ -765,8 +764,9 @@ func makeRulesManager(
Cache: cache,
EvalDelay: baseconst.GetEvalDelay(),
PrepareTaskFunc: rules.PrepareTaskFunc,
UseLogsNewSchema: useLogsNewSchema,
PrepareTaskFunc: rules.PrepareTaskFunc,
PrepareTestRuleFunc: rules.TestNotification,
UseLogsNewSchema: useLogsNewSchema,
}
// create Manager

View File

@ -1,10 +1,15 @@
package rules
import (
"context"
"fmt"
"time"
"github.com/google/uuid"
basemodel "go.signoz.io/signoz/pkg/query-service/model"
baserules "go.signoz.io/signoz/pkg/query-service/rules"
"go.signoz.io/signoz/pkg/query-service/utils/labels"
"go.uber.org/zap"
)
func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) {
@ -79,6 +84,106 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
return task, nil
}
// TestNotification prepares a dummy rule for given rule parameters and
// sends a test notification. returns alert count and error (if any)
func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.ApiError) {
ctx := context.Background()
if opts.Rule == nil {
return 0, basemodel.BadRequest(fmt.Errorf("rule is required"))
}
parsedRule := opts.Rule
var alertname = parsedRule.AlertName
if alertname == "" {
// alertname is not mandatory for testing, so picking
// a random string here
alertname = uuid.New().String()
}
// append name to indicate this is test alert
parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, baserules.TestAlertPostFix)
var rule baserules.Rule
var err error
if parsedRule.RuleType == baserules.RuleTypeThreshold {
// add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
// create a threshold rule
rule, err = baserules.NewThresholdRule(
alertname,
parsedRule,
opts.FF,
opts.Reader,
opts.UseLogsNewSchema,
baserules.WithSendAlways(),
baserules.WithSendUnmatched(),
)
if err != nil {
zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, basemodel.BadRequest(err)
}
} else if parsedRule.RuleType == baserules.RuleTypeProm {
// create promql rule
rule, err = baserules.NewPromRule(
alertname,
parsedRule,
opts.Logger,
opts.Reader,
opts.ManagerOpts.PqlEngine,
baserules.WithSendAlways(),
baserules.WithSendUnmatched(),
)
if err != nil {
zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, basemodel.BadRequest(err)
}
} else if parsedRule.RuleType == baserules.RuleTypeAnomaly {
// create anomaly rule
rule, err = NewAnomalyRule(
alertname,
parsedRule,
opts.FF,
opts.Reader,
opts.Cache,
baserules.WithSendAlways(),
baserules.WithSendUnmatched(),
)
if err != nil {
zap.L().Error("failed to prepare a new anomaly rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, basemodel.BadRequest(err)
}
} else {
return 0, basemodel.BadRequest(fmt.Errorf("failed to derive ruletype with given information"))
}
// set timestamp to current utc time
ts := time.Now().UTC()
count, err := rule.Eval(ctx, ts)
if err != nil {
zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err))
return 0, basemodel.InternalError(fmt.Errorf("rule evaluation failed"))
}
alertsFound, ok := count.(int)
if !ok {
return 0, basemodel.InternalError(fmt.Errorf("something went wrong"))
}
rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), opts.NotifyFunc)
return alertsFound, nil
}
// newTask returns an appropriate group for
// rule type
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc, ruleDB baserules.RuleDB) baserules.Task {

View File

@ -617,6 +617,7 @@ type AlertsInfo struct {
TotalAlerts int `json:"totalAlerts"`
LogsBasedAlerts int `json:"logsBasedAlerts"`
MetricBasedAlerts int `json:"metricBasedAlerts"`
AnomalyBasedAlerts int `json:"anomalyBasedAlerts"`
TracesBasedAlerts int `json:"tracesBasedAlerts"`
TotalChannels int `json:"totalChannels"`
SlackChannels int `json:"slackChannels"`

View File

@ -42,16 +42,6 @@ var (
// this file contains api request and responses to be
// served over http
// newApiErrorInternal returns a new api error object of type internal
func newApiErrorInternal(err error) *model.ApiError {
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
// newApiErrorBadData returns a new api error object of bad request type
func newApiErrorBadData(err error) *model.ApiError {
return &model.ApiError{Typ: model.ErrorBadData, Err: err}
}
// PostableRule is used to create alerting rule from HTTP api
type PostableRule struct {
AlertName string `yaml:"alert,omitempty" json:"alert,omitempty"`

View File

@ -8,7 +8,7 @@ import (
func TestIsAllQueriesDisabled(t *testing.T) {
testCases := []*v3.CompositeQuery{
&v3.CompositeQuery{
{
BuilderQueries: map[string]*v3.BuilderQuery{
"query1": {
Disabled: true,
@ -20,10 +20,10 @@ func TestIsAllQueriesDisabled(t *testing.T) {
QueryType: v3.QueryTypeBuilder,
},
nil,
&v3.CompositeQuery{
{
QueryType: v3.QueryTypeBuilder,
},
&v3.CompositeQuery{
{
QueryType: v3.QueryTypeBuilder,
BuilderQueries: map[string]*v3.BuilderQuery{
"query1": {
@ -34,10 +34,10 @@ func TestIsAllQueriesDisabled(t *testing.T) {
},
},
},
&v3.CompositeQuery{
{
QueryType: v3.QueryTypePromQL,
},
&v3.CompositeQuery{
{
QueryType: v3.QueryTypePromQL,
PromQueries: map[string]*v3.PromQuery{
"query3": {
@ -45,7 +45,7 @@ func TestIsAllQueriesDisabled(t *testing.T) {
},
},
},
&v3.CompositeQuery{
{
QueryType: v3.QueryTypePromQL,
PromQueries: map[string]*v3.PromQuery{
"query3": {
@ -53,10 +53,10 @@ func TestIsAllQueriesDisabled(t *testing.T) {
},
},
},
&v3.CompositeQuery{
{
QueryType: v3.QueryTypeClickHouseSQL,
},
&v3.CompositeQuery{
{
QueryType: v3.QueryTypeClickHouseSQL,
ClickHouseQueries: map[string]*v3.ClickHouseQuery{
"query4": {
@ -64,7 +64,7 @@ func TestIsAllQueriesDisabled(t *testing.T) {
},
},
},
&v3.CompositeQuery{
{
QueryType: v3.QueryTypeClickHouseSQL,
ClickHouseQueries: map[string]*v3.ClickHouseQuery{
"query4": {

View File

@ -599,6 +599,9 @@ func (r *ruleDB) GetAlertsInfo(ctx context.Context) (*model.AlertsInfo, error) {
}
}
}
if rule.RuleType == RuleTypeAnomaly {
alertsInfo.AnomalyBasedAlerts = alertsInfo.AnomalyBasedAlerts + 1
}
} else if rule.AlertType == AlertTypeTraces {
alertsInfo.TracesBasedAlerts = alertsInfo.TracesBasedAlerts + 1
}

View File

@ -10,8 +10,6 @@ import (
"sync"
"time"
"github.com/google/uuid"
"go.uber.org/zap"
"errors"
@ -24,7 +22,6 @@ import (
"go.signoz.io/signoz/pkg/query-service/model"
pqle "go.signoz.io/signoz/pkg/query-service/pqlEngine"
"go.signoz.io/signoz/pkg/query-service/telemetry"
"go.signoz.io/signoz/pkg/query-service/utils/labels"
)
type PrepareTaskOptions struct {
@ -41,6 +38,19 @@ type PrepareTaskOptions struct {
UseLogsNewSchema bool
}
type PrepareTestRuleOptions struct {
Rule *PostableRule
RuleDB RuleDB
Logger *zap.Logger
Reader interfaces.Reader
Cache cache.Cache
FF interfaces.FeatureLookup
ManagerOpts *ManagerOptions
NotifyFunc NotifyFunc
UseLogsNewSchema bool
}
const taskNamesuffix = "webAppEditor"
func RuleIdFromTaskName(n string) string {
@ -81,6 +91,8 @@ type ManagerOptions struct {
PrepareTaskFunc func(opts PrepareTaskOptions) (Task, error)
PrepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, *model.ApiError)
UseLogsNewSchema bool
}
@ -99,10 +111,11 @@ type Manager struct {
logger *zap.Logger
featureFlags interfaces.FeatureLookup
reader interfaces.Reader
cache cache.Cache
prepareTaskFunc func(opts PrepareTaskOptions) (Task, error)
featureFlags interfaces.FeatureLookup
reader interfaces.Reader
cache cache.Cache
prepareTaskFunc func(opts PrepareTaskOptions) (Task, error)
prepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, *model.ApiError)
UseLogsNewSchema bool
}
@ -123,6 +136,9 @@ func defaultOptions(o *ManagerOptions) *ManagerOptions {
if o.PrepareTaskFunc == nil {
o.PrepareTaskFunc = defaultPrepareTaskFunc
}
if o.PrepareTestRuleFunc == nil {
o.PrepareTestRuleFunc = defaultTestNotification
}
return o
}
@ -203,17 +219,18 @@ func NewManager(o *ManagerOptions) (*Manager, error) {
telemetry.GetInstance().SetAlertsInfoCallback(db.GetAlertsInfo)
m := &Manager{
tasks: map[string]Task{},
rules: map[string]Rule{},
notifier: notifier,
ruleDB: db,
opts: o,
block: make(chan struct{}),
logger: o.Logger,
featureFlags: o.FeatureFlags,
reader: o.Reader,
cache: o.Cache,
prepareTaskFunc: o.PrepareTaskFunc,
tasks: map[string]Task{},
rules: map[string]Rule{},
notifier: notifier,
ruleDB: db,
opts: o,
block: make(chan struct{}),
logger: o.Logger,
featureFlags: o.FeatureFlags,
reader: o.Reader,
cache: o.Cache,
prepareTaskFunc: o.PrepareTaskFunc,
prepareTestRuleFunc: o.PrepareTestRuleFunc,
}
return m, nil
}
@ -788,78 +805,20 @@ func (m *Manager) TestNotification(ctx context.Context, ruleStr string) (int, *m
parsedRule, err := ParsePostableRule([]byte(ruleStr))
if err != nil {
return 0, newApiErrorBadData(err)
return 0, model.BadRequest(err)
}
var alertname = parsedRule.AlertName
if alertname == "" {
// alertname is not mandatory for testing, so picking
// a random string here
alertname = uuid.New().String()
}
alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{
Rule: parsedRule,
RuleDB: m.ruleDB,
Logger: m.logger,
Reader: m.reader,
Cache: m.cache,
FF: m.featureFlags,
ManagerOpts: m.opts,
NotifyFunc: m.prepareNotifyFunc(),
UseLogsNewSchema: m.opts.UseLogsNewSchema,
})
// append name to indicate this is test alert
parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, TestAlertPostFix)
var rule Rule
if parsedRule.RuleType == RuleTypeThreshold {
// add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
// create a threshold rule
rule, err = NewThresholdRule(
alertname,
parsedRule,
m.featureFlags,
m.reader,
m.opts.UseLogsNewSchema,
WithSendAlways(),
WithSendUnmatched(),
)
if err != nil {
zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, newApiErrorBadData(err)
}
} else if parsedRule.RuleType == RuleTypeProm {
// create promql rule
rule, err = NewPromRule(
alertname,
parsedRule,
m.logger,
m.reader,
m.opts.PqlEngine,
WithSendAlways(),
WithSendUnmatched(),
)
if err != nil {
zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, newApiErrorBadData(err)
}
} else {
return 0, newApiErrorBadData(fmt.Errorf("failed to derive ruletype with given information"))
}
// set timestamp to current utc time
ts := time.Now().UTC()
count, err := rule.Eval(ctx, ts)
if err != nil {
zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err))
return 0, newApiErrorInternal(fmt.Errorf("rule evaluation failed"))
}
alertsFound, ok := count.(int)
if !ok {
return 0, newApiErrorInternal(fmt.Errorf("something went wrong"))
}
rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), m.prepareNotifyFunc())
return alertsFound, nil
return alertCount, apiErr
}

View File

@ -233,6 +233,7 @@ func AlertTemplateData(labels map[string]string, value string, threshold string)
// consistent across the platform.
// If there is a go template block, it won't be replaced.
// The example for existing go template block is: {{$threshold}} or {{$value}} or any other valid go template syntax.
// See templates_test.go for examples.
func (te *TemplateExpander) preprocessTemplate() {
// Handle the $variable syntax
reDollar := regexp.MustCompile(`({{.*?}})|(\$(\w+(?:\.\w+)*))`)
@ -256,6 +257,19 @@ func (te *TemplateExpander) preprocessTemplate() {
rest := submatches[2]
return fmt.Sprintf(`{{index .Labels "%s"%s}}`, path, rest)
})
// Handle the {{$variable}} syntax
// skip the special case for {{$threshold}} and {{$value}}
reVariable := regexp.MustCompile(`{{\s*\$\s*([a-zA-Z0-9_.]+)\s*}}`)
te.text = reVariable.ReplaceAllStringFunc(te.text, func(match string) string {
if strings.HasPrefix(match, "{{$threshold}}") || strings.HasPrefix(match, "{{$value}}") {
return match
}
// get the variable name from {{$variable}} syntax
variable := strings.TrimPrefix(match, "{{$")
variable = strings.TrimSuffix(variable, "}}")
return fmt.Sprintf(`{{index .Labels "%s"}}`, variable)
})
}
// Funcs adds the functions in fm to the Expander's function map.
@ -335,6 +349,7 @@ func (te TemplateExpander) ExpandHTML(templateFiles []string) (result string, re
// ParseTest parses the templates and returns the error if any.
func (te TemplateExpander) ParseTest() error {
te.preprocessTemplate()
_, err := text_template.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text)
if err != nil {
return err

View File

@ -74,3 +74,14 @@ func TestTemplateExpander_WithLablesDotSyntax(t *testing.T) {
}
require.Equal(t, "test my-service exceeds 100 and observed at 200", result)
}
func TestTemplateExpander_WithVariableSyntax(t *testing.T) {
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
data := AlertTemplateData(map[string]string{"service.name": "my-service"}, "200", "100")
expander := NewTemplateExpander(context.Background(), defs+"test {{$service.name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil)
result, err := expander.Expand()
if err != nil {
t.Fatal(err)
}
require.Equal(t, "test my-service exceeds 100 and observed at 200", result)
}

View File

@ -0,0 +1,97 @@
package rules
import (
"context"
"fmt"
"time"
"github.com/google/uuid"
"go.signoz.io/signoz/pkg/query-service/model"
"go.signoz.io/signoz/pkg/query-service/utils/labels"
"go.uber.org/zap"
)
// TestNotification prepares a dummy rule for given rule parameters and
// sends a test notification. returns alert count and error (if any)
func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError) {
ctx := context.Background()
if opts.Rule == nil {
return 0, model.BadRequest(fmt.Errorf("rule is required"))
}
parsedRule := opts.Rule
var alertname = parsedRule.AlertName
if alertname == "" {
// alertname is not mandatory for testing, so picking
// a random string here
alertname = uuid.New().String()
}
// append name to indicate this is test alert
parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, TestAlertPostFix)
var rule Rule
var err error
if parsedRule.RuleType == RuleTypeThreshold {
// add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
// create a threshold rule
rule, err = NewThresholdRule(
alertname,
parsedRule,
opts.FF,
opts.Reader,
opts.UseLogsNewSchema,
WithSendAlways(),
WithSendUnmatched(),
)
if err != nil {
zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, model.BadRequest(err)
}
} else if parsedRule.RuleType == RuleTypeProm {
// create promql rule
rule, err = NewPromRule(
alertname,
parsedRule,
opts.Logger,
opts.Reader,
opts.ManagerOpts.PqlEngine,
WithSendAlways(),
WithSendUnmatched(),
)
if err != nil {
zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, model.BadRequest(err)
}
} else {
return 0, model.BadRequest(fmt.Errorf("failed to derive ruletype with given information"))
}
// set timestamp to current utc time
ts := time.Now().UTC()
count, err := rule.Eval(ctx, ts)
if err != nil {
zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err))
return 0, model.InternalError(fmt.Errorf("rule evaluation failed"))
}
alertsFound, ok := count.(int)
if !ok {
return 0, model.InternalError(fmt.Errorf("something went wrong"))
}
rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), opts.NotifyFunc)
return alertsFound, nil
}