diff --git a/ee/query-service/app/server.go b/ee/query-service/app/server.go index cf54693ba8..1c44338a77 100644 --- a/ee/query-service/app/server.go +++ b/ee/query-service/app/server.go @@ -31,7 +31,6 @@ import ( "go.signoz.io/signoz/ee/query-service/rules" baseauth "go.signoz.io/signoz/pkg/query-service/auth" "go.signoz.io/signoz/pkg/query-service/migrate" - "go.signoz.io/signoz/pkg/query-service/model" v3 "go.signoz.io/signoz/pkg/query-service/model/v3" licensepkg "go.signoz.io/signoz/ee/query-service/license" @@ -348,7 +347,7 @@ func (s *Server) createPublicServer(apiHandler *api.APIHandler) (*http.Server, e } if user.User.OrgId == "" { - return nil, model.UnauthorizedError(errors.New("orgId is missing in the claims")) + return nil, basemodel.UnauthorizedError(errors.New("orgId is missing in the claims")) } return user, nil @@ -765,8 +764,9 @@ func makeRulesManager( Cache: cache, EvalDelay: baseconst.GetEvalDelay(), - PrepareTaskFunc: rules.PrepareTaskFunc, - UseLogsNewSchema: useLogsNewSchema, + PrepareTaskFunc: rules.PrepareTaskFunc, + PrepareTestRuleFunc: rules.TestNotification, + UseLogsNewSchema: useLogsNewSchema, } // create Manager diff --git a/ee/query-service/rules/manager.go b/ee/query-service/rules/manager.go index e44bbcf82b..9843d108d8 100644 --- a/ee/query-service/rules/manager.go +++ b/ee/query-service/rules/manager.go @@ -1,10 +1,15 @@ package rules import ( + "context" "fmt" "time" + "github.com/google/uuid" + basemodel "go.signoz.io/signoz/pkg/query-service/model" baserules "go.signoz.io/signoz/pkg/query-service/rules" + "go.signoz.io/signoz/pkg/query-service/utils/labels" + "go.uber.org/zap" ) func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) { @@ -79,6 +84,106 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) return task, nil } +// TestNotification prepares a dummy rule for given rule parameters and +// sends a test notification. returns alert count and error (if any) +func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.ApiError) { + + ctx := context.Background() + + if opts.Rule == nil { + return 0, basemodel.BadRequest(fmt.Errorf("rule is required")) + } + + parsedRule := opts.Rule + var alertname = parsedRule.AlertName + if alertname == "" { + // alertname is not mandatory for testing, so picking + // a random string here + alertname = uuid.New().String() + } + + // append name to indicate this is test alert + parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, baserules.TestAlertPostFix) + + var rule baserules.Rule + var err error + + if parsedRule.RuleType == baserules.RuleTypeThreshold { + + // add special labels for test alerts + parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target) + parsedRule.Labels[labels.RuleSourceLabel] = "" + parsedRule.Labels[labels.AlertRuleIdLabel] = "" + + // create a threshold rule + rule, err = baserules.NewThresholdRule( + alertname, + parsedRule, + opts.FF, + opts.Reader, + opts.UseLogsNewSchema, + baserules.WithSendAlways(), + baserules.WithSendUnmatched(), + ) + + if err != nil { + zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err)) + return 0, basemodel.BadRequest(err) + } + + } else if parsedRule.RuleType == baserules.RuleTypeProm { + + // create promql rule + rule, err = baserules.NewPromRule( + alertname, + parsedRule, + opts.Logger, + opts.Reader, + opts.ManagerOpts.PqlEngine, + baserules.WithSendAlways(), + baserules.WithSendUnmatched(), + ) + + if err != nil { + zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err)) + return 0, basemodel.BadRequest(err) + } + } else if parsedRule.RuleType == baserules.RuleTypeAnomaly { + // create anomaly rule + rule, err = NewAnomalyRule( + alertname, + parsedRule, + opts.FF, + opts.Reader, + opts.Cache, + baserules.WithSendAlways(), + baserules.WithSendUnmatched(), + ) + if err != nil { + zap.L().Error("failed to prepare a new anomaly rule for test", zap.String("name", rule.Name()), zap.Error(err)) + return 0, basemodel.BadRequest(err) + } + } else { + return 0, basemodel.BadRequest(fmt.Errorf("failed to derive ruletype with given information")) + } + + // set timestamp to current utc time + ts := time.Now().UTC() + + count, err := rule.Eval(ctx, ts) + if err != nil { + zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err)) + return 0, basemodel.InternalError(fmt.Errorf("rule evaluation failed")) + } + alertsFound, ok := count.(int) + if !ok { + return 0, basemodel.InternalError(fmt.Errorf("something went wrong")) + } + rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), opts.NotifyFunc) + + return alertsFound, nil +} + // newTask returns an appropriate group for // rule type func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc, ruleDB baserules.RuleDB) baserules.Task { diff --git a/pkg/query-service/model/response.go b/pkg/query-service/model/response.go index 3a720aed5e..61be36f170 100644 --- a/pkg/query-service/model/response.go +++ b/pkg/query-service/model/response.go @@ -617,6 +617,7 @@ type AlertsInfo struct { TotalAlerts int `json:"totalAlerts"` LogsBasedAlerts int `json:"logsBasedAlerts"` MetricBasedAlerts int `json:"metricBasedAlerts"` + AnomalyBasedAlerts int `json:"anomalyBasedAlerts"` TracesBasedAlerts int `json:"tracesBasedAlerts"` TotalChannels int `json:"totalChannels"` SlackChannels int `json:"slackChannels"` diff --git a/pkg/query-service/rules/api_params.go b/pkg/query-service/rules/api_params.go index b3c174b147..e4c76a6d71 100644 --- a/pkg/query-service/rules/api_params.go +++ b/pkg/query-service/rules/api_params.go @@ -42,16 +42,6 @@ var ( // this file contains api request and responses to be // served over http -// newApiErrorInternal returns a new api error object of type internal -func newApiErrorInternal(err error) *model.ApiError { - return &model.ApiError{Typ: model.ErrorInternal, Err: err} -} - -// newApiErrorBadData returns a new api error object of bad request type -func newApiErrorBadData(err error) *model.ApiError { - return &model.ApiError{Typ: model.ErrorBadData, Err: err} -} - // PostableRule is used to create alerting rule from HTTP api type PostableRule struct { AlertName string `yaml:"alert,omitempty" json:"alert,omitempty"` diff --git a/pkg/query-service/rules/api_params_test.go b/pkg/query-service/rules/api_params_test.go index 6a1245d0fe..9c3092ff90 100644 --- a/pkg/query-service/rules/api_params_test.go +++ b/pkg/query-service/rules/api_params_test.go @@ -8,7 +8,7 @@ import ( func TestIsAllQueriesDisabled(t *testing.T) { testCases := []*v3.CompositeQuery{ - &v3.CompositeQuery{ + { BuilderQueries: map[string]*v3.BuilderQuery{ "query1": { Disabled: true, @@ -20,10 +20,10 @@ func TestIsAllQueriesDisabled(t *testing.T) { QueryType: v3.QueryTypeBuilder, }, nil, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypeBuilder, }, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypeBuilder, BuilderQueries: map[string]*v3.BuilderQuery{ "query1": { @@ -34,10 +34,10 @@ func TestIsAllQueriesDisabled(t *testing.T) { }, }, }, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypePromQL, }, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypePromQL, PromQueries: map[string]*v3.PromQuery{ "query3": { @@ -45,7 +45,7 @@ func TestIsAllQueriesDisabled(t *testing.T) { }, }, }, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypePromQL, PromQueries: map[string]*v3.PromQuery{ "query3": { @@ -53,10 +53,10 @@ func TestIsAllQueriesDisabled(t *testing.T) { }, }, }, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypeClickHouseSQL, }, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypeClickHouseSQL, ClickHouseQueries: map[string]*v3.ClickHouseQuery{ "query4": { @@ -64,7 +64,7 @@ func TestIsAllQueriesDisabled(t *testing.T) { }, }, }, - &v3.CompositeQuery{ + { QueryType: v3.QueryTypeClickHouseSQL, ClickHouseQueries: map[string]*v3.ClickHouseQuery{ "query4": { diff --git a/pkg/query-service/rules/db.go b/pkg/query-service/rules/db.go index 697ea63f92..c9db38201b 100644 --- a/pkg/query-service/rules/db.go +++ b/pkg/query-service/rules/db.go @@ -599,6 +599,9 @@ func (r *ruleDB) GetAlertsInfo(ctx context.Context) (*model.AlertsInfo, error) { } } } + if rule.RuleType == RuleTypeAnomaly { + alertsInfo.AnomalyBasedAlerts = alertsInfo.AnomalyBasedAlerts + 1 + } } else if rule.AlertType == AlertTypeTraces { alertsInfo.TracesBasedAlerts = alertsInfo.TracesBasedAlerts + 1 } diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go index c41d0bbe50..50ad7b5430 100644 --- a/pkg/query-service/rules/manager.go +++ b/pkg/query-service/rules/manager.go @@ -10,8 +10,6 @@ import ( "sync" "time" - "github.com/google/uuid" - "go.uber.org/zap" "errors" @@ -24,7 +22,6 @@ import ( "go.signoz.io/signoz/pkg/query-service/model" pqle "go.signoz.io/signoz/pkg/query-service/pqlEngine" "go.signoz.io/signoz/pkg/query-service/telemetry" - "go.signoz.io/signoz/pkg/query-service/utils/labels" ) type PrepareTaskOptions struct { @@ -41,6 +38,19 @@ type PrepareTaskOptions struct { UseLogsNewSchema bool } +type PrepareTestRuleOptions struct { + Rule *PostableRule + RuleDB RuleDB + Logger *zap.Logger + Reader interfaces.Reader + Cache cache.Cache + FF interfaces.FeatureLookup + ManagerOpts *ManagerOptions + NotifyFunc NotifyFunc + + UseLogsNewSchema bool +} + const taskNamesuffix = "webAppEditor" func RuleIdFromTaskName(n string) string { @@ -81,6 +91,8 @@ type ManagerOptions struct { PrepareTaskFunc func(opts PrepareTaskOptions) (Task, error) + PrepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, *model.ApiError) + UseLogsNewSchema bool } @@ -99,10 +111,11 @@ type Manager struct { logger *zap.Logger - featureFlags interfaces.FeatureLookup - reader interfaces.Reader - cache cache.Cache - prepareTaskFunc func(opts PrepareTaskOptions) (Task, error) + featureFlags interfaces.FeatureLookup + reader interfaces.Reader + cache cache.Cache + prepareTaskFunc func(opts PrepareTaskOptions) (Task, error) + prepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, *model.ApiError) UseLogsNewSchema bool } @@ -123,6 +136,9 @@ func defaultOptions(o *ManagerOptions) *ManagerOptions { if o.PrepareTaskFunc == nil { o.PrepareTaskFunc = defaultPrepareTaskFunc } + if o.PrepareTestRuleFunc == nil { + o.PrepareTestRuleFunc = defaultTestNotification + } return o } @@ -203,17 +219,18 @@ func NewManager(o *ManagerOptions) (*Manager, error) { telemetry.GetInstance().SetAlertsInfoCallback(db.GetAlertsInfo) m := &Manager{ - tasks: map[string]Task{}, - rules: map[string]Rule{}, - notifier: notifier, - ruleDB: db, - opts: o, - block: make(chan struct{}), - logger: o.Logger, - featureFlags: o.FeatureFlags, - reader: o.Reader, - cache: o.Cache, - prepareTaskFunc: o.PrepareTaskFunc, + tasks: map[string]Task{}, + rules: map[string]Rule{}, + notifier: notifier, + ruleDB: db, + opts: o, + block: make(chan struct{}), + logger: o.Logger, + featureFlags: o.FeatureFlags, + reader: o.Reader, + cache: o.Cache, + prepareTaskFunc: o.PrepareTaskFunc, + prepareTestRuleFunc: o.PrepareTestRuleFunc, } return m, nil } @@ -788,78 +805,20 @@ func (m *Manager) TestNotification(ctx context.Context, ruleStr string) (int, *m parsedRule, err := ParsePostableRule([]byte(ruleStr)) if err != nil { - return 0, newApiErrorBadData(err) + return 0, model.BadRequest(err) } - var alertname = parsedRule.AlertName - if alertname == "" { - // alertname is not mandatory for testing, so picking - // a random string here - alertname = uuid.New().String() - } + alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{ + Rule: parsedRule, + RuleDB: m.ruleDB, + Logger: m.logger, + Reader: m.reader, + Cache: m.cache, + FF: m.featureFlags, + ManagerOpts: m.opts, + NotifyFunc: m.prepareNotifyFunc(), + UseLogsNewSchema: m.opts.UseLogsNewSchema, + }) - // append name to indicate this is test alert - parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, TestAlertPostFix) - - var rule Rule - - if parsedRule.RuleType == RuleTypeThreshold { - - // add special labels for test alerts - parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target) - parsedRule.Labels[labels.RuleSourceLabel] = "" - parsedRule.Labels[labels.AlertRuleIdLabel] = "" - - // create a threshold rule - rule, err = NewThresholdRule( - alertname, - parsedRule, - m.featureFlags, - m.reader, - m.opts.UseLogsNewSchema, - WithSendAlways(), - WithSendUnmatched(), - ) - - if err != nil { - zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err)) - return 0, newApiErrorBadData(err) - } - - } else if parsedRule.RuleType == RuleTypeProm { - - // create promql rule - rule, err = NewPromRule( - alertname, - parsedRule, - m.logger, - m.reader, - m.opts.PqlEngine, - WithSendAlways(), - WithSendUnmatched(), - ) - - if err != nil { - zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err)) - return 0, newApiErrorBadData(err) - } - } else { - return 0, newApiErrorBadData(fmt.Errorf("failed to derive ruletype with given information")) - } - - // set timestamp to current utc time - ts := time.Now().UTC() - - count, err := rule.Eval(ctx, ts) - if err != nil { - zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err)) - return 0, newApiErrorInternal(fmt.Errorf("rule evaluation failed")) - } - alertsFound, ok := count.(int) - if !ok { - return 0, newApiErrorInternal(fmt.Errorf("something went wrong")) - } - rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), m.prepareNotifyFunc()) - - return alertsFound, nil + return alertCount, apiErr } diff --git a/pkg/query-service/rules/templates.go b/pkg/query-service/rules/templates.go index b5487011ce..49db785cee 100644 --- a/pkg/query-service/rules/templates.go +++ b/pkg/query-service/rules/templates.go @@ -233,6 +233,7 @@ func AlertTemplateData(labels map[string]string, value string, threshold string) // consistent across the platform. // If there is a go template block, it won't be replaced. // The example for existing go template block is: {{$threshold}} or {{$value}} or any other valid go template syntax. +// See templates_test.go for examples. func (te *TemplateExpander) preprocessTemplate() { // Handle the $variable syntax reDollar := regexp.MustCompile(`({{.*?}})|(\$(\w+(?:\.\w+)*))`) @@ -256,6 +257,19 @@ func (te *TemplateExpander) preprocessTemplate() { rest := submatches[2] return fmt.Sprintf(`{{index .Labels "%s"%s}}`, path, rest) }) + + // Handle the {{$variable}} syntax + // skip the special case for {{$threshold}} and {{$value}} + reVariable := regexp.MustCompile(`{{\s*\$\s*([a-zA-Z0-9_.]+)\s*}}`) + te.text = reVariable.ReplaceAllStringFunc(te.text, func(match string) string { + if strings.HasPrefix(match, "{{$threshold}}") || strings.HasPrefix(match, "{{$value}}") { + return match + } + // get the variable name from {{$variable}} syntax + variable := strings.TrimPrefix(match, "{{$") + variable = strings.TrimSuffix(variable, "}}") + return fmt.Sprintf(`{{index .Labels "%s"}}`, variable) + }) } // Funcs adds the functions in fm to the Expander's function map. @@ -335,6 +349,7 @@ func (te TemplateExpander) ExpandHTML(templateFiles []string) (result string, re // ParseTest parses the templates and returns the error if any. func (te TemplateExpander) ParseTest() error { + te.preprocessTemplate() _, err := text_template.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text) if err != nil { return err diff --git a/pkg/query-service/rules/templates_test.go b/pkg/query-service/rules/templates_test.go index 66d958e8f3..0fc0bd779a 100644 --- a/pkg/query-service/rules/templates_test.go +++ b/pkg/query-service/rules/templates_test.go @@ -74,3 +74,14 @@ func TestTemplateExpander_WithLablesDotSyntax(t *testing.T) { } require.Equal(t, "test my-service exceeds 100 and observed at 200", result) } + +func TestTemplateExpander_WithVariableSyntax(t *testing.T) { + defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}" + data := AlertTemplateData(map[string]string{"service.name": "my-service"}, "200", "100") + expander := NewTemplateExpander(context.Background(), defs+"test {{$service.name}} exceeds {{$threshold}} and observed at {{$value}}", "test", data, times.Time(time.Now().Unix()), nil) + result, err := expander.Expand() + if err != nil { + t.Fatal(err) + } + require.Equal(t, "test my-service exceeds 100 and observed at 200", result) +} diff --git a/pkg/query-service/rules/test_notification.go b/pkg/query-service/rules/test_notification.go new file mode 100644 index 0000000000..37fb2e5f1b --- /dev/null +++ b/pkg/query-service/rules/test_notification.go @@ -0,0 +1,97 @@ +package rules + +import ( + "context" + "fmt" + "time" + + "github.com/google/uuid" + "go.signoz.io/signoz/pkg/query-service/model" + "go.signoz.io/signoz/pkg/query-service/utils/labels" + "go.uber.org/zap" +) + +// TestNotification prepares a dummy rule for given rule parameters and +// sends a test notification. returns alert count and error (if any) +func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError) { + + ctx := context.Background() + + if opts.Rule == nil { + return 0, model.BadRequest(fmt.Errorf("rule is required")) + } + + parsedRule := opts.Rule + var alertname = parsedRule.AlertName + if alertname == "" { + // alertname is not mandatory for testing, so picking + // a random string here + alertname = uuid.New().String() + } + + // append name to indicate this is test alert + parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, TestAlertPostFix) + + var rule Rule + var err error + + if parsedRule.RuleType == RuleTypeThreshold { + + // add special labels for test alerts + parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target) + parsedRule.Labels[labels.RuleSourceLabel] = "" + parsedRule.Labels[labels.AlertRuleIdLabel] = "" + + // create a threshold rule + rule, err = NewThresholdRule( + alertname, + parsedRule, + opts.FF, + opts.Reader, + opts.UseLogsNewSchema, + WithSendAlways(), + WithSendUnmatched(), + ) + + if err != nil { + zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err)) + return 0, model.BadRequest(err) + } + + } else if parsedRule.RuleType == RuleTypeProm { + + // create promql rule + rule, err = NewPromRule( + alertname, + parsedRule, + opts.Logger, + opts.Reader, + opts.ManagerOpts.PqlEngine, + WithSendAlways(), + WithSendUnmatched(), + ) + + if err != nil { + zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err)) + return 0, model.BadRequest(err) + } + } else { + return 0, model.BadRequest(fmt.Errorf("failed to derive ruletype with given information")) + } + + // set timestamp to current utc time + ts := time.Now().UTC() + + count, err := rule.Eval(ctx, ts) + if err != nil { + zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err)) + return 0, model.InternalError(fmt.Errorf("rule evaluation failed")) + } + alertsFound, ok := count.(int) + if !ok { + return 0, model.InternalError(fmt.Errorf("something went wrong")) + } + rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), opts.NotifyFunc) + + return alertsFound, nil +}