signoz/pkg/query-service/rules/test_notification.go
Vibhu Pandey 1f33928bf9
feat(alertmanager): integrate with ruler (#7222)
### Summary

Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts:

#### Frontend
- The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. 

For instance, we have gone from the response `{
    "status": "success",
    "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"......
}` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......`

- `msteams` has been changed to `msteamsv2` wherever applicable

#### Ruler
The following changes have been done in the ruler component:

- Removal of the old alertmanager and notifier
- The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional
- Introduction of a new `testPrepareNotifyFunc` for sending test notifications
- Integration with the new alertmanager

#### Alertmanager
Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler

- Addition of ruleId based matching
- Support for marshalling the global configuration directly from the upstream alertmanager
- Addition of orgId to the legacy alertmanager
- Support for always adding defaults to both routes and receivers while creating them
- Migration to create the required alertmanager tables
- Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one.

#### Related Issues / PR's

Closes https://github.com/SigNoz/platform-pod/issues/404
Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-09 20:00:42 +00:00

101 lines
2.7 KiB
Go

package rules
import (
"context"
"fmt"
"time"
"github.com/google/uuid"
"go.signoz.io/signoz/pkg/query-service/model"
"go.signoz.io/signoz/pkg/query-service/utils/labels"
"go.uber.org/zap"
)
// TestNotification prepares a dummy rule for given rule parameters and
// sends a test notification. returns alert count and error (if any)
func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError) {
ctx := context.Background()
if opts.Rule == nil {
return 0, model.BadRequest(fmt.Errorf("rule is required"))
}
parsedRule := opts.Rule
var alertname = parsedRule.AlertName
if alertname == "" {
// alertname is not mandatory for testing, so picking
// a random string here
alertname = uuid.New().String()
}
// append name to indicate this is test alert
parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, TestAlertPostFix)
var rule Rule
var err error
if parsedRule.RuleType == RuleTypeThreshold {
// add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
// create a threshold rule
rule, err = NewThresholdRule(
alertname,
parsedRule,
opts.FF,
opts.Reader,
opts.UseLogsNewSchema,
opts.UseTraceNewSchema,
WithSendAlways(),
WithSendUnmatched(),
WithSQLStore(opts.SQLStore),
)
if err != nil {
zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, model.BadRequest(err)
}
} else if parsedRule.RuleType == RuleTypeProm {
// create promql rule
rule, err = NewPromRule(
alertname,
parsedRule,
opts.Logger,
opts.Reader,
opts.ManagerOpts.PqlEngine,
WithSendAlways(),
WithSendUnmatched(),
WithSQLStore(opts.SQLStore),
)
if err != nil {
zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err))
return 0, model.BadRequest(err)
}
} else {
return 0, model.BadRequest(fmt.Errorf("failed to derive ruletype with given information"))
}
// set timestamp to current utc time
ts := time.Now().UTC()
count, err := rule.Eval(ctx, ts)
if err != nil {
zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err))
return 0, model.InternalError(fmt.Errorf("rule evaluation failed"))
}
alertsFound, ok := count.(int)
if !ok {
return 0, model.InternalError(fmt.Errorf("something went wrong"))
}
rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), opts.NotifyFunc)
return alertsFound, nil
}