mirror of
https://git.mirrors.martin98.com/https://github.com/SigNoz/signoz
synced 2025-06-04 11:25:52 +08:00

### Summary Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categorized into 3 parts: #### Frontend - The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. For instance, we have gone from the response `{ "status": "success", "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"...... }` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......` - `msteams` has been changed to `msteamsv2` wherever applicable #### Ruler The following changes have been done in the ruler component: - Removal of the old alertmanager and notifier - The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional - Introduction of a new `testPrepareNotifyFunc` for sending test notifications - Integration with the new alertmanager #### Alertmanager Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler - Addition of ruleId based matching - Support for marshalling the global configuration directly from the upstream alertmanager - Addition of orgId to the legacy alertmanager - Support for always adding defaults to both routes and receivers while creating them - Migration to create the required alertmanager tables - Migration for msteams to msteamsv2 has been added. We will start using the msteamsv2 config for the new alertmanager and keep using msteams for the old one. #### Related Issues / PRs Closes https://github.com/SigNoz/platform-pod/issues/404 Closes https://github.com/SigNoz/platform-pod/issues/176
101 lines
2.7 KiB
Go
101 lines
2.7 KiB
Go
package rules
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"go.signoz.io/signoz/pkg/query-service/model"
|
|
"go.signoz.io/signoz/pkg/query-service/utils/labels"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// TestNotification prepares a dummy rule for given rule parameters and
|
|
// sends a test notification. returns alert count and error (if any)
|
|
func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError) {
|
|
|
|
ctx := context.Background()
|
|
|
|
if opts.Rule == nil {
|
|
return 0, model.BadRequest(fmt.Errorf("rule is required"))
|
|
}
|
|
|
|
parsedRule := opts.Rule
|
|
var alertname = parsedRule.AlertName
|
|
if alertname == "" {
|
|
// alertname is not mandatory for testing, so picking
|
|
// a random string here
|
|
alertname = uuid.New().String()
|
|
}
|
|
|
|
// append name to indicate this is test alert
|
|
parsedRule.AlertName = fmt.Sprintf("%s%s", alertname, TestAlertPostFix)
|
|
|
|
var rule Rule
|
|
var err error
|
|
|
|
if parsedRule.RuleType == RuleTypeThreshold {
|
|
|
|
// add special labels for test alerts
|
|
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
|
|
parsedRule.Labels[labels.RuleSourceLabel] = ""
|
|
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
|
|
|
|
// create a threshold rule
|
|
rule, err = NewThresholdRule(
|
|
alertname,
|
|
parsedRule,
|
|
opts.FF,
|
|
opts.Reader,
|
|
opts.UseLogsNewSchema,
|
|
opts.UseTraceNewSchema,
|
|
WithSendAlways(),
|
|
WithSendUnmatched(),
|
|
WithSQLStore(opts.SQLStore),
|
|
)
|
|
|
|
if err != nil {
|
|
zap.L().Error("failed to prepare a new threshold rule for test", zap.String("name", rule.Name()), zap.Error(err))
|
|
return 0, model.BadRequest(err)
|
|
}
|
|
|
|
} else if parsedRule.RuleType == RuleTypeProm {
|
|
|
|
// create promql rule
|
|
rule, err = NewPromRule(
|
|
alertname,
|
|
parsedRule,
|
|
opts.Logger,
|
|
opts.Reader,
|
|
opts.ManagerOpts.PqlEngine,
|
|
WithSendAlways(),
|
|
WithSendUnmatched(),
|
|
WithSQLStore(opts.SQLStore),
|
|
)
|
|
|
|
if err != nil {
|
|
zap.L().Error("failed to prepare a new promql rule for test", zap.String("name", rule.Name()), zap.Error(err))
|
|
return 0, model.BadRequest(err)
|
|
}
|
|
} else {
|
|
return 0, model.BadRequest(fmt.Errorf("failed to derive ruletype with given information"))
|
|
}
|
|
|
|
// set timestamp to current utc time
|
|
ts := time.Now().UTC()
|
|
|
|
count, err := rule.Eval(ctx, ts)
|
|
if err != nil {
|
|
zap.L().Error("evaluating rule failed", zap.String("rule", rule.Name()), zap.Error(err))
|
|
return 0, model.InternalError(fmt.Errorf("rule evaluation failed"))
|
|
}
|
|
alertsFound, ok := count.(int)
|
|
if !ok {
|
|
return 0, model.InternalError(fmt.Errorf("something went wrong"))
|
|
}
|
|
rule.SendAlerts(ctx, ts, 0, time.Duration(1*time.Minute), opts.NotifyFunc)
|
|
|
|
return alertsFound, nil
|
|
}
|