Vibhu Pandey 1f33928bf9
feat(alertmanager): integrate with ruler (#7222)
### Summary

Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categorized into 3 parts:

#### Frontend
- The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. 

For instance, we have gone from the response `{
    "status": "success",
    "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"......
}` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......`

- `msteams` has been changed to `msteamsv2` wherever applicable

#### Ruler
The following changes have been done in the ruler component:

- Removal of the old alertmanager and notifier
- The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional
- Introduction of a new `testPrepareNotifyFunc` for sending test notifications
- Integration with the new alertmanager

#### Alertmanager
Although a large part of the alertmanager work has been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of the changes needed to integrate it with the ruler:

- Addition of ruleId based matching
- Support for marshalling the global configuration directly from the upstream alertmanager
- Addition of orgId to the legacy alertmanager
- Support for always adding defaults to both routes and receivers while creating them
- Migration to create the required alertmanager tables
- Migration for msteams to msteamsv2 has been added. We will start using the msteamsv2 config for the new alertmanager and keep using msteams for the old one.

#### Related Issues / PRs

Closes https://github.com/SigNoz/platform-pod/issues/404
Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-09 20:00:42 +00:00

105 lines
4.4 KiB
Go

package alertmanagerserver
import (
"net/url"
"time"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/common/model"
"go.signoz.io/signoz/pkg/types/alertmanagertypes"
)
// Config is the top-level configuration of this package. It groups the
// external URL, the global alertmanager settings, the root route of the
// routing tree, and the settings for alerts, silences and the notification log.
// Every field carries a mapstructure tag naming its configuration key.
type Config struct {
// ExternalURL is the URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself.
// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L155C54-L155C249
ExternalURL *url.URL `mapstructure:"external_url"`
// Global is the global configuration for the alertmanager.
// NOTE(review): this is the only field that also carries a yaml tag — confirm that is intentional.
Global alertmanagertypes.GlobalConfig `mapstructure:"global" yaml:"global"`
// Route is the config of the root node of the routing tree.
Route alertmanagertypes.RouteConfig `mapstructure:"route"`
// Alerts is the configuration for alerts.
Alerts AlertsConfig `mapstructure:"alerts"`
// Silences is the configuration for silences.
Silences SilencesConfig `mapstructure:"silences"`
// NFLog is the configuration for the notification log.
NFLog NFLogConfig `mapstructure:"nflog"`
}
// AlertsConfig is the configuration for alerts. It is used as the Alerts
// field of Config.
type AlertsConfig struct {
// GCInterval is the interval between garbage collection of alerts.
// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L152
GCInterval time.Duration `mapstructure:"gc_interval"`
}
// SilencesConfig is the configuration for silences. It is used as the
// Silences field of Config.
type SilencesConfig struct {
// Max is the maximum number of silences, including expired silences. If negative or zero, no limit is set.
// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L150C64-L150C157
Max int `mapstructure:"max"`
// MaxSizeBytes is the maximum size of the silences in bytes. If negative or zero, no limit is set.
// See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L150C64-L150C157
// NOTE(review): this link is identical to the one on Max — confirm it points at the intended upstream flag.
MaxSizeBytes int `mapstructure:"max_size_bytes"`
// MaintenanceInterval is the interval between garbage collection and snapshotting of the silences. The snapshot will be stored in the state store.
// The upstream alertmanager config (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149) has
// been split between silences and nflog.
MaintenanceInterval time.Duration `mapstructure:"maintenance_interval"`
// Retention is the retention of the silences.
Retention time.Duration `mapstructure:"retention"`
}
// NFLogConfig is the configuration for the notification log. It is used as
// the NFLog field of Config.
type NFLogConfig struct {
// MaintenanceInterval is the interval between garbage collection and snapshotting of the notification logs. The snapshot will be stored in the state store.
// The upstream alertmanager config (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149) has
// been split between silences and nflog.
MaintenanceInterval time.Duration `mapstructure:"maintenance_interval"`
// Retention is the retention of the notification logs.
Retention time.Duration `mapstructure:"retention"`
}
// NewConfig returns a Config populated with default values. Each section is
// built separately and then assembled into the returned value.
func NewConfig() Config {
	// Default address used for links back to the alertmanager.
	externalURL := &url.URL{Scheme: "http", Host: "localhost:8080"}

	// The resolve timeout corresponds to the default in upstream
	// (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/config/config.go#L727).
	globalDefaults := alertmanagertypes.GlobalConfig{
		ResolveTimeout: model.Duration(5 * time.Minute),
		SMTPHello:      "localhost",
		SMTPFrom:       "alertmanager@signoz.io",
		SMTPSmarthost:  config.HostPort{Host: "localhost", Port: "25"},
		SMTPRequireTLS: true,
	}

	// Root route: group by alert name with the default timings.
	routeDefaults := alertmanagertypes.RouteConfig{
		GroupByStr:     []string{"alertname"},
		GroupInterval:  5 * time.Minute,
		GroupWait:      30 * time.Second,
		RepeatInterval: 4 * time.Hour,
	}

	// Corresponds to the default in upstream
	// (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L152).
	alertDefaults := AlertsConfig{GCInterval: 30 * time.Minute}

	// Corresponds to the default in upstream
	// (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149-L151).
	silenceDefaults := SilencesConfig{
		Max:                 0,
		MaxSizeBytes:        0,
		MaintenanceInterval: 15 * time.Minute,
		Retention:           120 * time.Hour,
	}

	// Corresponds to the default in upstream
	// (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L149).
	nflogDefaults := NFLogConfig{
		MaintenanceInterval: 15 * time.Minute,
		Retention:           120 * time.Hour,
	}

	return Config{
		ExternalURL: externalURL,
		Global:      globalDefaults,
		Route:       routeDefaults,
		Alerts:      alertDefaults,
		Silences:    silenceDefaults,
		NFLog:       nflogDefaults,
	}
}