diff --git a/conf/example.yaml b/conf/example.yaml index f0f081c2ea..0375f72928 100644 --- a/conf/example.yaml +++ b/conf/example.yaml @@ -70,26 +70,74 @@ sqlstore: ##################### APIServer ##################### apiserver: timeout: + # Default request timeout. default: 60s + # Maximum request timeout. max: 600s + # List of routes to exclude from request timeout. excluded_routes: - /api/v1/logs/tail - /api/v3/logs/livetail logging: + # List of routes to exclude from request/response logging. excluded_routes: - /api/v1/health ##################### TelemetryStore ##################### telemetrystore: - # specifies the telemetrystore provider to use. + # Specifies the telemetrystore provider to use. provider: clickhouse - clickhouse: - # The DSN to use for ClickHouse. - dsn: http://localhost:9000 # Maximum number of idle connections in the connection pool. max_idle_conns: 50 # Maximum number of open connections to the database. max_open_conns: 100 # Maximum time to wait for a connection to be established. - dial_timeout: 5s \ No newline at end of file + dial_timeout: 5s + clickhouse: + # The DSN to use for ClickHouse. + dsn: http://localhost:9000 + +##################### Alertmanager ##################### +alertmanager: + # Specifies the alertmanager provider to use. + provider: legacy + legacy: + # The API URL (with prefix) of the legacy Alertmanager instance. + api_url: http://localhost:9093/api + signoz: + # The poll interval for periodically syncing the alertmanager with the config in the store. + poll_interval: 1m + # The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. + external_url: http://localhost:9093 + # The global configuration for the alertmanager. 
The exhaustive list of fields can be found upstream: https://github.com/prometheus/alertmanager/blob/efa05feffd644ba4accb526e98a8c6545d26a783/config/config.go#L833 + global: + # ResolveTimeout is the time after which an alert is declared resolved if it has not been updated. + resolve_timeout: 5m + route: + # GroupByStr is the list of labels to group alerts by. + group_by: + - alertname + # GroupInterval is the interval at which alerts are grouped. + group_interval: 1m + # GroupWait is the time to wait before sending alerts to receivers. + group_wait: 1m + # RepeatInterval is the interval at which alerts are repeated. + repeat_interval: 1h + alerts: + # Interval between garbage collection of alerts. + gc_interval: 30m + silences: + # Maximum number of silences, including expired silences. If negative or zero, no limit is set. + max: 0 + # Maximum size of the silences in bytes. If negative or zero, no limit is set. + max_size_bytes: 0 + # Interval between garbage collection and snapshotting of the silences. The snapshot will be stored in the state store. + maintenance_interval: 15m + # Retention of the silences. + retention: 120h + nflog: + # Interval between garbage collection and snapshotting of the notification logs. The snapshot will be stored in the state store. + maintenance_interval: 15m + # Retention of the notification logs. 
+ retention: 120h diff --git a/ee/query-service/app/api/api.go b/ee/query-service/app/api/api.go index 24989c8140..6b57928c85 100644 --- a/ee/query-service/app/api/api.go +++ b/ee/query-service/app/api/api.go @@ -11,6 +11,7 @@ import ( "go.signoz.io/signoz/ee/query-service/interfaces" "go.signoz.io/signoz/ee/query-service/license" "go.signoz.io/signoz/ee/query-service/usage" + "go.signoz.io/signoz/pkg/alertmanager" baseapp "go.signoz.io/signoz/pkg/query-service/app" "go.signoz.io/signoz/pkg/query-service/app/cloudintegrations" "go.signoz.io/signoz/pkg/query-service/app/integrations" @@ -20,6 +21,7 @@ import ( basemodel "go.signoz.io/signoz/pkg/query-service/model" rules "go.signoz.io/signoz/pkg/query-service/rules" "go.signoz.io/signoz/pkg/query-service/version" + "go.signoz.io/signoz/pkg/signoz" "go.signoz.io/signoz/pkg/types/authtypes" ) @@ -51,7 +53,7 @@ type APIHandler struct { } // NewAPIHandler returns an APIHandler -func NewAPIHandler(opts APIHandlerOptions) (*APIHandler, error) { +func NewAPIHandler(opts APIHandlerOptions, signoz *signoz.SigNoz) (*APIHandler, error) { baseHandler, err := baseapp.NewAPIHandler(baseapp.APIHandlerOpts{ Reader: opts.DataConnector, @@ -67,6 +69,8 @@ func NewAPIHandler(opts APIHandlerOptions) (*APIHandler, error) { FluxInterval: opts.FluxInterval, UseLogsNewSchema: opts.UseLogsNewSchema, UseTraceNewSchema: opts.UseTraceNewSchema, + AlertmanagerAPI: alertmanager.NewAPI(signoz.Alertmanager), + Signoz: signoz, }) if err != nil { diff --git a/ee/query-service/app/api/auth.go b/ee/query-service/app/api/auth.go index 23ddeb1d0f..6ae3241975 100644 --- a/ee/query-service/app/api/auth.go +++ b/ee/query-service/app/api/auth.go @@ -134,7 +134,7 @@ func (ah *APIHandler) registerUser(w http.ResponseWriter, r *http.Request) { return } - _, registerError := baseauth.Register(ctx, req) + _, registerError := baseauth.Register(ctx, req, ah.Signoz.Alertmanager) if !registerError.IsNil() { RespondError(w, apierr, nil) return diff --git 
a/ee/query-service/app/server.go b/ee/query-service/app/server.go index 2b6f0e4d05..4346747a0c 100644 --- a/ee/query-service/app/server.go +++ b/ee/query-service/app/server.go @@ -23,8 +23,10 @@ import ( "go.signoz.io/signoz/ee/query-service/integrations/gateway" "go.signoz.io/signoz/ee/query-service/interfaces" "go.signoz.io/signoz/ee/query-service/rules" + "go.signoz.io/signoz/pkg/alertmanager" "go.signoz.io/signoz/pkg/http/middleware" "go.signoz.io/signoz/pkg/signoz" + "go.signoz.io/signoz/pkg/sqlstore" "go.signoz.io/signoz/pkg/types" "go.signoz.io/signoz/pkg/types/authtypes" "go.signoz.io/signoz/pkg/web" @@ -45,7 +47,6 @@ import ( "go.signoz.io/signoz/pkg/query-service/cache" baseconst "go.signoz.io/signoz/pkg/query-service/constants" "go.signoz.io/signoz/pkg/query-service/healthcheck" - basealm "go.signoz.io/signoz/pkg/query-service/integrations/alertManager" baseint "go.signoz.io/signoz/pkg/query-service/interfaces" basemodel "go.signoz.io/signoz/pkg/query-service/model" pqle "go.signoz.io/signoz/pkg/query-service/pqlEngine" @@ -176,8 +177,8 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { } <-readerReady - rm, err := makeRulesManager(serverOptions.PromConfigPath, - baseconst.GetAlertManagerApiPrefix(), + rm, err := makeRulesManager( + serverOptions.PromConfigPath, serverOptions.RuleRepoURL, serverOptions.SigNoz.SQLStore.SQLxDB(), reader, @@ -186,6 +187,8 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { lm, serverOptions.UseLogsNewSchema, serverOptions.UseTraceNewSchema, + serverOptions.SigNoz.Alertmanager, + serverOptions.SigNoz.SQLStore, ) if err != nil { @@ -268,7 +271,7 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { JWT: serverOptions.Jwt, } - apiHandler, err := api.NewAPIHandler(apiOpts) + apiHandler, err := api.NewAPIHandler(apiOpts, serverOptions.SigNoz) if err != nil { return nil, err } @@ -530,7 +533,6 @@ func (s *Server) Stop() error { func makeRulesManager( promConfigPath, - alertManagerURL 
string, ruleRepoURL string, db *sqlx.DB, ch baseint.Reader, @@ -538,39 +540,34 @@ func makeRulesManager( disableRules bool, fm baseint.FeatureLookup, useLogsNewSchema bool, - useTraceNewSchema bool) (*baserules.Manager, error) { - + useTraceNewSchema bool, + alertmanager alertmanager.Alertmanager, + sqlstore sqlstore.SQLStore, +) (*baserules.Manager, error) { // create engine pqle, err := pqle.FromConfigPath(promConfigPath) if err != nil { return nil, fmt.Errorf("failed to create pql engine : %v", err) } - // notifier opts - notifierOpts := basealm.NotifierOptions{ - QueueCapacity: 10000, - Timeout: 1 * time.Second, - AlertManagerURLs: []string{alertManagerURL}, - } - // create manager opts managerOpts := &baserules.ManagerOptions{ - NotifierOpts: notifierOpts, - PqlEngine: pqle, - RepoURL: ruleRepoURL, - DBConn: db, - Context: context.Background(), - Logger: zap.L(), - DisableRules: disableRules, - FeatureFlags: fm, - Reader: ch, - Cache: cache, - EvalDelay: baseconst.GetEvalDelay(), - + PqlEngine: pqle, + RepoURL: ruleRepoURL, + DBConn: db, + Context: context.Background(), + Logger: zap.L(), + DisableRules: disableRules, + FeatureFlags: fm, + Reader: ch, + Cache: cache, + EvalDelay: baseconst.GetEvalDelay(), PrepareTaskFunc: rules.PrepareTaskFunc, UseLogsNewSchema: useLogsNewSchema, UseTraceNewSchema: useTraceNewSchema, PrepareTestRuleFunc: rules.TestNotification, + Alertmanager: alertmanager, + SQLStore: sqlstore, } // create Manager diff --git a/ee/query-service/main.go b/ee/query-service/main.go index 5fab8286c9..79284d439f 100644 --- a/ee/query-service/main.go +++ b/ee/query-service/main.go @@ -7,7 +7,6 @@ import ( "os" "os/signal" "strconv" - "syscall" "time" "go.opentelemetry.io/otel/sdk/resource" @@ -150,7 +149,14 @@ func main() { zap.L().Fatal("Failed to create config", zap.Error(err)) } - signoz, err := signoz.New(context.Background(), config, signoz.NewProviderConfig()) + signoz, err := signoz.New( + context.Background(), + config, + 
signoz.NewCacheProviderFactories(), + signoz.NewWebProviderFactories(), + signoz.NewSQLStoreProviderFactories(), + signoz.NewTelemetryStoreProviderFactories(), + ) if err != nil { zap.L().Fatal("Failed to create signoz struct", zap.Error(err)) } @@ -198,16 +204,19 @@ func main() { zap.L().Fatal("Failed to initialize auth cache", zap.Error(err)) } - signalsChannel := make(chan os.Signal, 1) - signal.Notify(signalsChannel, os.Interrupt, syscall.SIGTERM) + signoz.Start(context.Background()) - for { - select { - case status := <-server.HealthCheckStatus(): - zap.L().Info("Received HealthCheck status: ", zap.Int("status", int(status))) - case <-signalsChannel: - zap.L().Fatal("Received OS Interrupt Signal ... ") - server.Stop() - } + if err := signoz.Wait(context.Background()); err != nil { + zap.L().Fatal("Failed to start signoz", zap.Error(err)) + } + + err = server.Stop() + if err != nil { + zap.L().Fatal("Failed to stop server", zap.Error(err)) + } + + err = signoz.Stop(context.Background()) + if err != nil { + zap.L().Fatal("Failed to stop signoz", zap.Error(err)) } } diff --git a/ee/query-service/rules/manager.go b/ee/query-service/rules/manager.go index 00e0882f36..e28afbdb2e 100644 --- a/ee/query-service/rules/manager.go +++ b/ee/query-service/rules/manager.go @@ -28,6 +28,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) opts.UseLogsNewSchema, opts.UseTraceNewSchema, baserules.WithEvalDelay(opts.ManagerOpts.EvalDelay), + baserules.WithSQLStore(opts.SQLStore), ) if err != nil { @@ -48,6 +49,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) opts.Logger, opts.Reader, opts.ManagerOpts.PqlEngine, + baserules.WithSQLStore(opts.SQLStore), ) if err != nil { @@ -68,6 +70,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) opts.Reader, opts.Cache, baserules.WithEvalDelay(opts.ManagerOpts.EvalDelay), + baserules.WithSQLStore(opts.SQLStore), ) if err != nil { return task, 
err @@ -126,6 +129,7 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap opts.UseTraceNewSchema, baserules.WithSendAlways(), baserules.WithSendUnmatched(), + baserules.WithSQLStore(opts.SQLStore), ) if err != nil { @@ -144,6 +148,7 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap opts.ManagerOpts.PqlEngine, baserules.WithSendAlways(), baserules.WithSendUnmatched(), + baserules.WithSQLStore(opts.SQLStore), ) if err != nil { @@ -160,6 +165,7 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap opts.Cache, baserules.WithSendAlways(), baserules.WithSendUnmatched(), + baserules.WithSQLStore(opts.SQLStore), ) if err != nil { zap.L().Error("failed to prepare a new anomaly rule for test", zap.String("name", rule.Name()), zap.Error(err)) diff --git a/frontend/src/api/alerts/getTriggered.ts b/frontend/src/api/alerts/getTriggered.ts index 6955cc315c..8bad1d3da5 100644 --- a/frontend/src/api/alerts/getTriggered.ts +++ b/frontend/src/api/alerts/getTriggered.ts @@ -13,13 +13,11 @@ const getTriggered = async ( const response = await axios.get(`/alerts?${queryParams}`); - const amData = JSON.parse(response.data.data); - return { statusCode: 200, error: null, message: response.data.status, - payload: amData.data, + payload: response.data.data, }; } catch (error) { return ErrorResponseHandler(error as AxiosError); diff --git a/frontend/src/api/channels/createMsTeams.ts b/frontend/src/api/channels/createMsTeams.ts index ef9d309a97..269d7c2103 100644 --- a/frontend/src/api/channels/createMsTeams.ts +++ b/frontend/src/api/channels/createMsTeams.ts @@ -10,7 +10,7 @@ const create = async ( try { const response = await axios.post('/channels', { name: props.name, - msteams_configs: [ + msteamsv2_configs: [ { send_resolved: props.send_resolved, webhook_url: props.webhook_url, diff --git a/frontend/src/api/channels/editMsTeams.ts b/frontend/src/api/channels/editMsTeams.ts index 
293688f6c2..fddc9485b7 100644 --- a/frontend/src/api/channels/editMsTeams.ts +++ b/frontend/src/api/channels/editMsTeams.ts @@ -10,7 +10,7 @@ const editMsTeams = async ( try { const response = await axios.put(`/channels/${props.id}`, { name: props.name, - msteams_configs: [ + msteamsv2_configs: [ { send_resolved: props.send_resolved, webhook_url: props.webhook_url, diff --git a/frontend/src/api/channels/testMsTeams.ts b/frontend/src/api/channels/testMsTeams.ts index 3b4fc21b23..60474e3438 100644 --- a/frontend/src/api/channels/testMsTeams.ts +++ b/frontend/src/api/channels/testMsTeams.ts @@ -10,7 +10,7 @@ const testMsTeams = async ( try { const response = await axios.post('/testChannel', { name: props.name, - msteams_configs: [ + msteamsv2_configs: [ { send_resolved: true, webhook_url: props.webhook_url, diff --git a/frontend/src/pages/ChannelsEdit/index.tsx b/frontend/src/pages/ChannelsEdit/index.tsx index 9925c84849..ba421261c8 100644 --- a/frontend/src/pages/ChannelsEdit/index.tsx +++ b/frontend/src/pages/ChannelsEdit/index.tsx @@ -53,8 +53,8 @@ function ChannelsEdit(): JSX.Element { }; } - if (value && 'msteams_configs' in value) { - const msteamsConfig = value.msteams_configs[0]; + if (value && 'msteamsv2_configs' in value) { + const msteamsConfig = value.msteamsv2_configs[0]; channel = msteamsConfig; return { type: ChannelType.MsTeams, diff --git a/go.mod b/go.mod index 87a4de0d54..dc126c3cc2 100644 --- a/go.mod +++ b/go.mod @@ -71,7 +71,6 @@ require ( go.uber.org/zap v1.27.0 golang.org/x/crypto v0.32.0 golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 - golang.org/x/net v0.33.0 golang.org/x/oauth2 v0.24.0 golang.org/x/sync v0.10.0 golang.org/x/text v0.21.0 @@ -267,6 +266,7 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/atomic v1.11.0 // indirect golang.org/x/mod v0.22.0 // indirect + golang.org/x/net v0.33.0 // indirect golang.org/x/sys v0.29.0 // indirect golang.org/x/time v0.6.0 // indirect golang.org/x/tools v0.28.0 // 
indirect diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index 4f9f87e659..79c6a76ce7 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -49,4 +49,7 @@ type Alertmanager interface { // GetConfig gets the config for the organization. GetConfig(context.Context, string) (*alertmanagertypes.Config, error) + + // SetDefaultConfig sets the default config for the organization. + SetDefaultConfig(context.Context, string) error } diff --git a/pkg/alertmanager/alertmanagerserver/config.go b/pkg/alertmanager/alertmanagerserver/config.go index fbc27ec517..c7e06297d5 100644 --- a/pkg/alertmanager/alertmanagerserver/config.go +++ b/pkg/alertmanager/alertmanagerserver/config.go @@ -12,10 +12,10 @@ import ( type Config struct { // The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. // See https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/cmd/alertmanager/main.go#L155C54-L155C249 - ExternalUrl *url.URL `mapstructure:"external_url"` + ExternalURL *url.URL `mapstructure:"external_url"` // GlobalConfig is the global configuration for the alertmanager - Global alertmanagertypes.GlobalConfig `mapstructure:"global"` + Global alertmanagertypes.GlobalConfig `mapstructure:"global" yaml:"global"` // Config of the root node of the routing tree. 
Route alertmanagertypes.RouteConfig `mapstructure:"route"` @@ -66,8 +66,9 @@ type NFLogConfig struct { func NewConfig() Config { return Config{ - ExternalUrl: &url.URL{ - Host: "localhost:8080", + ExternalURL: &url.URL{ + Scheme: "http", + Host: "localhost:8080", }, Global: alertmanagertypes.GlobalConfig{ // Corresponds to the default in upstream (https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/config/config.go#L727) diff --git a/pkg/alertmanager/alertmanagerserver/server.go b/pkg/alertmanager/alertmanagerserver/server.go index abcbf3842a..2358393f6a 100644 --- a/pkg/alertmanager/alertmanagerserver/server.go +++ b/pkg/alertmanager/alertmanagerserver/server.go @@ -223,7 +223,7 @@ func (server *Server) SetConfig(ctx context.Context, alertmanagerConfig *alertma return err } - server.tmpl.ExternalURL = server.srvConfig.ExternalUrl + server.tmpl.ExternalURL = server.srvConfig.ExternalURL // Build the routing tree and record which receivers are used. routes := dispatch.NewRoute(config.Route, nil) diff --git a/pkg/alertmanager/alertmanagerserver/server_test.go b/pkg/alertmanager/alertmanagerserver/server_test.go index 339fddf4c7..54f407d88a 100644 --- a/pkg/alertmanager/alertmanagerserver/server_test.go +++ b/pkg/alertmanager/alertmanagerserver/server_test.go @@ -26,7 +26,7 @@ func TestServerSetConfigAndStop(t *testing.T) { server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore()) require.NoError(t, err) - amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{}, "1") + amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1") require.NoError(t, err) assert.NoError(t, server.SetConfig(context.Background(), 
amConfig)) @@ -37,7 +37,7 @@ func TestServerTestReceiverTypeWebhook(t *testing.T) { server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore()) require.NoError(t, err) - amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{}, "1") + amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1") require.NoError(t, err) webhookListener, err := net.Listen("tcp", "localhost:0") diff --git a/pkg/alertmanager/config.go b/pkg/alertmanager/config.go index 861c707a8f..03deab8fdd 100644 --- a/pkg/alertmanager/config.go +++ b/pkg/alertmanager/config.go @@ -1,8 +1,6 @@ package alertmanager import ( - "errors" - "fmt" "net/url" "time" @@ -15,7 +13,7 @@ type Config struct { Provider string `mapstructure:"provider"` // Internal is the internal alertmanager configuration. - Signoz Signoz `mapstructure:"signoz"` + Signoz Signoz `mapstructure:"signoz" yaml:"signoz"` // Legacy is the legacy alertmanager configuration. Legacy Legacy `mapstructure:"legacy"` @@ -26,12 +24,12 @@ type Signoz struct { PollInterval time.Duration `mapstructure:"poll_interval"` // Config is the config for the alertmanager server. - alertmanagerserver.Config `mapstructure:",squash"` + alertmanagerserver.Config `mapstructure:",squash" yaml:",squash"` } type Legacy struct { // ApiURL is the URL of the legacy signoz alertmanager. 
- ApiURL string `mapstructure:"api_url"` + ApiURL *url.URL `mapstructure:"api_url"` } func NewConfigFactory() factory.ConfigFactory { @@ -42,26 +40,19 @@ func newConfig() factory.Config { return Config{ Provider: "legacy", Legacy: Legacy{ - ApiURL: "http://alertmanager:9093/api", + ApiURL: &url.URL{ + Scheme: "http", + Host: "alertmanager:9093", + Path: "/api", + }, }, Signoz: Signoz{ - PollInterval: 15 * time.Second, + PollInterval: 1 * time.Minute, Config: alertmanagerserver.NewConfig(), }, } } func (c Config) Validate() error { - if c.Provider == "legacy" { - if c.Legacy.ApiURL == "" { - return errors.New("api_url is required") - } - - _, err := url.Parse(c.Legacy.ApiURL) - if err != nil { - return fmt.Errorf("api_url %q is invalid: %w", c.Legacy.ApiURL, err) - } - } - return nil } diff --git a/pkg/alertmanager/config_test.go b/pkg/alertmanager/config_test.go index 435e92efb7..62038f5406 100644 --- a/pkg/alertmanager/config_test.go +++ b/pkg/alertmanager/config_test.go @@ -2,8 +2,11 @@ package alertmanager import ( "context" + "net/url" "testing" + "time" + "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.signoz.io/signoz/pkg/config" @@ -14,6 +17,9 @@ import ( func TestNewWithEnvProvider(t *testing.T) { t.Setenv("SIGNOZ_ALERTMANAGER_PROVIDER", "legacy") t.Setenv("SIGNOZ_ALERTMANAGER_LEGACY_API__URL", "http://localhost:9093/api") + t.Setenv("SIGNOZ_ALERTMANAGER_SIGNOZ_ROUTE_REPEAT__INTERVAL", "5m") + t.Setenv("SIGNOZ_ALERTMANAGER_SIGNOZ_EXTERNAL__URL", "https://example.com/test") + t.Setenv("SIGNOZ_ALERTMANAGER_SIGNOZ_GLOBAL_RESOLVE__TIMEOUT", "10s") conf, err := config.New( context.Background(), @@ -30,15 +36,26 @@ func TestNewWithEnvProvider(t *testing.T) { require.NoError(t, err) actual := &Config{} - err = conf.Unmarshal("alertmanager", actual) + err = conf.Unmarshal("alertmanager", actual, "yaml") require.NoError(t, err) def := NewConfigFactory().New().(Config) + 
def.Signoz.Global.ResolveTimeout = model.Duration(10 * time.Second) + def.Signoz.Route.RepeatInterval = 5 * time.Minute + def.Signoz.ExternalURL = &url.URL{ + Scheme: "https", + Host: "example.com", + Path: "/test", + } expected := &Config{ Provider: "legacy", Legacy: Legacy{ - ApiURL: "http://localhost:9093/api", + ApiURL: &url.URL{ + Scheme: "http", + Host: "localhost:9093", + Path: "/api", + }, }, Signoz: def.Signoz, } diff --git a/pkg/alertmanager/legacyalertmanager/provider.go b/pkg/alertmanager/legacyalertmanager/provider.go index 1b971acec7..09a8d894a8 100644 --- a/pkg/alertmanager/legacyalertmanager/provider.go +++ b/pkg/alertmanager/legacyalertmanager/provider.go @@ -37,6 +37,7 @@ type provider struct { configStore alertmanagertypes.ConfigStore batcher *alertmanagerbatcher.Batcher url *url.URL + orgID string } func NewFactory(sqlstore sqlstore.SQLStore) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] { @@ -49,11 +50,6 @@ func New(ctx context.Context, providerSettings factory.ProviderSettings, config settings := factory.NewScopedProviderSettings(providerSettings, "go.signoz.io/signoz/pkg/alertmanager/legacyalertmanager") configStore := sqlalertmanagerstore.NewConfigStore(sqlstore) - url, err := url.Parse(config.Legacy.ApiURL) - if err != nil { - return nil, err - } - return &provider{ config: config, settings: settings, @@ -62,7 +58,7 @@ func New(ctx context.Context, providerSettings factory.ProviderSettings, config }, configStore: configStore, batcher: alertmanagerbatcher.New(settings.Logger(), alertmanagerbatcher.NewConfig()), - url: url, + url: config.Legacy.ApiURL, }, nil } @@ -73,8 +69,25 @@ func (provider *provider) Start(ctx context.Context) error { } for alerts := range provider.batcher.C { - if err := provider.putAlerts(ctx, "", alerts); err != nil { - provider.settings.Logger().Error("failed to send alerts to alertmanager", "error", err) + // For the first time, we need to get the orgID from the config store. 
+ // Since this is the legacy alertmanager, we get the first org from the store. + if provider.orgID == "" { + orgIDs, err := provider.configStore.ListOrgs(ctx) + if err != nil { + provider.settings.Logger().ErrorContext(ctx, "failed to send alerts to alertmanager", "error", err) + continue + } + + if len(orgIDs) == 0 { + provider.settings.Logger().ErrorContext(ctx, "failed to send alerts to alertmanager", "error", "no orgs found") + continue + } + + provider.orgID = orgIDs[0] + } + + if err := provider.putAlerts(ctx, provider.orgID, alerts); err != nil { + provider.settings.Logger().ErrorContext(ctx, "failed to send alerts to alertmanager", "error", err) } } @@ -125,17 +138,24 @@ func (provider *provider) putAlerts(ctx context.Context, orgID string, alerts al return err } - legacyAlerts := make([]postableAlert, len(alerts)) - for i, alert := range alerts { - receivers, err := cfg.ReceiverNamesFromRuleID(alert.Alert.Labels["ruleID"]) - if err != nil { - return err + var legacyAlerts []postableAlert + for _, alert := range alerts { + ruleID, ok := alert.Alert.Labels[alertmanagertypes.RuleIDMatcherName] + if !ok { + provider.settings.Logger().WarnContext(ctx, "cannot find ruleID for alert, skipping sending alert to alertmanager", "alert", alert) + continue } - legacyAlerts[i] = postableAlert{ + receivers := cfg.ReceiverNamesFromRuleID(ruleID) + if len(receivers) == 0 { + provider.settings.Logger().WarnContext(ctx, "cannot find receivers for alert, skipping sending alert to alertmanager", "ruleID", ruleID, "alert", alert) + continue + } + + legacyAlerts = append(legacyAlerts, postableAlert{ PostableAlert: alert, Receivers: receivers, - } + }) } url := provider.url.JoinPath(alertsPath) @@ -169,7 +189,7 @@ func (provider *provider) putAlerts(ctx context.Context, orgID string, alerts al func (provider *provider) TestReceiver(ctx context.Context, orgID string, receiver alertmanagertypes.Receiver) error { url := provider.url.JoinPath(testReceiverPath) - body, err := 
json.Marshal(receiver) + body, err := json.Marshal(alertmanagertypes.MSTeamsV2ReceiverToMSTeamsReceiver(receiver)) if err != nil { return err } @@ -198,12 +218,13 @@ func (provider *provider) TestReceiver(ctx context.Context, orgID string, receiv func (provider *provider) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error { url := provider.url.JoinPath(alertsPath) - legacyAlert := postableAlert{ + legacyAlerts := make([]postableAlert, 1) + legacyAlerts[0] = postableAlert{ PostableAlert: alert, Receivers: receivers, } - body, err := json.Marshal(legacyAlert) + body, err := json.Marshal(legacyAlerts) if err != nil { return err } @@ -234,7 +255,18 @@ func (provider *provider) ListChannels(ctx context.Context, orgID string) ([]*al } func (provider *provider) ListAllChannels(ctx context.Context) ([]*alertmanagertypes.Channel, error) { - return provider.configStore.ListAllChannels(ctx) + channels, err := provider.configStore.ListAllChannels(ctx) + if err != nil { + return nil, err + } + + for _, channel := range channels { + if err := channel.MSTeamsV2ToMSTeams(); err != nil { + return nil, err + } + } + + return channels, nil } func (provider *provider) GetChannelByID(ctx context.Context, orgID string, channelID int) (*alertmanagertypes.Channel, error) { @@ -264,7 +296,7 @@ func (provider *provider) UpdateChannelByReceiverAndID(ctx context.Context, orgI err = provider.configStore.UpdateChannel(ctx, orgID, channel, alertmanagertypes.WithCb(func(ctx context.Context) error { url := provider.url.JoinPath(routesPath) - body, err := json.Marshal(receiver) + body, err := json.Marshal(alertmanagertypes.MSTeamsV2ReceiverToMSTeamsReceiver(receiver)) if err != nil { return err } @@ -315,7 +347,7 @@ func (provider *provider) CreateChannel(ctx context.Context, orgID string, recei return provider.configStore.CreateChannel(ctx, channel, alertmanagertypes.WithCb(func(ctx context.Context) error { url := 
provider.url.JoinPath(routesPath) - body, err := json.Marshal(receiver) + body, err := json.Marshal(alertmanagertypes.MSTeamsV2ReceiverToMSTeamsReceiver(receiver)) if err != nil { return err } @@ -407,3 +439,12 @@ func (provider *provider) Stop(ctx context.Context) error { func (provider *provider) GetConfig(ctx context.Context, orgID string) (*alertmanagertypes.Config, error) { return provider.configStore.Get(ctx, orgID) } + +func (provider *provider) SetDefaultConfig(ctx context.Context, orgID string) error { + config, err := alertmanagertypes.NewDefaultConfig(provider.config.Signoz.Config.Global, provider.config.Signoz.Config.Route, orgID) + if err != nil { + return err + } + + return provider.configStore.Set(ctx, config) +} diff --git a/pkg/alertmanager/service.go b/pkg/alertmanager/service.go index 2c8f59049f..235a3db92e 100644 --- a/pkg/alertmanager/service.go +++ b/pkg/alertmanager/service.go @@ -85,6 +85,9 @@ func (service *Service) SyncServers(ctx context.Context) error { } func (service *Service) GetAlerts(ctx context.Context, orgID string, params alertmanagertypes.GettableAlertsParams) (alertmanagertypes.DeprecatedGettableAlerts, error) { + service.serversMtx.RLock() + defer service.serversMtx.RUnlock() + server, err := service.getServer(orgID) if err != nil { return nil, err @@ -99,6 +102,9 @@ func (service *Service) GetAlerts(ctx context.Context, orgID string, params aler } func (service *Service) PutAlerts(ctx context.Context, orgID string, alerts alertmanagertypes.PostableAlerts) error { + service.serversMtx.RLock() + defer service.serversMtx.RUnlock() + server, err := service.getServer(orgID) if err != nil { return err @@ -108,6 +114,9 @@ func (service *Service) PutAlerts(ctx context.Context, orgID string, alerts aler } func (service *Service) TestReceiver(ctx context.Context, orgID string, receiver alertmanagertypes.Receiver) error { + service.serversMtx.RLock() + defer service.serversMtx.RUnlock() + server, err := service.getServer(orgID) if err 
!= nil { return err @@ -117,6 +126,9 @@ func (service *Service) TestReceiver(ctx context.Context, orgID string, receiver } func (service *Service) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error { + service.serversMtx.RLock() + defer service.serversMtx.RUnlock() + server, err := service.getServer(orgID) if err != nil { return err @@ -144,17 +156,6 @@ func (service *Service) newServer(ctx context.Context, orgID string) (*alertmana return nil, err } - beforeCompareAndSelectHash := config.StoreableConfig().Hash - config, err = service.compareAndSelectConfig(ctx, config) - if err != nil { - return nil, err - } - - if beforeCompareAndSelectHash == config.StoreableConfig().Hash { - service.settings.Logger().Debug("skipping config store update for org", "orgID", orgID, "hash", config.StoreableConfig().Hash) - return server, nil - } - err = service.configStore.Set(ctx, config) if err != nil { return nil, err @@ -174,56 +175,18 @@ func (service *Service) getConfig(ctx context.Context, orgID string) (*alertmana if err != nil { return nil, err } - - config.SetGlobalConfig(service.config.Global) - if config.AlertmanagerConfig().Route == nil { - config.SetRouteConfig(service.config.Route) - } else { - config.UpdateRouteConfig(service.config.Route) - } } + if err := config.SetGlobalConfig(service.config.Global); err != nil { + return nil, err + } + config.SetRouteConfig(service.config.Route) + return config, nil } -// compareAndSelectConfig compares the existing config with the config derived from channels. -// If the hash of the config and the channels mismatch, the config derived from channels is returned. 
-func (service *Service) compareAndSelectConfig(ctx context.Context, incomingConfig *alertmanagertypes.Config) (*alertmanagertypes.Config, error) { - channels, err := service.configStore.ListChannels(ctx, incomingConfig.StoreableConfig().OrgID) - if err != nil { - return nil, err - } - - matchers, err := service.configStore.GetMatchers(ctx, incomingConfig.StoreableConfig().OrgID) - if err != nil { - return nil, err - } - - config, err := alertmanagertypes.NewConfigFromChannels(service.config.Global, service.config.Route, channels, incomingConfig.StoreableConfig().OrgID) - if err != nil { - return nil, err - } - - for ruleID, receivers := range matchers { - err = config.CreateRuleIDMatcher(ruleID, receivers) - if err != nil { - return nil, err - } - } - - if incomingConfig.StoreableConfig().Hash != config.StoreableConfig().Hash { - service.settings.Logger().InfoContext(ctx, "mismatch found, updating config to match channels and matchers") - return config, nil - } - - return incomingConfig, nil - -} - +// getServer returns the server for the given orgID. It should be called with the lock held. 
func (service *Service) getServer(orgID string) (*alertmanagerserver.Server, error) { - service.serversMtx.RLock() - defer service.serversMtx.RUnlock() - server, ok := service.servers[orgID] if !ok { return nil, errors.Newf(errors.TypeNotFound, ErrCodeAlertmanagerNotFound, "alertmanager not found for org %s", orgID) diff --git a/pkg/alertmanager/signozalertmanager/provider.go b/pkg/alertmanager/signozalertmanager/provider.go index 17c4b9b0bf..122a292a97 100644 --- a/pkg/alertmanager/signozalertmanager/provider.go +++ b/pkg/alertmanager/signozalertmanager/provider.go @@ -170,3 +170,12 @@ func (provider *provider) SetConfig(ctx context.Context, config *alertmanagertyp func (provider *provider) GetConfig(ctx context.Context, orgID string) (*alertmanagertypes.Config, error) { return provider.configStore.Get(ctx, orgID) } + +func (provider *provider) SetDefaultConfig(ctx context.Context, orgID string) error { + config, err := alertmanagertypes.NewDefaultConfig(provider.config.Signoz.Config.Global, provider.config.Signoz.Config.Route, orgID) + if err != nil { + return err + } + + return provider.configStore.Set(ctx, config) +} diff --git a/pkg/config/conf.go b/pkg/config/conf.go index 5b34dddf16..f890849d4b 100644 --- a/pkg/config/conf.go +++ b/pkg/config/conf.go @@ -1,9 +1,13 @@ package config import ( + "net/url" + "reflect" + "github.com/go-viper/mapstructure/v2" "github.com/knadh/koanf/providers/confmap" "github.com/knadh/koanf/v2" + yamlv2 "gopkg.in/yaml.v2" ) const ( @@ -53,19 +57,30 @@ func (conf *Conf) MergeAt(input *Conf, path string) error { // Unmarshal unmarshals the configuration at the given path into the input. // It uses a WeaklyTypedInput to allow for more flexible unmarshalling. 
-func (conf *Conf) Unmarshal(path string, input any) error { - dc := &mapstructure.DecoderConfig{ - TagName: "mapstructure", - WeaklyTypedInput: true, - DecodeHook: mapstructure.ComposeDecodeHookFunc( - mapstructure.StringToSliceHookFunc(","), - mapstructure.StringToTimeDurationHookFunc(), - mapstructure.TextUnmarshallerHookFunc(), - ), - Result: input, +func (conf *Conf) Unmarshal(path string, input any, tags ...string) error { + tags = append([]string{"mapstructure"}, tags...) + + for _, tag := range tags { + dc := &mapstructure.DecoderConfig{ + TagName: tag, + WeaklyTypedInput: true, + DecodeHook: mapstructure.ComposeDecodeHookFunc( + mapstructure.StringToSliceHookFunc(","), + mapstructure.StringToTimeDurationHookFunc(), + mapstructure.TextUnmarshallerHookFunc(), + StringToURLHookFunc(), + YamlV2UnmarshalHookFunc(), + ), + Result: input, + } + + err := conf.Koanf.UnmarshalWithConf(path, input, koanf.UnmarshalConf{Tag: tag, DecoderConfig: dc}) + if err != nil { + return err + } } - return conf.Koanf.UnmarshalWithConf(path, input, koanf.UnmarshalConf{Tag: "mapstructure", DecoderConfig: dc}) + return nil } // Set sets the configuration at the given key. 
@@ -88,3 +103,51 @@ func (conf *Conf) Set(key string, input any) error { return nil } + +func StringToURLHookFunc() mapstructure.DecodeHookFunc { + return func( + f reflect.Type, + t reflect.Type, + data interface{}, + ) (interface{}, error) { + if f.Kind() != reflect.String { + return data, nil + } + if t != reflect.TypeOf(url.URL{}) { + return data, nil + } + + // Convert it by parsing + u, err := url.Parse(data.(string)) + return u, err + } +} + +func YamlV2UnmarshalHookFunc() mapstructure.DecodeHookFunc { + return func( + f reflect.Type, + t reflect.Type, + data interface{}, + ) (interface{}, error) { + if f.Kind() != reflect.String { + return data, nil + } + result := reflect.New(t).Interface() + _, ok := result.(yamlv2.Unmarshaler) + if !ok { + return data, nil + } + + str, ok := data.(string) + if !ok { + str = reflect.Indirect(reflect.ValueOf(&data)).Elem().String() + } + + if err := yamlv2.Unmarshal([]byte(str), result); err != nil { + return nil, err + } + + return result, nil + } + +} diff --git a/pkg/factory/registry.go b/pkg/factory/registry.go index 2eda44279b..be7d95c6d4 100644 --- a/pkg/factory/registry.go +++ b/pkg/factory/registry.go @@ -40,7 +40,7 @@ func NewRegistry(logger *slog.Logger, services ...NamedService) (*Registry, erro }, nil } -func (r *Registry) Start(ctx context.Context) error { +func (r *Registry) Start(ctx context.Context) { for _, s := range r.services.GetInOrder() { go func(s NamedService) { r.logger.InfoContext(ctx, "starting service", "service", s.Name()) @@ -49,7 +49,6 @@ func (r *Registry) Start(ctx context.Context) error { }(s) } - return nil } func (r *Registry) Wait(ctx context.Context) error { diff --git a/pkg/factory/registry_test.go b/pkg/factory/registry_test.go index 57a7b0df67..6d55a3bec2 100644 --- a/pkg/factory/registry_test.go +++ b/pkg/factory/registry_test.go @@ -41,7 +41,7 @@ func TestRegistryWith2Services(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - require.NoError(t, registry.Start(ctx)) + 
registry.Start(ctx) require.NoError(t, registry.Wait(ctx)) require.NoError(t, registry.Stop(ctx)) }() @@ -62,7 +62,7 @@ func TestRegistryWith2ServicesWithoutWait(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - require.NoError(t, registry.Start(ctx)) + registry.Start(ctx) require.NoError(t, registry.Stop(ctx)) }() diff --git a/pkg/http/middleware/response.go b/pkg/http/middleware/response.go index deb0f3dd81..8d6f4d068e 100644 --- a/pkg/http/middleware/response.go +++ b/pkg/http/middleware/response.go @@ -78,6 +78,12 @@ func (writer *nonFlushingBadResponseLoggingWriter) Write(data []byte) (int, erro // https://godoc.org/net/http#ResponseWriter writer.WriteHeader(http.StatusOK) } + + // 204 No Content is a success response that indicates that the request has been successfully processed and that the response body is intentionally empty. + if writer.statusCode == 204 { + return 0, nil + } + n, err := writer.rw.Write(data) if writer.logBody { writer.captureResponseBody(data) diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index f4059d3a6e..c31cf5796d 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -23,6 +23,7 @@ type SDK struct { logger *slog.Logger sdk contribsdkconfig.SDK prometheusRegistry *prometheus.Registry + startCh chan struct{} } // New creates a new Instrumentation instance with configured providers. 
@@ -96,14 +97,17 @@ func New(ctx context.Context, build version.Build, cfg Config) (*SDK, error) { sdk: sdk, prometheusRegistry: prometheusRegistry, logger: NewLogger(cfg), + startCh: make(chan struct{}), }, nil } func (i *SDK) Start(ctx context.Context) error { + <-i.startCh return nil } func (i *SDK) Stop(ctx context.Context) error { + close(i.startCh) return i.sdk.Shutdown(ctx) } diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index f7af28296b..ccc441463d 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -49,7 +49,6 @@ import ( "go.signoz.io/signoz/pkg/query-service/common" "go.signoz.io/signoz/pkg/query-service/constants" chErrors "go.signoz.io/signoz/pkg/query-service/errors" - am "go.signoz.io/signoz/pkg/query-service/integrations/alertManager" "go.signoz.io/signoz/pkg/query-service/interfaces" "go.signoz.io/signoz/pkg/query-service/metrics" "go.signoz.io/signoz/pkg/query-service/model" @@ -145,7 +144,6 @@ type ClickHouseReader struct { promConfigFile string promConfig *config.Config - alertManager am.Manager featureFlags interfaces.FeatureLookup liveTailRefreshSeconds int @@ -196,13 +194,6 @@ func NewReaderFromClickhouseConnection( fluxIntervalForTraceDetail time.Duration, cache cache.Cache, ) *ClickHouseReader { - alertManager, err := am.New() - if err != nil { - zap.L().Error("failed to initialize alert manager", zap.Error(err)) - zap.L().Error("check if the alert manager URL is correctly set and valid") - os.Exit(1) - } - logsTableName := options.primary.LogsTable logsLocalTableName := options.primary.LogsLocalTable if useLogsNewSchema { @@ -221,7 +212,6 @@ func NewReaderFromClickhouseConnection( db: db, localDB: localDB, TraceDB: options.primary.TraceDB, - alertManager: alertManager, operationsTable: options.primary.OperationsTable, indexTable: options.primary.IndexTable, errorTable: options.primary.ErrorTable, diff --git 
a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go index a85844241f..eba0148e26 100644 --- a/pkg/query-service/app/http_handler.go +++ b/pkg/query-service/app/http_handler.go @@ -18,7 +18,9 @@ import ( "text/template" "time" + "go.signoz.io/signoz/pkg/alertmanager" "go.signoz.io/signoz/pkg/query-service/app/metricsexplorer" + "go.signoz.io/signoz/pkg/signoz" "github.com/gorilla/mux" "github.com/gorilla/websocket" @@ -60,7 +62,6 @@ import ( "go.signoz.io/signoz/pkg/query-service/app/integrations/messagingQueues/kafka" "go.signoz.io/signoz/pkg/query-service/app/logparsingpipeline" "go.signoz.io/signoz/pkg/query-service/dao" - am "go.signoz.io/signoz/pkg/query-service/integrations/alertManager" "go.signoz.io/signoz/pkg/query-service/interfaces" "go.signoz.io/signoz/pkg/query-service/model" "go.signoz.io/signoz/pkg/query-service/rules" @@ -86,7 +87,6 @@ type APIHandler struct { reader interfaces.Reader skipConfig *model.SkipConfig appDao dao.ModelDao - alertManager am.Manager ruleManager *rules.Manager featureFlags interfaces.FeatureLookup querier interfaces.Querier @@ -135,6 +135,10 @@ type APIHandler struct { pvcsRepo *inframetrics.PvcsRepo JWT *authtypes.JWT + + AlertmanagerAPI *alertmanager.API + + Signoz *signoz.SigNoz } type APIHandlerOpts struct { @@ -176,16 +180,14 @@ type APIHandlerOpts struct { UseTraceNewSchema bool JWT *authtypes.JWT + + AlertmanagerAPI *alertmanager.API + + Signoz *signoz.SigNoz } // NewAPIHandler returns an APIHandler func NewAPIHandler(opts APIHandlerOpts) (*APIHandler, error) { - - alertManager, err := am.New() - if err != nil { - return nil, err - } - querierOpts := querier.QuerierOptions{ Reader: opts.Reader, Cache: opts.Cache, @@ -229,7 +231,6 @@ func NewAPIHandler(opts APIHandlerOpts) (*APIHandler, error) { skipConfig: opts.SkipConfig, preferSpanMetrics: opts.PreferSpanMetrics, temporalityMap: make(map[string]map[v3.Temporality]bool), - alertManager: alertManager, ruleManager: opts.RuleManager, 
featureFlags: opts.FeatureFlags, IntegrationsController: opts.IntegrationsController, @@ -252,6 +253,8 @@ func NewAPIHandler(opts APIHandlerOpts) (*APIHandler, error) { pvcsRepo: pvcsRepo, JWT: opts.JWT, SummaryService: summaryService, + AlertmanagerAPI: opts.AlertmanagerAPI, + Signoz: opts.Signoz, } logsQueryBuilder := logsv3.PrepareLogsQuery @@ -491,21 +494,21 @@ func (aH *APIHandler) Respond(w http.ResponseWriter, data interface{}) { // RegisterPrivateRoutes registers routes for this handler on the given router func (aH *APIHandler) RegisterPrivateRoutes(router *mux.Router) { - router.HandleFunc("/api/v1/channels", aH.listChannels).Methods(http.MethodGet) + router.HandleFunc("/api/v1/channels", aH.AlertmanagerAPI.ListAllChannels).Methods(http.MethodGet) } // RegisterRoutes registers routes for this handler on the given router func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *AuthMiddleware) { router.HandleFunc("/api/v1/query_range", am.ViewAccess(aH.queryRangeMetrics)).Methods(http.MethodGet) router.HandleFunc("/api/v1/query", am.ViewAccess(aH.queryMetrics)).Methods(http.MethodGet) - router.HandleFunc("/api/v1/channels", am.ViewAccess(aH.listChannels)).Methods(http.MethodGet) - router.HandleFunc("/api/v1/channels/{id}", am.ViewAccess(aH.getChannel)).Methods(http.MethodGet) - router.HandleFunc("/api/v1/channels/{id}", am.AdminAccess(aH.editChannel)).Methods(http.MethodPut) - router.HandleFunc("/api/v1/channels/{id}", am.AdminAccess(aH.deleteChannel)).Methods(http.MethodDelete) - router.HandleFunc("/api/v1/channels", am.EditAccess(aH.createChannel)).Methods(http.MethodPost) - router.HandleFunc("/api/v1/testChannel", am.EditAccess(aH.testChannel)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/channels", am.ViewAccess(aH.AlertmanagerAPI.ListChannels)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/channels/{id}", am.ViewAccess(aH.AlertmanagerAPI.GetChannelByID)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/channels/{id}", 
am.AdminAccess(aH.AlertmanagerAPI.UpdateChannelByID)).Methods(http.MethodPut) + router.HandleFunc("/api/v1/channels/{id}", am.AdminAccess(aH.AlertmanagerAPI.DeleteChannelByID)).Methods(http.MethodDelete) + router.HandleFunc("/api/v1/channels", am.EditAccess(aH.AlertmanagerAPI.CreateChannel)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/testChannel", am.EditAccess(aH.AlertmanagerAPI.TestReceiver)).Methods(http.MethodPost) - router.HandleFunc("/api/v1/alerts", am.ViewAccess(aH.getAlerts)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/alerts", am.ViewAccess(aH.AlertmanagerAPI.GetAlerts)).Methods(http.MethodGet) router.HandleFunc("/api/v1/rules", am.ViewAccess(aH.listRules)).Methods(http.MethodGet) router.HandleFunc("/api/v1/rules/{id}", am.ViewAccess(aH.getRule)).Methods(http.MethodGet) @@ -1369,138 +1372,6 @@ func (aH *APIHandler) editRule(w http.ResponseWriter, r *http.Request) { } -func (aH *APIHandler) getChannel(w http.ResponseWriter, r *http.Request) { - id := mux.Vars(r)["id"] - channel, apiErrorObj := aH.ruleManager.RuleDB().GetChannel(id) - if apiErrorObj != nil { - RespondError(w, apiErrorObj, nil) - return - } - aH.Respond(w, channel) -} - -func (aH *APIHandler) deleteChannel(w http.ResponseWriter, r *http.Request) { - id := mux.Vars(r)["id"] - apiErrorObj := aH.ruleManager.RuleDB().DeleteChannel(id) - if apiErrorObj != nil { - RespondError(w, apiErrorObj, nil) - return - } - aH.Respond(w, "notification channel successfully deleted") -} - -func (aH *APIHandler) listChannels(w http.ResponseWriter, r *http.Request) { - channels, apiErrorObj := aH.ruleManager.RuleDB().GetChannels() - if apiErrorObj != nil { - RespondError(w, apiErrorObj, nil) - return - } - aH.Respond(w, channels) -} - -// testChannels sends test alert to all registered channels -func (aH *APIHandler) testChannel(w http.ResponseWriter, r *http.Request) { - - defer r.Body.Close() - body, err := io.ReadAll(r.Body) - if err != nil { - zap.L().Error("Error in getting req body of 
testChannel API", zap.Error(err)) - RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) - return - } - - receiver := &am.Receiver{} - if err := json.Unmarshal(body, receiver); err != nil { // Parse []byte to go struct pointer - zap.L().Error("Error in parsing req body of testChannel API\n", zap.Error(err)) - RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) - return - } - // send alert - apiErrorObj := aH.alertManager.TestReceiver(receiver) - if apiErrorObj != nil { - RespondError(w, apiErrorObj, nil) - return - } - aH.Respond(w, "test alert sent") -} - -func (aH *APIHandler) editChannel(w http.ResponseWriter, r *http.Request) { - - id := mux.Vars(r)["id"] - - defer r.Body.Close() - body, err := io.ReadAll(r.Body) - if err != nil { - zap.L().Error("Error in getting req body of editChannel API", zap.Error(err)) - RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) - return - } - - receiver := &am.Receiver{} - if err := json.Unmarshal(body, receiver); err != nil { // Parse []byte to go struct pointer - zap.L().Error("Error in parsing req body of editChannel API", zap.Error(err)) - RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) - return - } - - _, apiErrorObj := aH.ruleManager.RuleDB().EditChannel(receiver, id) - - if apiErrorObj != nil { - RespondError(w, apiErrorObj, nil) - return - } - - aH.Respond(w, nil) - -} - -func (aH *APIHandler) createChannel(w http.ResponseWriter, r *http.Request) { - - defer r.Body.Close() - body, err := io.ReadAll(r.Body) - if err != nil { - zap.L().Error("Error in getting req body of createChannel API", zap.Error(err)) - RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) - return - } - - receiver := &am.Receiver{} - if err := json.Unmarshal(body, receiver); err != nil { // Parse []byte to go struct pointer - zap.L().Error("Error in parsing req body of createChannel API", zap.Error(err)) - RespondError(w, &model.ApiError{Typ: 
model.ErrorBadData, Err: err}, nil) - return - } - - _, apiErrorObj := aH.ruleManager.RuleDB().CreateChannel(receiver) - - if apiErrorObj != nil { - RespondError(w, apiErrorObj, nil) - return - } - - aH.Respond(w, nil) - -} - -func (aH *APIHandler) getAlerts(w http.ResponseWriter, r *http.Request) { - params := r.URL.Query() - amEndpoint := constants.GetAlertManagerApiPrefix() - resp, err := http.Get(amEndpoint + "v1/alerts" + "?" + params.Encode()) - if err != nil { - RespondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil) - return - } - - defer resp.Body.Close() - body, err := io.ReadAll(resp.Body) - if err != nil { - RespondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil) - return - } - - aH.Respond(w, string(body)) -} - func (aH *APIHandler) createRule(w http.ResponseWriter, r *http.Request) { defer r.Body.Close() @@ -2165,7 +2036,7 @@ func (aH *APIHandler) registerUser(w http.ResponseWriter, r *http.Request) { return } - _, apiErr := auth.Register(context.Background(), req) + _, apiErr := auth.Register(context.Background(), req, aH.Signoz.Alertmanager) if apiErr != nil { RespondError(w, apiErr, nil) return diff --git a/pkg/query-service/app/server.go b/pkg/query-service/app/server.go index 69c8f4b9ff..977a835867 100644 --- a/pkg/query-service/app/server.go +++ b/pkg/query-service/app/server.go @@ -14,6 +14,7 @@ import ( "github.com/rs/cors" "github.com/soheilhy/cmux" + "go.signoz.io/signoz/pkg/alertmanager" "go.signoz.io/signoz/pkg/http/middleware" "go.signoz.io/signoz/pkg/query-service/agentConf" "go.signoz.io/signoz/pkg/query-service/app/clickhouseReader" @@ -25,6 +26,7 @@ import ( opAmpModel "go.signoz.io/signoz/pkg/query-service/app/opamp/model" "go.signoz.io/signoz/pkg/query-service/app/preferences" "go.signoz.io/signoz/pkg/signoz" + "go.signoz.io/signoz/pkg/sqlstore" "go.signoz.io/signoz/pkg/types" "go.signoz.io/signoz/pkg/types/authtypes" "go.signoz.io/signoz/pkg/web" @@ -36,7 +38,6 @@ import ( 
"go.signoz.io/signoz/pkg/query-service/dao" "go.signoz.io/signoz/pkg/query-service/featureManager" "go.signoz.io/signoz/pkg/query-service/healthcheck" - am "go.signoz.io/signoz/pkg/query-service/integrations/alertManager" "go.signoz.io/signoz/pkg/query-service/interfaces" "go.signoz.io/signoz/pkg/query-service/model" pqle "go.signoz.io/signoz/pkg/query-service/pqlEngine" @@ -152,9 +153,16 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { <-readerReady rm, err := makeRulesManager( - serverOptions.PromConfigPath, - constants.GetAlertManagerApiPrefix(), - serverOptions.RuleRepoURL, serverOptions.SigNoz.SQLStore.SQLxDB(), reader, c, serverOptions.DisableRules, fm, serverOptions.UseLogsNewSchema, serverOptions.UseTraceNewSchema) + serverOptions.RuleRepoURL, + serverOptions.SigNoz.SQLStore.SQLxDB(), + reader, + c, + serverOptions.DisableRules, + fm, + serverOptions.UseLogsNewSchema, + serverOptions.UseTraceNewSchema, + serverOptions.SigNoz.SQLStore, + ) if err != nil { return nil, err } @@ -197,6 +205,8 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { UseLogsNewSchema: serverOptions.UseLogsNewSchema, UseTraceNewSchema: serverOptions.UseTraceNewSchema, JWT: serverOptions.Jwt, + AlertmanagerAPI: alertmanager.NewAPI(serverOptions.SigNoz.Alertmanager), + Signoz: serverOptions.SigNoz, }) if err != nil { return nil, err @@ -279,7 +289,6 @@ func (s *Server) createPrivateServer(api *APIHandler) (*http.Server, error) { } func (s *Server) createPublicServer(api *APIHandler, web web.Web) (*http.Server, error) { - r := NewRouter() r.Use(middleware.NewAuth(zap.L(), s.serverOptions.Jwt, []string{"Authorization", "Sec-WebSocket-Protocol"}).Wrap) @@ -467,8 +476,6 @@ func (s *Server) Stop() error { } func makeRulesManager( - _, - alertManagerURL string, ruleRepoURL string, db *sqlx.DB, ch interfaces.Reader, @@ -476,7 +483,9 @@ func makeRulesManager( disableRules bool, fm interfaces.FeatureLookup, useLogsNewSchema bool, - useTraceNewSchema bool) 
(*rules.Manager, error) { + useTraceNewSchema bool, + sqlstore sqlstore.SQLStore, +) (*rules.Manager, error) { // create engine pqle, err := pqle.FromReader(ch) @@ -484,16 +493,8 @@ func makeRulesManager( return nil, fmt.Errorf("failed to create pql engine : %v", err) } - // notifier opts - notifierOpts := am.NotifierOptions{ - QueueCapacity: 10000, - Timeout: 1 * time.Second, - AlertManagerURLs: []string{alertManagerURL}, - } - // create manager opts managerOpts := &rules.ManagerOptions{ - NotifierOpts: notifierOpts, PqlEngine: pqle, RepoURL: ruleRepoURL, DBConn: db, @@ -506,6 +507,7 @@ func makeRulesManager( EvalDelay: constants.GetEvalDelay(), UseLogsNewSchema: useLogsNewSchema, UseTraceNewSchema: useTraceNewSchema, + SQLStore: sqlstore, } // create Manager diff --git a/pkg/query-service/auth/auth.go b/pkg/query-service/auth/auth.go index 69e5415e81..9913e527b3 100644 --- a/pkg/query-service/auth/auth.go +++ b/pkg/query-service/auth/auth.go @@ -11,6 +11,7 @@ import ( "github.com/google/uuid" "github.com/pkg/errors" + "go.signoz.io/signoz/pkg/alertmanager" "go.signoz.io/signoz/pkg/query-service/constants" "go.signoz.io/signoz/pkg/query-service/dao" "go.signoz.io/signoz/pkg/query-service/model" @@ -536,7 +537,7 @@ func RegisterInvitedUser(ctx context.Context, req *RegisterRequest, nopassword b // Register registers a new user. For the first register request, it doesn't need an invite token // and also the first registration is an enforced ADMIN registration. Every subsequent request will // need an invite token to go through. 
-func Register(ctx context.Context, req *RegisterRequest) (*types.User, *model.ApiError) { +func Register(ctx context.Context, req *RegisterRequest, alertmanager alertmanager.Alertmanager) (*types.User, *model.ApiError) { users, err := dao.DB().GetUsers(ctx) if err != nil { return nil, model.InternalError(fmt.Errorf("failed to get user count")) @@ -544,7 +545,16 @@ func Register(ctx context.Context, req *RegisterRequest) (*types.User, *model.Ap switch len(users) { case 0: - return RegisterFirstUser(ctx, req) + user, err := RegisterFirstUser(ctx, req) + if err != nil { + return nil, err + } + + if err := alertmanager.SetDefaultConfig(ctx, user.OrgID); err != nil { + return nil, model.InternalError(err) + } + + return user, nil default: return RegisterInvitedUser(ctx, req, false) } diff --git a/pkg/query-service/constants/constants.go b/pkg/query-service/constants/constants.go index 1c9173c5e6..fbfe0b76db 100644 --- a/pkg/query-service/constants/constants.go +++ b/pkg/query-service/constants/constants.go @@ -57,22 +57,12 @@ const PreferRPM = "PreferRPM" const SpanSearchScopeRoot = "isroot" const SpanSearchScopeEntryPoint = "isentrypoint" -func GetAlertManagerApiPrefix() string { - if os.Getenv("ALERTMANAGER_API_PREFIX") != "" { - return os.Getenv("ALERTMANAGER_API_PREFIX") - } - return "http://alertmanager:9093/api/" -} - var TELEMETRY_HEART_BEAT_DURATION_MINUTES = GetOrDefaultEnvInt("TELEMETRY_HEART_BEAT_DURATION_MINUTES", 720) var TELEMETRY_ACTIVE_USER_DURATION_MINUTES = GetOrDefaultEnvInt("TELEMETRY_ACTIVE_USER_DURATION_MINUTES", 360) var InviteEmailTemplate = GetOrDefaultEnv("INVITE_EMAIL_TEMPLATE", "/root/templates/invitation_email_template.html") -// Alert manager channel subpath -var AmChannelApiPath = GetOrDefaultEnv("ALERTMANAGER_API_CHANNEL_PATH", "v1/routes") - var OTLPTarget = GetOrDefaultEnv("OTEL_EXPORTER_OTLP_ENDPOINT", "") var LogExportBatchSize = GetOrDefaultEnv("OTEL_BLRP_MAX_EXPORT_BATCH_SIZE", "512") diff --git 
a/pkg/query-service/constants/constants_test.go b/pkg/query-service/constants/constants_test.go deleted file mode 100644 index 59c35eae12..0000000000 --- a/pkg/query-service/constants/constants_test.go +++ /dev/null @@ -1,21 +0,0 @@ -package constants - -import ( - "os" - "testing" - - . "github.com/smartystreets/goconvey/convey" -) - -func TestGetAlertManagerApiPrefix(t *testing.T) { - Convey("TestGetAlertManagerApiPrefix", t, func() { - res := GetAlertManagerApiPrefix() - So(res, ShouldEqual, "http://alertmanager:9093/api/") - - Convey("WithEnvSet", func() { - os.Setenv("ALERTMANAGER_API_PREFIX", "http://test:9093/api/") - res = GetAlertManagerApiPrefix() - So(res, ShouldEqual, "http://test:9093/api/") - }) - }) -} diff --git a/pkg/query-service/integrations/alertManager/manager.go b/pkg/query-service/integrations/alertManager/manager.go deleted file mode 100644 index 10db4debd7..0000000000 --- a/pkg/query-service/integrations/alertManager/manager.go +++ /dev/null @@ -1,200 +0,0 @@ -package alertManager - -// Wrapper to connect and process alert manager functions -import ( - "bytes" - "encoding/json" - "fmt" - "net/http" - neturl "net/url" - - "go.signoz.io/signoz/pkg/query-service/constants" - "go.signoz.io/signoz/pkg/query-service/model" - "go.uber.org/zap" -) - -const contentType = "application/json" - -type Manager interface { - URL() *neturl.URL - URLPath(path string) *neturl.URL - AddRoute(receiver *Receiver) *model.ApiError - EditRoute(receiver *Receiver) *model.ApiError - DeleteRoute(name string) *model.ApiError - TestReceiver(receiver *Receiver) *model.ApiError -} - -func defaultOptions() []ManagerOptions { - return []ManagerOptions{ - WithURL(constants.GetAlertManagerApiPrefix()), - WithChannelApiPath(constants.AmChannelApiPath), - } -} - -type ManagerOptions func(m *manager) error - -func New(opts ...ManagerOptions) (Manager, error) { - m := &manager{} - - newOpts := defaultOptions() - newOpts = append(newOpts, opts...) 
- - for _, opt := range newOpts { - err := opt(m) - if err != nil { - return nil, err - } - } - - return m, nil -} - -func WithURL(url string) ManagerOptions { - return func(m *manager) error { - m.url = url - parsedURL, err := neturl.Parse(url) - if err != nil { - return err - } - m.parsedURL = parsedURL - return nil - } -} - -func WithChannelApiPath(path string) ManagerOptions { - return func(m *manager) error { - m.channelApiPath = path - return nil - } -} - -type manager struct { - url string - parsedURL *neturl.URL - channelApiPath string -} - -func (m *manager) prepareAmChannelApiURL() string { - return fmt.Sprintf("%s%s", m.url, m.channelApiPath) -} - -func (m *manager) prepareTestApiURL() string { - return fmt.Sprintf("%s%s", m.url, "v1/testReceiver") -} - -func (m *manager) URL() *neturl.URL { - return m.parsedURL -} - -func (m *manager) URLPath(path string) *neturl.URL { - upath, err := neturl.Parse(path) - if err != nil { - return nil - } - - return m.parsedURL.ResolveReference(upath) -} - -func (m *manager) AddRoute(receiver *Receiver) *model.ApiError { - - receiverString, _ := json.Marshal(receiver) - - amURL := m.prepareAmChannelApiURL() - response, err := http.Post(amURL, contentType, bytes.NewBuffer(receiverString)) - - if err != nil { - zap.L().Error("Error in getting response of API call to alertmanager", zap.String("url", amURL), zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - if response.StatusCode > 299 { - zap.L().Error("Error in getting 2xx response in API call to alertmanager", zap.String("url", amURL), zap.String("status", response.Status)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - return nil -} - -func (m *manager) EditRoute(receiver *Receiver) *model.ApiError { - receiverString, _ := json.Marshal(receiver) - - amURL := m.prepareAmChannelApiURL() - req, err := http.NewRequest(http.MethodPut, amURL, bytes.NewBuffer(receiverString)) - - if err != nil { - zap.L().Error("Error 
creating new update request for API call to alertmanager", zap.String("url", amURL), zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - req.Header.Add("Content-Type", contentType) - - client := &http.Client{} - response, err := client.Do(req) - - if err != nil { - zap.L().Error("Error in getting response of API call to alertmanager", zap.String("url", amURL), zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - if response.StatusCode > 299 { - zap.L().Error("Error in getting 2xx response in PUT API call to alertmanager", zap.String("url", amURL), zap.String("status", response.Status)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - return nil -} - -func (m *manager) DeleteRoute(name string) *model.ApiError { - values := map[string]string{"name": name} - requestData, _ := json.Marshal(values) - - amURL := m.prepareAmChannelApiURL() - req, err := http.NewRequest(http.MethodDelete, amURL, bytes.NewBuffer(requestData)) - - if err != nil { - zap.L().Error("Error in creating new delete request to alertmanager/v1/receivers", zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - req.Header.Add("Content-Type", contentType) - - client := &http.Client{} - response, err := client.Do(req) - - if err != nil { - zap.L().Error("Error in getting response of API call to alertmanager", zap.String("url", amURL), zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - if response.StatusCode > 299 { - err := fmt.Errorf(fmt.Sprintf("Error in getting 2xx response in PUT API call to alertmanager(DELETE %s)\n", amURL), response.Status) - zap.L().Error("Error in getting 2xx response in PUT API call to alertmanager", zap.String("url", amURL), zap.String("status", response.Status)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - return nil -} - -func (m *manager) TestReceiver(receiver *Receiver) *model.ApiError { - - receiverBytes, 
_ := json.Marshal(receiver) - - amTestURL := m.prepareTestApiURL() - response, err := http.Post(amTestURL, contentType, bytes.NewBuffer(receiverBytes)) - - if err != nil { - zap.L().Error("Error in getting response of API call to alertmanager", zap.String("url", amTestURL), zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - if response.StatusCode > 201 && response.StatusCode < 400 { - err := fmt.Errorf(fmt.Sprintf("Invalid parameters in test alert api for alertmanager(POST %s)\n", amTestURL), response.Status) - zap.L().Error("Invalid parameters in test alert api for alertmanager", zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - if response.StatusCode > 400 { - err := fmt.Errorf(fmt.Sprintf("Received Server Error response for API call to alertmanager(POST %s)\n", amTestURL), response.Status) - zap.L().Error("Received Server Error response for API call to alertmanager", zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return nil -} diff --git a/pkg/query-service/integrations/alertManager/model.go b/pkg/query-service/integrations/alertManager/model.go deleted file mode 100644 index 5003830bbf..0000000000 --- a/pkg/query-service/integrations/alertManager/model.go +++ /dev/null @@ -1,79 +0,0 @@ -package alertManager - -import ( - "fmt" - "time" - - "go.signoz.io/signoz/pkg/query-service/utils/labels" -) - -// Receiver configuration provides configuration on how to contact a receiver. -type Receiver struct { - // A unique identifier for this receiver. 
- Name string `yaml:"name" json:"name"` - - EmailConfigs interface{} `yaml:"email_configs,omitempty" json:"email_configs,omitempty"` - PagerdutyConfigs interface{} `yaml:"pagerduty_configs,omitempty" json:"pagerduty_configs,omitempty"` - SlackConfigs interface{} `yaml:"slack_configs,omitempty" json:"slack_configs,omitempty"` - WebhookConfigs interface{} `yaml:"webhook_configs,omitempty" json:"webhook_configs,omitempty"` - OpsGenieConfigs interface{} `yaml:"opsgenie_configs,omitempty" json:"opsgenie_configs,omitempty"` - WechatConfigs interface{} `yaml:"wechat_configs,omitempty" json:"wechat_configs,omitempty"` - PushoverConfigs interface{} `yaml:"pushover_configs,omitempty" json:"pushover_configs,omitempty"` - VictorOpsConfigs interface{} `yaml:"victorops_configs,omitempty" json:"victorops_configs,omitempty"` - SNSConfigs interface{} `yaml:"sns_configs,omitempty" json:"sns_configs,omitempty"` - MSTeamsConfigs interface{} `yaml:"msteams_configs,omitempty" json:"msteams_configs,omitempty"` -} - -type ReceiverResponse struct { - Status string `json:"status"` - Data Receiver `json:"data"` -} - -// Alert is a generic representation of an alert in the Prometheus eco-system. -type Alert struct { - // Label value pairs for purpose of aggregation, matching, and disposition - // dispatching. This must minimally include an "alertname" label. - Labels labels.BaseLabels `json:"labels"` - - // Extra key/value information which does not define alert identity. - Annotations labels.BaseLabels `json:"annotations"` - - // The known time range for this alert. Both ends are optional. - StartsAt time.Time `json:"startsAt,omitempty"` - EndsAt time.Time `json:"endsAt,omitempty"` - GeneratorURL string `json:"generatorURL,omitempty"` - - Receivers []string `json:"receivers,omitempty"` -} - -// Name returns the name of the alert. It is equivalent to the "alertname" label. 
-func (a *Alert) Name() string { - return a.Labels.Get(labels.AlertNameLabel) -} - -// Hash returns a hash over the alert. It is equivalent to the alert labels hash. -func (a *Alert) Hash() uint64 { - return a.Labels.Hash() -} - -func (a *Alert) String() string { - s := fmt.Sprintf("%s[%s][%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7], a.Receivers) - if a.Resolved() { - return s + "[resolved]" - } - return s + "[active]" -} - -// Resolved returns true iff the activity interval ended in the past. -func (a *Alert) Resolved() bool { - return a.ResolvedAt(time.Now()) -} - -// ResolvedAt returns true off the activity interval ended before -// the given timestamp. -func (a *Alert) ResolvedAt(ts time.Time) bool { - if a.EndsAt.IsZero() { - return false - } - return !a.EndsAt.After(ts) -} diff --git a/pkg/query-service/integrations/alertManager/notifier.go b/pkg/query-service/integrations/alertManager/notifier.go deleted file mode 100644 index 434e2bc112..0000000000 --- a/pkg/query-service/integrations/alertManager/notifier.go +++ /dev/null @@ -1,310 +0,0 @@ -package alertManager - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "sync/atomic" - - "net/http" - "net/url" - "sync" - "time" - - old_ctx "golang.org/x/net/context" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - - "go.uber.org/zap" - "golang.org/x/net/context/ctxhttp" -) - -const ( - alertPushEndpoint = "v1/alerts" - contentTypeJSON = "application/json" -) - -// Notifier is responsible for dispatching alert notifications to an -// alert manager service. -type Notifier struct { - queue []*Alert - opts *NotifierOptions - - more chan struct{} - mtx sync.RWMutex - ctx context.Context - cancel func() - - alertmanagers *alertmanagerSet - logger log.Logger -} - -// NotifierOptions are the configurable parameters of a Handler. -type NotifierOptions struct { - QueueCapacity int - // Used for sending HTTP requests to the Alertmanager. 
- Do func(ctx old_ctx.Context, client *http.Client, req *http.Request) (*http.Response, error) - // List of alert manager urls - AlertManagerURLs []string - // timeout limit on requests - Timeout time.Duration -} - -func (opts *NotifierOptions) String() string { - var urls string - for _, u := range opts.AlertManagerURLs { - urls = fmt.Sprintf("%s %s", urls, u) - } - return urls -} - -// todo(amol): add metrics - -func NewNotifier(o *NotifierOptions, logger log.Logger) (*Notifier, error) { - ctx, cancel := context.WithCancel(context.Background()) - if o.Do == nil { - o.Do = ctxhttp.Do - } - if logger == nil { - logger = log.NewNopLogger() - } - - n := &Notifier{ - queue: make([]*Alert, 0, o.QueueCapacity), - ctx: ctx, - cancel: cancel, - more: make(chan struct{}, 1), - opts: o, - logger: logger, - } - timeout := o.Timeout - - if int64(timeout) == 0 { - timeout = time.Duration(30 * time.Second) - } - - amset, err := newAlertmanagerSet(o.AlertManagerURLs, timeout, logger) - if err != nil { - zap.L().Error("failed to parse alert manager urls") - return n, err - } - n.alertmanagers = amset - zap.L().Info("Starting notifier with alert manager", zap.Strings("urls", o.AlertManagerURLs)) - return n, nil -} - -const maxBatchSize = 64 - -func (n *Notifier) queueLen() int { - n.mtx.RLock() - defer n.mtx.RUnlock() - - return len(n.queue) -} - -func (n *Notifier) nextBatch() []*Alert { - n.mtx.Lock() - defer n.mtx.Unlock() - - var alerts []*Alert - - if len(n.queue) > maxBatchSize { - alerts = append(make([]*Alert, 0, maxBatchSize), n.queue[:maxBatchSize]...) - n.queue = n.queue[maxBatchSize:] - } else { - alerts = append(make([]*Alert, 0, len(n.queue)), n.queue...) - n.queue = n.queue[:0] - } - - return alerts -} - -// Run dispatches notifications continuously. -func (n *Notifier) Run() { - zap.L().Info("msg: Initiating alert notifier...") - for { - select { - case <-n.ctx.Done(): - return - case <-n.more: - } - alerts := n.nextBatch() - - if !n.sendAll(alerts...) 
{ - zap.L().Warn("msg: dropped alerts", zap.Int("count", len(alerts))) - // n.metrics.dropped.Add(float64(len(alerts))) - } - // If the queue still has items left, kick off the next iteration. - if n.queueLen() > 0 { - n.setMore() - } - } -} - -// Send queues the given notification requests for processing. -// Panics if called on a handler that is not running. -func (n *Notifier) Send(alerts ...*Alert) { - n.mtx.Lock() - defer n.mtx.Unlock() - - // Queue capacity should be significantly larger than a single alert - // batch could be. - if d := len(alerts) - n.opts.QueueCapacity; d > 0 { - alerts = alerts[d:] - - level.Warn(n.logger).Log("msg", "Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) - //n.metrics.dropped.Add(float64(d)) - } - - // If the queue is full, remove the oldest alerts in favor - // of newer ones. - if d := (len(n.queue) + len(alerts)) - n.opts.QueueCapacity; d > 0 { - n.queue = n.queue[d:] - - level.Warn(n.logger).Log("msg", "Alert notification queue full, dropping alerts", "num_dropped", d) - //n.metrics.dropped.Add(float64(d)) - } - n.queue = append(n.queue, alerts...) - - // Notify sending goroutine that there are alerts to be processed. - n.setMore() -} - -// setMore signals that the alert queue has items. -func (n *Notifier) setMore() { - // If we cannot send on the channel, it means the signal already exists - // and has not been consumed yet. - select { - case n.more <- struct{}{}: - default: - } -} - -// Alertmanagers returns a slice of Alertmanager URLs. -func (n *Notifier) Alertmanagers() []*url.URL { - n.mtx.RLock() - amset := n.alertmanagers - n.mtx.RUnlock() - - var res []*url.URL - - amset.mtx.RLock() - for _, am := range amset.ams { - res = append(res, am.URLPath(alertPushEndpoint)) - } - amset.mtx.RUnlock() - - return res -} - -// sendAll sends the alerts to all configured Alertmanagers concurrently. -// It returns true if the alerts could be sent successfully to at least one Alertmanager. 
-func (n *Notifier) sendAll(alerts ...*Alert) bool { - - b, err := json.Marshal(alerts) - if err != nil { - zap.L().Error("Encoding alerts failed", zap.Error(err)) - return false - } - - n.mtx.RLock() - ams := n.alertmanagers - n.mtx.RUnlock() - - var ( - wg sync.WaitGroup - numSuccess uint64 - ) - - ams.mtx.RLock() - - for _, am := range ams.ams { - wg.Add(1) - - ctx, cancel := context.WithTimeout(n.ctx, time.Duration(ams.timeout)) - defer cancel() - - go func(ams *alertmanagerSet, am Manager) { - u := am.URLPath(alertPushEndpoint).String() - if err := n.sendOne(ctx, ams.client, u, b); err != nil { - zap.L().Error("Error calling alert API", zap.String("alertmanager", u), zap.Int("count", len(alerts)), zap.Error(err)) - } else { - atomic.AddUint64(&numSuccess, 1) - } - // n.metrics.latency.WithLabelValues(u).Observe(time.Since(begin).Seconds()) - // n.metrics.sent.WithLabelValues(u).Add(float64(len(alerts))) - - wg.Done() - }(ams, am) - } - ams.mtx.RUnlock() - - wg.Wait() - - return numSuccess > 0 -} - -func (n *Notifier) sendOne(ctx context.Context, c *http.Client, url string, b []byte) error { - req, err := http.NewRequest("POST", url, bytes.NewReader(b)) - if err != nil { - return err - } - req.Header.Set("Content-Type", contentTypeJSON) - resp, err := n.opts.Do(ctx, c, req) - if err != nil { - return err - } - defer resp.Body.Close() - - // Any HTTP status 2xx is OK. - if resp.StatusCode/100 != 2 { - return fmt.Errorf("bad response status %v", resp.Status) - } - return err -} - -// Stop shuts down the notification handler. -func (n *Notifier) Stop() { - level.Info(n.logger).Log("msg", "Stopping notification manager...") - n.cancel() -} - -// alertmanagerSet contains a set of Alertmanagers discovered via a group of service -// discovery definitions that have a common configuration on how alerts should be sent. 
-type alertmanagerSet struct { - urls []string - client *http.Client - timeout time.Duration - mtx sync.RWMutex - ams []Manager - - logger log.Logger -} - -func newAlertmanagerSet(urls []string, timeout time.Duration, logger log.Logger) (*alertmanagerSet, error) { - client := &http.Client{} - - s := &alertmanagerSet{ - client: client, - urls: urls, - logger: logger, - timeout: timeout, - } - - ams := []Manager{} - for _, u := range urls { - am, err := New(WithURL(u)) - if err != nil { - level.Error(s.logger).Log(fmt.Sprintf("invalid alert manager url %s: %s", u, err)) - } else { - ams = append(ams, am) - } - } - if len(ams) == 0 { - return s, fmt.Errorf("no alert managers") - } - s.ams = ams - return s, nil -} diff --git a/pkg/query-service/main.go b/pkg/query-service/main.go index 55c00a54c5..2e38c37343 100644 --- a/pkg/query-service/main.go +++ b/pkg/query-service/main.go @@ -4,8 +4,6 @@ import ( "context" "flag" "os" - "os/signal" - "syscall" "time" prommodel "github.com/prometheus/common/model" @@ -94,7 +92,14 @@ func main() { zap.L().Fatal("Failed to create config", zap.Error(err)) } - signoz, err := signoz.New(context.Background(), config, signoz.NewProviderConfig()) + signoz, err := signoz.New( + context.Background(), + config, + signoz.NewCacheProviderFactories(), + signoz.NewWebProviderFactories(), + signoz.NewSQLStoreProviderFactories(), + signoz.NewTelemetryStoreProviderFactories(), + ) if err != nil { zap.L().Fatal("Failed to create signoz struct", zap.Error(err)) } @@ -142,22 +147,20 @@ func main() { logger.Fatal("Failed to initialize auth cache", zap.Error(err)) } - signalsChannel := make(chan os.Signal, 1) - signal.Notify(signalsChannel, os.Interrupt, syscall.SIGTERM) + signoz.Start(context.Background()) - for { - select { - case status := <-server.HealthCheckStatus(): - logger.Info("Received HealthCheck status: ", zap.Int("status", int(status))) - case <-signalsChannel: - logger.Info("Received OS Interrupt Signal ... 
") - err := server.Stop() - if err != nil { - logger.Fatal("Failed to stop server", zap.Error(err)) - } - logger.Info("Server stopped") - return - } + if err := signoz.Wait(context.Background()); err != nil { + zap.L().Fatal("Failed to start signoz", zap.Error(err)) + } + + err = server.Stop() + if err != nil { + zap.L().Fatal("Failed to stop server", zap.Error(err)) + } + + err = signoz.Stop(context.Background()) + if err != nil { + zap.L().Fatal("Failed to stop signoz", zap.Error(err)) } } diff --git a/pkg/query-service/rules/api_params.go b/pkg/query-service/rules/api_params.go index e4c76a6d71..fcc8a37742 100644 --- a/pkg/query-service/rules/api_params.go +++ b/pkg/query-service/rules/api_params.go @@ -80,7 +80,6 @@ func parsePostableRule(content []byte, kind RuleDataKind) (*PostableRule, error) // parseIntoRule loads the content (data) into PostableRule and also // validates the end result func parseIntoRule(initRule PostableRule, content []byte, kind RuleDataKind) (*PostableRule, error) { - rule := &initRule var err error diff --git a/pkg/query-service/rules/base_rule.go b/pkg/query-service/rules/base_rule.go index 466cba83fd..00c4f0819a 100644 --- a/pkg/query-service/rules/base_rule.go +++ b/pkg/query-service/rules/base_rule.go @@ -13,6 +13,7 @@ import ( "go.signoz.io/signoz/pkg/query-service/model" v3 "go.signoz.io/signoz/pkg/query-service/model/v3" qslabels "go.signoz.io/signoz/pkg/query-service/utils/labels" + "go.signoz.io/signoz/pkg/sqlstore" "go.uber.org/zap" ) @@ -78,6 +79,8 @@ type BaseRule struct { // querying the v4 table on low cardinal temporality column // should be fast but we can still avoid the query if we have the data in memory TemporalityMap map[string]map[v3.Temporality]bool + + sqlstore sqlstore.SQLStore } type RuleOption func(*BaseRule) @@ -106,6 +109,12 @@ func WithLogger(logger *zap.Logger) RuleOption { } } +func WithSQLStore(sqlstore sqlstore.SQLStore) RuleOption { + return func(r *BaseRule) { + r.sqlstore = sqlstore + } +} + func 
NewBaseRule(id string, p *PostableRule, reader interfaces.Reader, opts ...RuleOption) (*BaseRule, error) { if p.RuleCondition == nil || !p.RuleCondition.IsValid() { return nil, fmt.Errorf("invalid rule condition") @@ -309,6 +318,20 @@ func (r *BaseRule) ActiveAlerts() []*Alert { } func (r *BaseRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) { + var orgID string + err := r. + sqlstore. + BunDB(). + NewSelect(). + Table("organizations"). + ColumnExpr("id"). + Limit(1). + Scan(ctx, &orgID) + if err != nil { + r.logger.Error("failed to get org ids", zap.Error(err)) + return + } + alerts := []*Alert{} r.ForEachActiveAlert(func(alert *Alert) { if alert.needsSending(ts, resendDelay) { @@ -322,7 +345,7 @@ func (r *BaseRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay tim alerts = append(alerts, &anew) } }) - notifyFunc(ctx, "", alerts...) + notifyFunc(ctx, orgID, "", alerts...) } func (r *BaseRule) ForEachActiveAlert(f func(*Alert)) { diff --git a/pkg/query-service/rules/db.go b/pkg/query-service/rules/db.go index f3f89f0156..c420e45b92 100644 --- a/pkg/query-service/rules/db.go +++ b/pkg/query-service/rules/db.go @@ -11,30 +11,24 @@ import ( "github.com/jmoiron/sqlx" "github.com/pkg/errors" - "go.signoz.io/signoz/pkg/query-service/common" - am "go.signoz.io/signoz/pkg/query-service/integrations/alertManager" + "github.com/uptrace/bun" "go.signoz.io/signoz/pkg/query-service/model" v3 "go.signoz.io/signoz/pkg/query-service/model/v3" + "go.signoz.io/signoz/pkg/sqlstore" "go.signoz.io/signoz/pkg/types/authtypes" "go.uber.org/zap" ) // Data store to capture user alert rule settings type RuleDB interface { - GetChannel(id string) (*model.ChannelItem, *model.ApiError) - GetChannels() (*[]model.ChannelItem, *model.ApiError) - DeleteChannel(id string) *model.ApiError - CreateChannel(receiver *am.Receiver) (*am.Receiver, *model.ApiError) - EditChannel(receiver *am.Receiver, id string) 
(*am.Receiver, *model.ApiError) - - // CreateRuleTx stores rule in the db and returns tx and group name (on success) - CreateRuleTx(ctx context.Context, rule string) (int64, Tx, error) + // CreateRule stores rule in the db and returns tx and group name (on success) + CreateRule(context.Context, *StoredRule, func(context.Context, int64) error) (int64, error) // EditRuleTx updates the given rule in the db and returns tx and group name (on success) - EditRuleTx(ctx context.Context, rule string, id string) (string, Tx, error) + EditRule(context.Context, *StoredRule, func(context.Context) error) error // DeleteRuleTx deletes the given rule in the db and returns tx and group name (on success) - DeleteRuleTx(ctx context.Context, id string) (string, Tx, error) + DeleteRule(context.Context, string, func(context.Context) error) error // GetStoredRules fetches the rule definitions from db GetStoredRules(ctx context.Context) ([]StoredRule, error) @@ -62,142 +56,83 @@ type RuleDB interface { } type StoredRule struct { - Id int `json:"id" db:"id"` - CreatedAt *time.Time `json:"created_at" db:"created_at"` - CreatedBy *string `json:"created_by" db:"created_by"` - UpdatedAt *time.Time `json:"updated_at" db:"updated_at"` - UpdatedBy *string `json:"updated_by" db:"updated_by"` - Data string `json:"data" db:"data"` -} + bun.BaseModel `bun:"rules"` -type Tx interface { - Commit() error - Rollback() error + Id int `json:"id" db:"id" bun:"id,pk,autoincrement"` + CreatedAt *time.Time `json:"created_at" db:"created_at" bun:"created_at"` + CreatedBy *string `json:"created_by" db:"created_by" bun:"created_by"` + UpdatedAt *time.Time `json:"updated_at" db:"updated_at" bun:"updated_at"` + UpdatedBy *string `json:"updated_by" db:"updated_by" bun:"updated_by"` + Data string `json:"data" db:"data" bun:"data"` } type ruleDB struct { *sqlx.DB - alertManager am.Manager + sqlstore sqlstore.SQLStore } -// todo: move init methods for creating tables - -func NewRuleDB(db *sqlx.DB, alertManager 
am.Manager) RuleDB { - return &ruleDB{ - db, - alertManager, - } +func NewRuleDB(db *sqlx.DB, sqlstore sqlstore.SQLStore) RuleDB { + return &ruleDB{db, sqlstore} } -// CreateRuleTx stores a given rule in db and returns task name, -// sql tx and error (if any) -func (r *ruleDB) CreateRuleTx(ctx context.Context, rule string) (int64, Tx, error) { - var lastInsertId int64 +// CreateRule stores a given rule in db and returns task name and error (if any) +func (r *ruleDB) CreateRule(ctx context.Context, storedRule *StoredRule, cb func(context.Context, int64) error) (int64, error) { + err := r.sqlstore.RunInTxCtx(ctx, nil, func(ctx context.Context) error { + _, err := r.sqlstore. + BunDBCtx(ctx). + NewInsert(). + Model(storedRule). + Exec(ctx) + if err != nil { + return err + } + + return cb(ctx, int64(storedRule.Id)) + }) - var userEmail string - if user := common.GetUserFromContext(ctx); user != nil { - userEmail = user.Email - } - createdAt := time.Now() - updatedAt := time.Now() - tx, err := r.Begin() if err != nil { - return lastInsertId, nil, err + return 0, err } - stmt, err := tx.Prepare(`INSERT into rules (created_at, created_by, updated_at, updated_by, data) VALUES($1,$2,$3,$4,$5);`) - if err != nil { - zap.L().Error("Error in preparing statement for INSERT to rules", zap.Error(err)) - tx.Rollback() - return lastInsertId, nil, err - } - - defer stmt.Close() - - result, err := stmt.Exec(createdAt, userEmail, updatedAt, userEmail, rule) - if err != nil { - zap.L().Error("Error in Executing prepared statement for INSERT to rules", zap.Error(err)) - tx.Rollback() // return an error too, we may want to wrap them - return lastInsertId, nil, err - } - - lastInsertId, err = result.LastInsertId() - if err != nil { - zap.L().Error("Error in getting last insert id for INSERT to rules\n", zap.Error(err)) - tx.Rollback() // return an error too, we may want to wrap them - return lastInsertId, nil, err - } - - return lastInsertId, tx, nil + return int64(storedRule.Id), nil } 
-// EditRuleTx stores a given rule string in database and returns -// task name, sql tx and error (if any) -func (r *ruleDB) EditRuleTx(ctx context.Context, rule string, id string) (string, Tx, error) { +// EditRule stores a given rule string in database and returns task name and error (if any) +func (r *ruleDB) EditRule(ctx context.Context, storedRule *StoredRule, cb func(context.Context) error) error { + return r.sqlstore.RunInTxCtx(ctx, nil, func(ctx context.Context) error { + _, err := r.sqlstore. + BunDBCtx(ctx). + NewUpdate(). + Model(storedRule). + WherePK(). + Exec(ctx) + if err != nil { + return err + } - var groupName string - idInt, _ := strconv.Atoi(id) - if idInt == 0 { - return groupName, nil, fmt.Errorf("failed to read alert id from parameters") - } - - var userEmail string - if user := common.GetUserFromContext(ctx); user != nil { - userEmail = user.Email - } - updatedAt := time.Now() - groupName = prepareTaskName(int64(idInt)) - - // todo(amol): resolve this error - database locked when using - // edit transaction with sqlx - // tx, err := r.Begin() - //if err != nil { - // return groupName, tx, err - //} - stmt, err := r.Prepare(`UPDATE rules SET updated_by=$1, updated_at=$2, data=$3 WHERE id=$4;`) - if err != nil { - zap.L().Error("Error in preparing statement for UPDATE to rules", zap.Error(err)) - // tx.Rollback() - return groupName, nil, err - } - defer stmt.Close() - - if _, err := stmt.Exec(userEmail, updatedAt, rule, idInt); err != nil { - zap.L().Error("Error in Executing prepared statement for UPDATE to rules", zap.Error(err)) - // tx.Rollback() // return an error too, we may want to wrap them - return groupName, nil, err - } - return groupName, nil, nil + return cb(ctx) + }) } -// DeleteRuleTx deletes a given rule with id and returns -// taskname, sql tx and error (if any) -func (r *ruleDB) DeleteRuleTx(ctx context.Context, id string) (string, Tx, error) { +// DeleteRule deletes a given rule with id and returns taskname and error (if 
any) +func (r *ruleDB) DeleteRule(ctx context.Context, id string, cb func(context.Context) error) error { + if err := r.sqlstore.RunInTxCtx(ctx, nil, func(ctx context.Context) error { + _, err := r.sqlstore. + BunDBCtx(ctx). + NewDelete(). + Model(&StoredRule{}). + Where("id = ?", id). + Exec(ctx) + if err != nil { + return err + } - idInt, _ := strconv.Atoi(id) - groupName := prepareTaskName(int64(idInt)) - - // commented as this causes db locked error - // tx, err := r.Begin() - // if err != nil { - // return groupName, tx, err - // } - - stmt, err := r.Prepare(`DELETE FROM rules WHERE id=$1;`) - - if err != nil { - return groupName, nil, err + return cb(ctx) + }); err != nil { + return err } - defer stmt.Close() - - if _, err := stmt.Exec(idInt); err != nil { - zap.L().Error("Error in Executing prepared statement for DELETE to rules", zap.Error(err)) - // tx.Rollback() - return groupName, nil, err - } - - return groupName, nil, nil + return nil } func (r *ruleDB) GetStoredRules(ctx context.Context) ([]StoredRule, error) { @@ -320,114 +255,7 @@ func (r *ruleDB) EditPlannedMaintenance(ctx context.Context, maintenance Planned return "", nil } -func getChannelType(receiver *am.Receiver) string { - - if receiver.EmailConfigs != nil { - return "email" - } - if receiver.OpsGenieConfigs != nil { - return "opsgenie" - } - if receiver.PagerdutyConfigs != nil { - return "pagerduty" - } - if receiver.PushoverConfigs != nil { - return "pushover" - } - if receiver.SNSConfigs != nil { - return "sns" - } - if receiver.SlackConfigs != nil { - return "slack" - } - if receiver.VictorOpsConfigs != nil { - return "victorops" - } - if receiver.WebhookConfigs != nil { - return "webhook" - } - if receiver.WechatConfigs != nil { - return "wechat" - } - if receiver.MSTeamsConfigs != nil { - return "msteams" - } - return "" -} - -func (r *ruleDB) GetChannel(id string) (*model.ChannelItem, *model.ApiError) { - - idInt, _ := strconv.Atoi(id) - channel := model.ChannelItem{} - - query := 
"SELECT id, created_at, updated_at, name, type, data data FROM notification_channels WHERE id=?;" - - stmt, err := r.Preparex(query) - - if err != nil { - zap.L().Error("Error in preparing sql query for GetChannel", zap.Error(err)) - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - err = stmt.Get(&channel, idInt) - - if err != nil { - zap.L().Error("Error in getting channel with id", zap.Int("id", idInt), zap.Error(err)) - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return &channel, nil -} - -func (r *ruleDB) DeleteChannel(id string) *model.ApiError { - - idInt, _ := strconv.Atoi(id) - - channelToDelete, apiErrorObj := r.GetChannel(id) - - if apiErrorObj != nil { - return apiErrorObj - } - - tx, err := r.Begin() - if err != nil { - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - { - stmt, err := tx.Prepare(`DELETE FROM notification_channels WHERE id=$1;`) - if err != nil { - zap.L().Error("Error in preparing statement for INSERT to notification_channels", zap.Error(err)) - tx.Rollback() - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - defer stmt.Close() - - if _, err := stmt.Exec(idInt); err != nil { - zap.L().Error("Error in Executing prepared statement for INSERT to notification_channels", zap.Error(err)) - tx.Rollback() // return an error too, we may want to wrap them - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - } - - apiError := r.alertManager.DeleteRoute(channelToDelete.Name) - if apiError != nil { - tx.Rollback() - return apiError - } - - err = tx.Commit() - if err != nil { - zap.L().Error("Error in committing transaction for DELETE command to notification_channels", zap.Error(err)) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return nil - -} - -func (r *ruleDB) GetChannels() (*[]model.ChannelItem, *model.ApiError) { - +func (r *ruleDB) getChannels() (*[]model.ChannelItem, *model.ApiError) { channels := []model.ChannelItem{} 
query := "SELECT id, created_at, updated_at, name, type, data data FROM notification_channels" @@ -442,105 +270,6 @@ func (r *ruleDB) GetChannels() (*[]model.ChannelItem, *model.ApiError) { } return &channels, nil - -} - -func (r *ruleDB) EditChannel(receiver *am.Receiver, id string) (*am.Receiver, *model.ApiError) { - - idInt, _ := strconv.Atoi(id) - - channel, apiErrObj := r.GetChannel(id) - - if apiErrObj != nil { - return nil, apiErrObj - } - if channel.Name != receiver.Name { - return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("channel name cannot be changed")} - } - - tx, err := r.Begin() - if err != nil { - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - channel_type := getChannelType(receiver) - - receiverString, _ := json.Marshal(receiver) - - { - stmt, err := tx.Prepare(`UPDATE notification_channels SET updated_at=$1, type=$2, data=$3 WHERE id=$4;`) - - if err != nil { - zap.L().Error("Error in preparing statement for UPDATE to notification_channels", zap.Error(err)) - tx.Rollback() - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - defer stmt.Close() - - if _, err := stmt.Exec(time.Now(), channel_type, string(receiverString), idInt); err != nil { - zap.L().Error("Error in Executing prepared statement for UPDATE to notification_channels", zap.Error(err)) - tx.Rollback() // return an error too, we may want to wrap them - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - } - - apiError := r.alertManager.EditRoute(receiver) - if apiError != nil { - tx.Rollback() - return nil, apiError - } - - err = tx.Commit() - if err != nil { - zap.L().Error("Error in committing transaction for INSERT to notification_channels", zap.Error(err)) - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return receiver, nil - -} - -func (r *ruleDB) CreateChannel(receiver *am.Receiver) (*am.Receiver, *model.ApiError) { - - channel_type := getChannelType(receiver) - - 
receiverString, _ := json.Marshal(receiver) - - tx, err := r.Begin() - if err != nil { - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - { - stmt, err := tx.Prepare(`INSERT INTO notification_channels (created_at, updated_at, name, type, data) VALUES($1,$2,$3,$4,$5);`) - if err != nil { - zap.L().Error("Error in preparing statement for INSERT to notification_channels", zap.Error(err)) - tx.Rollback() - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - defer stmt.Close() - - if _, err := stmt.Exec(time.Now(), time.Now(), receiver.Name, channel_type, string(receiverString)); err != nil { - zap.L().Error("Error in Executing prepared statement for INSERT to notification_channels", zap.Error(err)) - tx.Rollback() // return an error too, we may want to wrap them - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - } - - apiError := r.alertManager.AddRoute(receiver) - if apiError != nil { - tx.Rollback() - return nil, apiError - } - - err = tx.Commit() - if err != nil { - zap.L().Error("Error in committing transaction for INSERT to notification_channels", zap.Error(err)) - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return receiver, nil - } func (r *ruleDB) GetAlertsInfo(ctx context.Context) (*model.AlertsInfo, error) { @@ -629,7 +358,7 @@ func (r *ruleDB) GetAlertsInfo(ctx context.Context) (*model.AlertsInfo, error) { } alertsInfo.AlertNames = alertNames - channels, _ := r.GetChannels() + channels, _ := r.getChannels() if channels != nil { alertsInfo.TotalChannels = len(*channels) for _, channel := range *channels { diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go index d5e197ffc6..3935424efc 100644 --- a/pkg/query-service/rules/manager.go +++ b/pkg/query-service/rules/manager.go @@ -14,41 +14,45 @@ import ( "errors" + "github.com/go-openapi/strfmt" "github.com/jmoiron/sqlx" + "go.signoz.io/signoz/pkg/alertmanager" 
"go.signoz.io/signoz/pkg/query-service/cache" - am "go.signoz.io/signoz/pkg/query-service/integrations/alertManager" "go.signoz.io/signoz/pkg/query-service/interfaces" "go.signoz.io/signoz/pkg/query-service/model" pqle "go.signoz.io/signoz/pkg/query-service/pqlEngine" "go.signoz.io/signoz/pkg/query-service/telemetry" + "go.signoz.io/signoz/pkg/sqlstore" + "go.signoz.io/signoz/pkg/types/alertmanagertypes" + "go.signoz.io/signoz/pkg/types/authtypes" ) type PrepareTaskOptions struct { - Rule *PostableRule - TaskName string - RuleDB RuleDB - Logger *zap.Logger - Reader interfaces.Reader - Cache cache.Cache - FF interfaces.FeatureLookup - ManagerOpts *ManagerOptions - NotifyFunc NotifyFunc - + Rule *PostableRule + TaskName string + RuleDB RuleDB + Logger *zap.Logger + Reader interfaces.Reader + Cache cache.Cache + FF interfaces.FeatureLookup + ManagerOpts *ManagerOptions + NotifyFunc NotifyFunc + SQLStore sqlstore.SQLStore UseLogsNewSchema bool UseTraceNewSchema bool } type PrepareTestRuleOptions struct { - Rule *PostableRule - RuleDB RuleDB - Logger *zap.Logger - Reader interfaces.Reader - Cache cache.Cache - FF interfaces.FeatureLookup - ManagerOpts *ManagerOptions - NotifyFunc NotifyFunc - + Rule *PostableRule + RuleDB RuleDB + Logger *zap.Logger + Reader interfaces.Reader + Cache cache.Cache + FF interfaces.FeatureLookup + ManagerOpts *ManagerOptions + NotifyFunc NotifyFunc + SQLStore sqlstore.SQLStore UseLogsNewSchema bool UseTraceNewSchema bool } @@ -72,8 +76,7 @@ func prepareTaskName(ruleId interface{}) string { // ManagerOptions bundles options for the Manager. 
type ManagerOptions struct { - NotifierOpts am.NotifierOptions - PqlEngine *pqle.PqlEngine + PqlEngine *pqle.PqlEngine // RepoURL is used to generate a backlink in sent alert messages RepoURL string @@ -96,6 +99,8 @@ type ManagerOptions struct { UseLogsNewSchema bool UseTraceNewSchema bool PrepareTestRuleFunc func(opts PrepareTestRuleOptions) (int, *model.ApiError) + Alertmanager alertmanager.Alertmanager + SQLStore sqlstore.SQLStore } // The Manager manages recording and alerting rules. @@ -105,9 +110,6 @@ type Manager struct { rules map[string]Rule mtx sync.RWMutex block chan struct{} - // Notifier sends messages through alert manager - notifier *am.Notifier - // datastore to store alert definitions ruleDB RuleDB @@ -121,15 +123,12 @@ type Manager struct { UseLogsNewSchema bool UseTraceNewSchema bool + + alertmanager alertmanager.Alertmanager + sqlstore sqlstore.SQLStore } func defaultOptions(o *ManagerOptions) *ManagerOptions { - if o.NotifierOpts.QueueCapacity == 0 { - o.NotifierOpts.QueueCapacity = 10000 - } - if o.NotifierOpts.Timeout == 0 { - o.NotifierOpts.Timeout = 10 * time.Second - } if o.ResendDelay == time.Duration(0) { o.ResendDelay = 1 * time.Minute } @@ -161,6 +160,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) { opts.UseLogsNewSchema, opts.UseTraceNewSchema, WithEvalDelay(opts.ManagerOpts.EvalDelay), + WithSQLStore(opts.SQLStore), ) if err != nil { @@ -181,6 +181,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) { opts.Logger, opts.Reader, opts.ManagerOpts.PqlEngine, + WithSQLStore(opts.SQLStore), ) if err != nil { @@ -202,30 +203,12 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) { // NewManager returns an implementation of Manager, ready to be started // by calling the Run method. 
func NewManager(o *ManagerOptions) (*Manager, error) { - o = defaultOptions(o) - // here we just initiate notifier, it will be started - // in run() - notifier, err := am.NewNotifier(&o.NotifierOpts, nil) - if err != nil { - // todo(amol): rethink on this, the query service - // should not be down because alert manager is not available - return nil, err - } - - amManager, err := am.New() - if err != nil { - return nil, err - } - - db := NewRuleDB(o.DBConn, amManager) - + db := NewRuleDB(o.DBConn, o.SQLStore) telemetry.GetInstance().SetAlertsInfoCallback(db.GetAlertsInfo) - m := &Manager{ tasks: map[string]Task{}, rules: map[string]Rule{}, - notifier: notifier, ruleDB: db, opts: o, block: make(chan struct{}), @@ -235,7 +218,10 @@ func NewManager(o *ManagerOptions) (*Manager, error) { cache: o.Cache, prepareTaskFunc: o.PrepareTaskFunc, prepareTestRuleFunc: o.PrepareTestRuleFunc, + alertmanager: o.Alertmanager, + sqlstore: o.SQLStore, } + return m, nil } @@ -309,9 +295,6 @@ func (m *Manager) initiate() error { // Run starts processing of the rule manager. 
func (m *Manager) run() { - // initiate notifier - go m.notifier.Run() - // initiate blocked tasks close(m.block) } @@ -333,26 +316,65 @@ func (m *Manager) Stop() { // EditRuleDefinition writes the rule definition to the // datastore and also updates the rule executor func (m *Manager) EditRule(ctx context.Context, ruleStr string, id string) error { + claims, ok := authtypes.ClaimsFromContext(ctx) + if !ok { + return errors.New("claims not found in context") + } parsedRule, err := ParsePostableRule([]byte(ruleStr)) - if err != nil { return err } - taskName, _, err := m.ruleDB.EditRuleTx(ctx, ruleStr, id) + existingRule, err := m.ruleDB.GetStoredRule(ctx, id) if err != nil { return err } - if !m.opts.DisableRules { - err = m.syncRuleStateWithTask(taskName, parsedRule) + now := time.Now() + existingRule.UpdatedAt = &now + existingRule.UpdatedBy = &claims.Email + existingRule.Data = ruleStr + + return m.ruleDB.EditRule(ctx, existingRule, func(ctx context.Context) error { + cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID) if err != nil { return err } - } - return nil + var preferredChannels []string + if len(parsedRule.PreferredChannels) == 0 { + channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID) + if err != nil { + return err + } + + for _, channel := range channels { + preferredChannels = append(preferredChannels, channel.Name) + } + } else { + preferredChannels = parsedRule.PreferredChannels + } + + err = cfg.UpdateRuleIDMatcher(id, preferredChannels) + if err != nil { + return err + } + + err = m.alertmanager.SetConfig(ctx, cfg) + if err != nil { + return err + } + + if !m.opts.DisableRules { + err = m.syncRuleStateWithTask(prepareTaskName(existingRule.Id), parsedRule) + if err != nil { + return err + } + } + + return nil + }) } func (m *Manager) editTask(rule *PostableRule, taskName string) error { @@ -371,6 +393,7 @@ func (m *Manager) editTask(rule *PostableRule, taskName string) error { FF: m.featureFlags, ManagerOpts: m.opts, NotifyFunc: 
m.prepareNotifyFunc(), + SQLStore: m.sqlstore, UseLogsNewSchema: m.opts.UseLogsNewSchema, UseTraceNewSchema: m.opts.UseTraceNewSchema, @@ -411,24 +434,45 @@ func (m *Manager) editTask(rule *PostableRule, taskName string) error { } func (m *Manager) DeleteRule(ctx context.Context, id string) error { - - idInt, err := strconv.Atoi(id) + _, err := strconv.Atoi(id) if err != nil { zap.L().Error("delete rule received an rule id in invalid format, must be a number", zap.String("id", id), zap.Error(err)) return fmt.Errorf("delete rule received an rule id in invalid format, must be a number") } - taskName := prepareTaskName(int64(idInt)) - if !m.opts.DisableRules { - m.deleteTask(taskName) + claims, ok := authtypes.ClaimsFromContext(ctx) + if !ok { + return errors.New("claims not found in context") } - if _, _, err := m.ruleDB.DeleteRuleTx(ctx, id); err != nil { - zap.L().Error("failed to delete the rule from rule db", zap.String("id", id), zap.Error(err)) + _, err = m.ruleDB.GetStoredRule(ctx, id) + if err != nil { return err } - return nil + return m.ruleDB.DeleteRule(ctx, id, func(ctx context.Context) error { + cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID) + if err != nil { + return err + } + + err = cfg.DeleteRuleIDMatcher(id) + if err != nil { + return err + } + + err = m.alertmanager.SetConfig(ctx, cfg) + if err != nil { + return err + } + + taskName := prepareTaskName(id) + if !m.opts.DisableRules { + m.deleteTask(taskName) + } + + return nil + }) } func (m *Manager) deleteTask(taskName string) { @@ -451,32 +495,71 @@ func (m *Manager) deleteTask(taskName string) { // starts an executor for the rule func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*GettableRule, error) { parsedRule, err := ParsePostableRule([]byte(ruleStr)) - if err != nil { return nil, err } - lastInsertId, tx, err := m.ruleDB.CreateRuleTx(ctx, ruleStr) - taskName := prepareTaskName(lastInsertId) - if err != nil { - return nil, err + claims, ok := 
authtypes.ClaimsFromContext(ctx) + if !ok { + return nil, errors.New("claims not found in context") } - if !m.opts.DisableRules { - if err := m.addTask(parsedRule, taskName); err != nil { - tx.Rollback() - return nil, err + + now := time.Now() + storedRule := &StoredRule{ + CreatedAt: &now, + CreatedBy: &claims.Email, + UpdatedAt: &now, + UpdatedBy: &claims.Email, + Data: ruleStr, + } + + id, err := m.ruleDB.CreateRule(ctx, storedRule, func(ctx context.Context, id int64) error { + cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID) + if err != nil { + return err } - } - err = tx.Commit() + + var preferredChannels []string + if len(parsedRule.PreferredChannels) == 0 { + channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID) + if err != nil { + return err + } + + for _, channel := range channels { + preferredChannels = append(preferredChannels, channel.Name) + } + } else { + preferredChannels = parsedRule.PreferredChannels + } + + err = cfg.CreateRuleIDMatcher(fmt.Sprintf("%d", id), preferredChannels) + if err != nil { + return err + } + + err = m.alertmanager.SetConfig(ctx, cfg) + if err != nil { + return err + } + + taskName := prepareTaskName(id) + if !m.opts.DisableRules { + if err := m.addTask(parsedRule, taskName); err != nil { + return err + } + } + + return nil + }) if err != nil { return nil, err } - gettableRule := &GettableRule{ - Id: fmt.Sprintf("%d", lastInsertId), + return &GettableRule{ + Id: fmt.Sprintf("%d", id), PostableRule: *parsedRule, - } - return gettableRule, nil + }, nil } func (m *Manager) addTask(rule *PostableRule, taskName string) error { @@ -494,6 +577,7 @@ func (m *Manager) addTask(rule *PostableRule, taskName string) error { FF: m.featureFlags, ManagerOpts: m.opts, NotifyFunc: m.prepareNotifyFunc(), + SQLStore: m.sqlstore, UseLogsNewSchema: m.opts.UseLogsNewSchema, UseTraceNewSchema: m.opts.UseTraceNewSchema, @@ -594,12 +678,12 @@ func (m *Manager) TriggeredAlerts() []*NamedAlert { } // NotifyFunc sends notifications about 
a set of alerts generated by the given expression. -type NotifyFunc func(ctx context.Context, expr string, alerts ...*Alert) +type NotifyFunc func(ctx context.Context, orgID string, expr string, alerts ...*Alert) // prepareNotifyFunc implements the NotifyFunc for a Notifier. func (m *Manager) prepareNotifyFunc() NotifyFunc { - return func(ctx context.Context, expr string, alerts ...*Alert) { - var res []*am.Alert + return func(ctx context.Context, orgID string, expr string, alerts ...*Alert) { + var res []*alertmanagertypes.PostableAlert for _, alert := range alerts { generatorURL := alert.GeneratorURL @@ -607,27 +691,71 @@ func (m *Manager) prepareNotifyFunc() NotifyFunc { generatorURL = m.opts.RepoURL } - a := &am.Alert{ - StartsAt: alert.FiredAt, - Labels: alert.Labels, - Annotations: alert.Annotations, - GeneratorURL: generatorURL, - Receivers: alert.Receivers, + a := &alertmanagertypes.PostableAlert{ + Annotations: alert.Annotations.Map(), + StartsAt: strfmt.DateTime(alert.FiredAt), + Alert: alertmanagertypes.AlertModel{ + Labels: alert.Labels.Map(), + GeneratorURL: strfmt.URI(generatorURL), + }, } if !alert.ResolvedAt.IsZero() { - a.EndsAt = alert.ResolvedAt + a.EndsAt = strfmt.DateTime(alert.ResolvedAt) } else { - a.EndsAt = alert.ValidUntil + a.EndsAt = strfmt.DateTime(alert.ValidUntil) } + res = append(res, a) } if len(alerts) > 0 { - m.notifier.Send(res...) 
+ m.alertmanager.PutAlerts(ctx, orgID, res) } } } +func (m *Manager) prepareTestNotifyFunc() NotifyFunc { + return func(ctx context.Context, orgID string, expr string, alerts ...*Alert) { + if len(alerts) == 0 { + return + } + + alert := alerts[0] + generatorURL := alert.GeneratorURL + if generatorURL == "" { + generatorURL = m.opts.RepoURL + } + + a := &alertmanagertypes.PostableAlert{ + Annotations: alert.Annotations.Map(), + StartsAt: strfmt.DateTime(alert.FiredAt), + Alert: alertmanagertypes.AlertModel{ + Labels: alert.Labels.Map(), + GeneratorURL: strfmt.URI(generatorURL), + }, + } + if !alert.ResolvedAt.IsZero() { + a.EndsAt = strfmt.DateTime(alert.ResolvedAt) + } else { + a.EndsAt = strfmt.DateTime(alert.ValidUntil) + } + + if len(alert.Receivers) == 0 { + channels, err := m.alertmanager.ListChannels(ctx, orgID) + if err != nil { + zap.L().Error("failed to list channels while sending test notification", zap.Error(err)) + return + } + + for _, channel := range channels { + alert.Receivers = append(alert.Receivers, channel.Name) + } + } + + m.alertmanager.TestAlert(ctx, orgID, a, alert.Receivers) + } +} + func (m *Manager) ListActiveRules() ([]Rule, error) { ruleList := []Rule{} @@ -736,6 +864,10 @@ func (m *Manager) syncRuleStateWithTask(taskName string, rule *PostableRule) err // - re-deploy or undeploy task as necessary // - update the patched rule in the DB func (m *Manager) PatchRule(ctx context.Context, ruleStr string, ruleId string) (*GettableRule, error) { + claims, ok := authtypes.ClaimsFromContext(ctx) + if !ok { + return nil, errors.New("claims not found in context") + } if ruleId == "" { return nil, fmt.Errorf("id is mandatory for patching rule") @@ -775,15 +907,16 @@ func (m *Manager) PatchRule(ctx context.Context, ruleStr string, ruleId string) return nil, err } - // write updated rule to db - if _, _, err = m.ruleDB.EditRuleTx(ctx, string(patchedRuleBytes), ruleId); err != nil { - // write failed, rollback task state + now := time.Now() + 
storedJSON.Data = string(patchedRuleBytes) + storedJSON.UpdatedBy = &claims.Email + storedJSON.UpdatedAt = &now - // restore task state from the stored rule + err = m.ruleDB.EditRule(ctx, storedJSON, func(ctx context.Context) error { return nil }) + if err != nil { if err := m.syncRuleStateWithTask(taskName, &storedRule); err != nil { zap.L().Error("failed to restore rule after patch failure", zap.String("taskName", taskName), zap.Error(err)) } - return nil, err } @@ -822,7 +955,8 @@ func (m *Manager) TestNotification(ctx context.Context, ruleStr string) (int, *m Cache: m.cache, FF: m.featureFlags, ManagerOpts: m.opts, - NotifyFunc: m.prepareNotifyFunc(), + NotifyFunc: m.prepareTestNotifyFunc(), + SQLStore: m.sqlstore, UseLogsNewSchema: m.opts.UseLogsNewSchema, UseTraceNewSchema: m.opts.UseTraceNewSchema, }) diff --git a/pkg/query-service/rules/test_notification.go b/pkg/query-service/rules/test_notification.go index e30b7db94f..cda893e3f1 100644 --- a/pkg/query-service/rules/test_notification.go +++ b/pkg/query-service/rules/test_notification.go @@ -52,6 +52,7 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError) opts.UseTraceNewSchema, WithSendAlways(), WithSendUnmatched(), + WithSQLStore(opts.SQLStore), ) if err != nil { @@ -70,6 +71,7 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError) opts.ManagerOpts.PqlEngine, WithSendAlways(), WithSendUnmatched(), + WithSQLStore(opts.SQLStore), ) if err != nil { diff --git a/pkg/signoz/config.go b/pkg/signoz/config.go index b8d188ceee..0a51ab5e87 100644 --- a/pkg/signoz/config.go +++ b/pkg/signoz/config.go @@ -3,10 +3,12 @@ package signoz import ( "context" "fmt" + "net/url" "os" "reflect" "time" + "go.signoz.io/signoz/pkg/alertmanager" "go.signoz.io/signoz/pkg/apiserver" "go.signoz.io/signoz/pkg/cache" "go.signoz.io/signoz/pkg/config" @@ -44,6 +46,9 @@ type Config struct { // TelemetryStore config TelemetryStore telemetrystore.Config 
`mapstructure:"telemetrystore"` + + // Alertmanager config + Alertmanager alertmanager.Config `mapstructure:"alertmanager" yaml:"alertmanager"` } // DeprecatedFlags are the flags that are deprecated and scheduled for removal. @@ -63,6 +68,7 @@ func NewConfig(ctx context.Context, resolverConfig config.ResolverConfig, deprec sqlmigrator.NewConfigFactory(), apiserver.NewConfigFactory(), telemetrystore.NewConfigFactory(), + alertmanager.NewConfigFactory(), } conf, err := config.New(ctx, resolverConfig, configFactories) @@ -71,7 +77,7 @@ func NewConfig(ctx context.Context, resolverConfig config.ResolverConfig, deprec } var config Config - if err := conf.Unmarshal("", &config); err != nil { + if err := conf.Unmarshal("", &config, "yaml"); err != nil { return Config{}, err } @@ -138,17 +144,31 @@ func mergeAndEnsureBackwardCompatibility(config *Config, deprecatedFlags Depreca } if deprecatedFlags.MaxIdleConns != 50 { - fmt.Println("[Deprecated] flag --max-idle-conns is deprecated and scheduled for removal. Please use SIGNOZ_TELEMETRYSTORE_MAX__IDLE__CONNS env variable instead.") + fmt.Println("[Deprecated] flag --max-idle-conns is deprecated and scheduled for removal. Please use SIGNOZ_TELEMETRYSTORE_MAX__IDLE__CONNS instead.") config.TelemetryStore.Connection.MaxIdleConns = deprecatedFlags.MaxIdleConns } if deprecatedFlags.MaxOpenConns != 100 { - fmt.Println("[Deprecated] flag --max-open-conns is deprecated and scheduled for removal. Please use SIGNOZ_TELEMETRYSTORE_MAX__OPEN__CONNS env variable instead.") + fmt.Println("[Deprecated] flag --max-open-conns is deprecated and scheduled for removal. Please use SIGNOZ_TELEMETRYSTORE_MAX__OPEN__CONNS instead.") config.TelemetryStore.Connection.MaxOpenConns = deprecatedFlags.MaxOpenConns } if deprecatedFlags.DialTimeout != 5*time.Second { - fmt.Println("[Deprecated] flag --dial-timeout is deprecated and scheduled for removal. 
Please use SIGNOZ_TELEMETRYSTORE_DIAL__TIMEOUT environment variable instead.") + fmt.Println("[Deprecated] flag --dial-timeout is deprecated and scheduled for removal. Please use SIGNOZ_TELEMETRYSTORE_DIAL__TIMEOUT instead.") config.TelemetryStore.Connection.DialTimeout = deprecatedFlags.DialTimeout } + + if os.Getenv("ALERTMANAGER_API_PREFIX") != "" { + fmt.Println("[Deprecated] env ALERTMANAGER_API_PREFIX is deprecated and scheduled for removal. Please use SIGNOZ_ALERTMANAGER_LEGACY_API__URL instead.") + u, err := url.Parse(os.Getenv("ALERTMANAGER_API_PREFIX")) + if err != nil { + fmt.Println("Error parsing ALERTMANAGER_API_PREFIX, using default value") + } else { + config.Alertmanager.Legacy.ApiURL = u + } + } + + if os.Getenv("ALERTMANAGER_API_CHANNEL_PATH") != "" { + fmt.Println("[Deprecated] env ALERTMANAGER_API_CHANNEL_PATH is deprecated and scheduled for complete removal.") + } } diff --git a/pkg/signoz/provider.go b/pkg/signoz/provider.go index ed6c9b66a3..1568b31204 100644 --- a/pkg/signoz/provider.go +++ b/pkg/signoz/provider.go @@ -1,6 +1,9 @@ package signoz import ( + "go.signoz.io/signoz/pkg/alertmanager" + "go.signoz.io/signoz/pkg/alertmanager/legacyalertmanager" + "go.signoz.io/signoz/pkg/alertmanager/signozalertmanager" "go.signoz.io/signoz/pkg/cache" "go.signoz.io/signoz/pkg/cache/memorycache" "go.signoz.io/signoz/pkg/cache/rediscache" @@ -18,53 +21,56 @@ import ( "go.signoz.io/signoz/pkg/web/routerweb" ) -type ProviderConfig struct { - // Map of all cache provider factories - CacheProviderFactories factory.NamedMap[factory.ProviderFactory[cache.Cache, cache.Config]] - - // Map of all web provider factories - WebProviderFactories factory.NamedMap[factory.ProviderFactory[web.Web, web.Config]] - - // Map of all sqlstore provider factories - SQLStoreProviderFactories factory.NamedMap[factory.ProviderFactory[sqlstore.SQLStore, sqlstore.Config]] - - // Map of all sql migration provider factories - SQLMigrationProviderFactories 
factory.NamedMap[factory.ProviderFactory[sqlmigration.SQLMigration, sqlmigration.Config]] - - // Map of all telemetrystore provider factories - TelemetryStoreProviderFactories factory.NamedMap[factory.ProviderFactory[telemetrystore.TelemetryStore, telemetrystore.Config]] +func NewCacheProviderFactories() factory.NamedMap[factory.ProviderFactory[cache.Cache, cache.Config]] { + return factory.MustNewNamedMap( + memorycache.NewFactory(), + rediscache.NewFactory(), + ) } -func NewProviderConfig() ProviderConfig { - return ProviderConfig{ - CacheProviderFactories: factory.MustNewNamedMap( - memorycache.NewFactory(), - rediscache.NewFactory(), - ), - WebProviderFactories: factory.MustNewNamedMap( - routerweb.NewFactory(), - noopweb.NewFactory(), - ), - SQLStoreProviderFactories: factory.MustNewNamedMap( - sqlitesqlstore.NewFactory(sqlstorehook.NewLoggingFactory()), - postgressqlstore.NewFactory(sqlstorehook.NewLoggingFactory()), - ), - SQLMigrationProviderFactories: factory.MustNewNamedMap( - sqlmigration.NewAddDataMigrationsFactory(), - sqlmigration.NewAddOrganizationFactory(), - sqlmigration.NewAddPreferencesFactory(), - sqlmigration.NewAddDashboardsFactory(), - sqlmigration.NewAddSavedViewsFactory(), - sqlmigration.NewAddAgentsFactory(), - sqlmigration.NewAddPipelinesFactory(), - sqlmigration.NewAddIntegrationsFactory(), - sqlmigration.NewAddLicensesFactory(), - sqlmigration.NewAddPatsFactory(), - sqlmigration.NewModifyDatetimeFactory(), - sqlmigration.NewModifyOrgDomainFactory(), - ), - TelemetryStoreProviderFactories: factory.MustNewNamedMap( - clickhousetelemetrystore.NewFactory(telemetrystorehook.NewFactory()), - ), - } +func NewWebProviderFactories() factory.NamedMap[factory.ProviderFactory[web.Web, web.Config]] { + return factory.MustNewNamedMap( + routerweb.NewFactory(), + noopweb.NewFactory(), + ) +} + +func NewSQLStoreProviderFactories() factory.NamedMap[factory.ProviderFactory[sqlstore.SQLStore, sqlstore.Config]] { + hook := sqlstorehook.NewLoggingFactory() 
+ return factory.MustNewNamedMap( + sqlitesqlstore.NewFactory(hook), + postgressqlstore.NewFactory(hook), + ) +} + +func NewSQLMigrationProviderFactories(sqlstore sqlstore.SQLStore) factory.NamedMap[factory.ProviderFactory[sqlmigration.SQLMigration, sqlmigration.Config]] { + return factory.MustNewNamedMap( + sqlmigration.NewAddDataMigrationsFactory(), + sqlmigration.NewAddOrganizationFactory(), + sqlmigration.NewAddPreferencesFactory(), + sqlmigration.NewAddDashboardsFactory(), + sqlmigration.NewAddSavedViewsFactory(), + sqlmigration.NewAddAgentsFactory(), + sqlmigration.NewAddPipelinesFactory(), + sqlmigration.NewAddIntegrationsFactory(), + sqlmigration.NewAddLicensesFactory(), + sqlmigration.NewAddPatsFactory(), + sqlmigration.NewModifyDatetimeFactory(), + sqlmigration.NewModifyOrgDomainFactory(), + sqlmigration.NewUpdateOrganizationFactory(sqlstore), + sqlmigration.NewAddAlertmanagerFactory(), + ) +} + +func NewTelemetryStoreProviderFactories() factory.NamedMap[factory.ProviderFactory[telemetrystore.TelemetryStore, telemetrystore.Config]] { + return factory.MustNewNamedMap( + clickhousetelemetrystore.NewFactory(telemetrystorehook.NewFactory()), + ) +} + +func NewAlertmanagerProviderFactories(sqlstore sqlstore.SQLStore) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] { + return factory.MustNewNamedMap( + legacyalertmanager.NewFactory(sqlstore), + signozalertmanager.NewFactory(sqlstore), + ) } diff --git a/pkg/signoz/provider_test.go b/pkg/signoz/provider_test.go index 82ee0d6559..867a7ef13e 100644 --- a/pkg/signoz/provider_test.go +++ b/pkg/signoz/provider_test.go @@ -3,14 +3,37 @@ package signoz import ( "testing" + "github.com/DATA-DOG/go-sqlmock" "github.com/stretchr/testify/assert" + "go.signoz.io/signoz/pkg/sqlstore" + "go.signoz.io/signoz/pkg/sqlstore/sqlstoretest" ) -func TestNewProviderConfig(t *testing.T) { - // This is a test to ensure that provider factories can be created without panicking since - // we are 
using the factory.MustNewNamedMap function to initialize the provider factories. - // It also helps us catch these errors during testing instead of runtime. +// This is a test to ensure that provider factories can be created without panicking since +// we are using the factory.MustNewNamedMap function to initialize the provider factories. +// It also helps us catch these errors during testing instead of runtime. +func TestNewProviderFactories(t *testing.T) { assert.NotPanics(t, func() { - NewProviderConfig() + NewCacheProviderFactories() + }) + + assert.NotPanics(t, func() { + NewWebProviderFactories() + }) + + assert.NotPanics(t, func() { + NewSQLStoreProviderFactories() + }) + + assert.NotPanics(t, func() { + NewTelemetryStoreProviderFactories() + }) + + assert.NotPanics(t, func() { + NewSQLMigrationProviderFactories(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)) + }) + + assert.NotPanics(t, func() { + NewAlertmanagerProviderFactories(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)) }) } diff --git a/pkg/signoz/signoz.go b/pkg/signoz/signoz.go index 8dee40a4e5..54c76671aa 100644 --- a/pkg/signoz/signoz.go +++ b/pkg/signoz/signoz.go @@ -3,6 +3,7 @@ package signoz import ( "context" + "go.signoz.io/signoz/pkg/alertmanager" "go.signoz.io/signoz/pkg/cache" "go.signoz.io/signoz/pkg/factory" "go.signoz.io/signoz/pkg/instrumentation" @@ -16,16 +17,21 @@ import ( ) type SigNoz struct { + *factory.Registry Cache cache.Cache Web web.Web SQLStore sqlstore.SQLStore TelemetryStore telemetrystore.TelemetryStore + Alertmanager alertmanager.Alertmanager } func New( ctx context.Context, config Config, - providerConfig ProviderConfig, + cacheProviderFactories factory.NamedMap[factory.ProviderFactory[cache.Cache, cache.Config]], + webProviderFactories factory.NamedMap[factory.ProviderFactory[web.Web, web.Config]], + sqlstoreProviderFactories factory.NamedMap[factory.ProviderFactory[sqlstore.SQLStore, 
sqlstore.Config]], + telemetrystoreProviderFactories factory.NamedMap[factory.ProviderFactory[telemetrystore.TelemetryStore, telemetrystore.Config]], ) (*SigNoz, error) { // Initialize instrumentation instrumentation, err := instrumentation.New(ctx, version.Build{}, config.Instrumentation) @@ -33,7 +39,7 @@ func New( return nil, err } - instrumentation.Logger().InfoContext(ctx, "starting signoz", "config", config) + instrumentation.Logger().DebugContext(ctx, "starting signoz", "config", config) // Get the provider settings from instrumentation providerSettings := instrumentation.ToProviderSettings() @@ -43,7 +49,7 @@ func New( ctx, providerSettings, config.Cache, - providerConfig.CacheProviderFactories, + cacheProviderFactories, config.Cache.Provider, ) if err != nil { @@ -55,7 +61,7 @@ func New( ctx, providerSettings, config.Web, - providerConfig.WebProviderFactories, + webProviderFactories, config.Web.Provider(), ) if err != nil { @@ -67,22 +73,19 @@ func New( ctx, providerSettings, config.SQLStore, - providerConfig.SQLStoreProviderFactories, + sqlstoreProviderFactories, config.SQLStore.Provider, ) if err != nil { return nil, err } - // add the org migration here since we need to pass the sqlstore - providerConfig.SQLMigrationProviderFactories.Add(sqlmigration.NewUpdateOrganizationFactory(sqlstore)) - // Initialize telemetrystore from the available telemetrystore provider factories telemetrystore, err := factory.NewProviderFromNamedMap( ctx, providerSettings, config.TelemetryStore, - providerConfig.TelemetryStoreProviderFactories, + telemetrystoreProviderFactories, config.TelemetryStore.Provider, ) if err != nil { @@ -94,7 +97,7 @@ func New( ctx, providerSettings, config.SQLMigration, - providerConfig.SQLMigrationProviderFactories, + NewSQLMigrationProviderFactories(sqlstore), ) if err != nil { return nil, err @@ -105,10 +108,32 @@ func New( return nil, err } + alertmanager, err := factory.NewProviderFromNamedMap( + ctx, + providerSettings, + config.Alertmanager, 
+ NewAlertmanagerProviderFactories(sqlstore), + config.Alertmanager.Provider, + ) + if err != nil { + return nil, err + } + + registry, err := factory.NewRegistry( + instrumentation.Logger(), + factory.NewNamedService(factory.MustNewName("instrumentation"), instrumentation), + factory.NewNamedService(factory.MustNewName("alertmanager"), alertmanager), + ) + if err != nil { + return nil, err + } + return &SigNoz{ + Registry: registry, Cache: cache, Web: web, SQLStore: sqlstore, TelemetryStore: telemetrystore, + Alertmanager: alertmanager, }, nil } diff --git a/pkg/sqlmigration/001_add_organization.go b/pkg/sqlmigration/001_add_organization.go index 6f87c18334..37d1447c1c 100644 --- a/pkg/sqlmigration/001_add_organization.go +++ b/pkg/sqlmigration/001_add_organization.go @@ -36,8 +36,8 @@ func (migration *addOrganization) Up(ctx context.Context, db *bun.DB) error { ID string `bun:"id,pk,type:text"` Name string `bun:"name,type:text,notnull"` CreatedAt int `bun:"created_at,notnull"` - IsAnonymous int `bun:"is_anonymous,notnull,default:0,CHECK(is_anonymous IN (0,1))"` - HasOptedUpdates int `bun:"has_opted_updates,notnull,default:1,CHECK(has_opted_updates IN (0,1))"` + IsAnonymous int `bun:"is_anonymous,notnull,default:0"` + HasOptedUpdates int `bun:"has_opted_updates,notnull,default:1"` }{}). IfNotExists(). 
Exec(ctx); err != nil { diff --git a/pkg/sqlmigration/014_add_alertmanager.go b/pkg/sqlmigration/014_add_alertmanager.go new file mode 100644 index 0000000000..d06e63a973 --- /dev/null +++ b/pkg/sqlmigration/014_add_alertmanager.go @@ -0,0 +1,280 @@ +package sqlmigration + +import ( + "context" + "database/sql" + "encoding/json" + "strconv" + "strings" + "time" + + "github.com/prometheus/alertmanager/config" + "github.com/tidwall/gjson" + "github.com/uptrace/bun" + "github.com/uptrace/bun/migrate" + "go.signoz.io/signoz/pkg/alertmanager/alertmanagerserver" + "go.signoz.io/signoz/pkg/factory" + "go.signoz.io/signoz/pkg/types/alertmanagertypes" +) + +type addAlertmanager struct{} + +func NewAddAlertmanagerFactory() factory.ProviderFactory[SQLMigration, Config] { + return factory.NewProviderFactory(factory.MustNewName("add_alertmanager"), newAddAlertmanager) +} + +func newAddAlertmanager(_ context.Context, _ factory.ProviderSettings, _ Config) (SQLMigration, error) { + return &addAlertmanager{}, nil +} + +func (migration *addAlertmanager) Register(migrations *migrate.Migrations) error { + if err := migrations.Register(migration.Up, migration.Down); err != nil { + return err + } + + return nil +} + +func (migration *addAlertmanager) Up(ctx context.Context, db *bun.DB) error { + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return err + } + + defer tx.Rollback() //nolint:errcheck + + if _, err := tx. + NewDropColumn(). + Table("notification_channels"). + ColumnExpr("deleted"). + Exec(ctx); err != nil { + if !strings.Contains(err.Error(), "no such column") { + return err + } + } + + if _, err := tx. + NewAddColumn(). + Table("notification_channels"). + ColumnExpr("org_id"). + Apply(WrapIfNotExists(ctx, db, "notification_channels", "org_id")). + Exec(ctx); err != nil && err != ErrNoExecute { + return err + } + + if _, err := tx. + NewCreateTable(). 
+ Model(&struct { + bun.BaseModel `bun:"table:alertmanager_config"` + ID uint64 `bun:"id,pk,autoincrement"` + Config string `bun:"config,notnull,type:text"` + Hash string `bun:"hash,notnull,type:text"` + CreatedAt time.Time `bun:"created_at,notnull"` + UpdatedAt time.Time `bun:"updated_at,notnull"` + OrgID string `bun:"org_id,notnull,unique"` + }{}). + ForeignKey(`("org_id") REFERENCES "organizations" ("id")`). + IfNotExists(). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateTable(). + Model(&struct { + bun.BaseModel `bun:"table:alertmanager_state"` + ID uint64 `bun:"id,pk,autoincrement"` + Silences string `bun:"silences,nullzero,type:text"` + NFLog string `bun:"nflog,nullzero,type:text"` + CreatedAt time.Time `bun:"created_at,notnull"` + UpdatedAt time.Time `bun:"updated_at,notnull"` + OrgID string `bun:"org_id,notnull,unique"` + }{}). + ForeignKey(`("org_id") REFERENCES "organizations" ("id")`). + IfNotExists(). + Exec(ctx); err != nil { + return err + } + + var orgID string + err = tx. + NewSelect(). + ColumnExpr("id"). + Table("organizations"). + Limit(1). + Scan(ctx, &orgID) + if err != nil { + if err != sql.ErrNoRows { + return err + } + } + + if err == nil { + if err := migration.populateOrgIDInChannels(ctx, tx, orgID); err != nil { + return err + } + + if err := migration.populateAlertmanagerConfig(ctx, tx, orgID); err != nil { + return err + } + } + + if err := tx.Commit(); err != nil { + return err + } + + return nil +} + +func (migration *addAlertmanager) populateOrgIDInChannels(ctx context.Context, tx bun.Tx, orgID string) error { + if _, err := tx. + NewUpdate(). + Table("notification_channels"). + Set("org_id = ?", orgID). + Where("org_id IS NULL"). + Exec(ctx); err != nil { + return err + } + + return nil +} + +func (migration *addAlertmanager) populateAlertmanagerConfig(ctx context.Context, tx bun.Tx, orgID string) error { + var channels []*alertmanagertypes.Channel + + err := tx. + NewSelect(). + Model(&channels). 
+ Where("org_id = ?", orgID). + Scan(ctx) + if err != nil { + return err + } + + var receiversFromChannels []string + for _, channel := range channels { + receiversFromChannels = append(receiversFromChannels, channel.Name) + } + + type matcher struct { + bun.BaseModel `bun:"table:rules"` + ID int `bun:"id,pk"` + Data string `bun:"data"` + } + + matchers := []matcher{} + + err = tx. + NewSelect(). + Column("id", "data"). + Model(&matchers). + Scan(ctx) + if err != nil { + return err + } + + matchersMap := make(map[string][]string) + for _, matcher := range matchers { + receivers := gjson.Get(matcher.Data, "preferredChannels").Array() + for _, receiver := range receivers { + matchersMap[strconv.Itoa(matcher.ID)] = append(matchersMap[strconv.Itoa(matcher.ID)], receiver.String()) + } + + if len(receivers) == 0 { + matchersMap[strconv.Itoa(matcher.ID)] = append(matchersMap[strconv.Itoa(matcher.ID)], receiversFromChannels...) + } + } + + for _, channel := range channels { + if err := migration.msTeamsChannelToMSTeamsV2Channel(channel); err != nil { + return err + } + } + + config, err := alertmanagertypes.NewConfigFromChannels(alertmanagerserver.NewConfig().Global, alertmanagerserver.NewConfig().Route, channels, orgID) + if err != nil { + return err + } + + for ruleID, receivers := range matchersMap { + err = config.CreateRuleIDMatcher(ruleID, receivers) + if err != nil { + return err + } + } + + if _, err := tx. + NewInsert(). + Model(config.StoreableConfig()). + On("CONFLICT (org_id) DO UPDATE"). + Set("config = ?", config.StoreableConfig().Config). + Set("hash = ?", config.StoreableConfig().Hash). + Set("updated_at = ?", config.StoreableConfig().UpdatedAt). + Exec(ctx); err != nil { + return err + } + + for _, channel := range channels { + if channel.Type == "msteamsv2" { + if _, err := tx. + NewUpdate(). + Model(channel). + WherePK(). 
+ Exec(ctx); err != nil { + return err + } + } + } + + return nil +} + +func (migration *addAlertmanager) Down(ctx context.Context, db *bun.DB) error { + return nil +} + +func (migration *addAlertmanager) msTeamsChannelToMSTeamsV2Channel(c *alertmanagertypes.Channel) error { + if c.Type != "msteams" { + return nil + } + + receiver, err := alertmanagertypes.NewReceiver(c.Data) + if err != nil { + return err + } + + receiver = migration.msTeamsReceiverToMSTeamsV2Receiver(receiver) + data, err := json.Marshal(receiver) + if err != nil { + return err + } + + c.Type = "msteamsv2" + c.Data = string(data) + c.UpdatedAt = time.Now() + + return nil +} + +func (migration *addAlertmanager) msTeamsReceiverToMSTeamsV2Receiver(receiver alertmanagertypes.Receiver) alertmanagertypes.Receiver { + if receiver.MSTeamsConfigs == nil { + return receiver + } + + var msTeamsV2Configs []*config.MSTeamsV2Config + for _, cfg := range receiver.MSTeamsConfigs { + msTeamsV2Configs = append(msTeamsV2Configs, &config.MSTeamsV2Config{ + NotifierConfig: cfg.NotifierConfig, + HTTPConfig: cfg.HTTPConfig, + WebhookURL: cfg.WebhookURL, + WebhookURLFile: cfg.WebhookURLFile, + Title: cfg.Title, + Text: cfg.Text, + }) + } + + receiver.MSTeamsConfigs = nil + receiver.MSTeamsV2Configs = msTeamsV2Configs + + return receiver +} diff --git a/pkg/types/alertmanagertypes/channel.go b/pkg/types/alertmanagertypes/channel.go index a6df3614d9..9e7fa52d48 100644 --- a/pkg/types/alertmanagertypes/channel.go +++ b/pkg/types/alertmanagertypes/channel.go @@ -95,16 +95,6 @@ func NewChannelFromReceiver(receiver config.Receiver, orgID string) *Channel { return &channel } -func NewReceiverFromChannel(channel *Channel) (Receiver, error) { - receiver := Receiver{} - err := json.Unmarshal([]byte(channel.Data), &receiver) - if err != nil { - return Receiver{}, err - } - - return receiver, nil -} - func NewConfigFromChannels(globalConfig GlobalConfig, routeConfig RouteConfig, channels Channels, orgID string) (*Config, error) { 
cfg, err := NewDefaultConfig( globalConfig, @@ -165,3 +155,26 @@ func (c *Channel) Update(receiver Receiver) error { return nil } + +// This is needed by the legacy alertmanager to convert the MSTeamsV2Configs to MSTeamsConfigs +func (c *Channel) MSTeamsV2ToMSTeams() error { + if c.Type != "msteamsv2" { + return nil + } + + receiver, err := NewReceiver(c.Data) + if err != nil { + return err + } + + receiver = MSTeamsV2ReceiverToMSTeamsReceiver(receiver) + data, err := json.Marshal(receiver) + if err != nil { + return err + } + + c.Type = "msteams" + c.Data = string(data) + + return nil +} diff --git a/pkg/types/alertmanagertypes/channel_test.go b/pkg/types/alertmanagertypes/channel_test.go index c7623c819e..0ca59c4b14 100644 --- a/pkg/types/alertmanagertypes/channel_test.go +++ b/pkg/types/alertmanagertypes/channel_test.go @@ -26,8 +26,23 @@ func TestNewConfigFromChannels(t *testing.T) { Data: `{"name":"email-receiver","email_configs":[{"to":"test@example.com"}]}`, }, }, - expectedRoutes: []map[string]any{{"receiver": "email-receiver", "continue": true}}, - expectedReceivers: []map[string]any{{"name": "default-receiver"}, {"name": "email-receiver", "email_configs": []any{map[string]any{"send_resolved": false, "to": "test@example.com", "from": "alerts@example.com", "hello": "localhost", "smarthost": "smtp.example.com:587", "require_tls": true, "tls_config": map[string]any{"insecure_skip_verify": false}}}}}, + expectedRoutes: []map[string]any{{"receiver": "email-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}}, + expectedReceivers: []map[string]any{ + {"name": "default-receiver"}, + { + "name": "email-receiver", + "email_configs": []any{map[string]any{ + "send_resolved": false, + "to": "test@example.com", + "from": "alerts@example.com", + "hello": "localhost", + "smarthost": "smtp.example.com:587", + "require_tls": true, + "html": "{{ template \"email.default.html\" . 
}}", + "tls_config": map[string]any{"insecure_skip_verify": false}, + }}, + }, + }, }, { name: "OneSlackChannel", @@ -38,8 +53,35 @@ func TestNewConfigFromChannels(t *testing.T) { Data: `{"name":"slack-receiver","slack_configs":[{"channel":"#alerts","api_url":"https://slack.com/api/test","send_resolved":true}]}`, }, }, - expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true}}, - expectedReceivers: []map[string]any{{"name": "default-receiver"}, {"name": "slack-receiver", "slack_configs": []any{map[string]any{"send_resolved": true, "http_config": map[string]any{"tls_config": map[string]any{"insecure_skip_verify": false}, "follow_redirects": true, "enable_http2": true, "proxy_url": nil}, "api_url": "https://slack.com/api/test", "channel": "#alerts"}}}}, + expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}}, + expectedReceivers: []map[string]any{ + {"name": "default-receiver"}, + { + "name": "slack-receiver", + "slack_configs": []any{map[string]any{ + "send_resolved": true, + "api_url": "https://slack.com/api/test", + "channel": "#alerts", + "callback_id": "{{ template \"slack.default.callbackid\" . }}", + "color": "{{ if eq .Status \"firing\" }}danger{{ else }}good{{ end }}", + "fallback": "{{ template \"slack.default.fallback\" . }}", + "footer": "{{ template \"slack.default.footer\" . }}", + "icon_emoji": "{{ template \"slack.default.iconemoji\" . }}", + "icon_url": "{{ template \"slack.default.iconurl\" . }}", + "pretext": "{{ template \"slack.default.pretext\" . }}", + "text": "{{ template \"slack.default.text\" . }}", + "title": "{{ template \"slack.default.title\" . }}", + "title_link": "{{ template \"slack.default.titlelink\" . }}", + "username": "{{ template \"slack.default.username\" . 
}}", + "http_config": map[string]any{ + "tls_config": map[string]any{"insecure_skip_verify": false}, + "follow_redirects": true, + "enable_http2": true, + "proxy_url": nil, + }, + }}, + }, + }, }, { name: "OnePagerdutyChannel", @@ -50,8 +92,34 @@ func TestNewConfigFromChannels(t *testing.T) { Data: `{"name":"pagerduty-receiver","pagerduty_configs":[{"service_key":"test"}]}`, }, }, - expectedRoutes: []map[string]any{{"receiver": "pagerduty-receiver", "continue": true}}, - expectedReceivers: []map[string]any{{"name": "default-receiver"}, {"name": "pagerduty-receiver", "pagerduty_configs": []any{map[string]any{"send_resolved": false, "http_config": map[string]any{"tls_config": map[string]any{"insecure_skip_verify": false}, "follow_redirects": true, "enable_http2": true, "proxy_url": nil}, "service_key": "test", "url": "https://events.pagerduty.com/v2/enqueue"}}}}, + expectedRoutes: []map[string]any{{"receiver": "pagerduty-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}}, + expectedReceivers: []map[string]any{ + {"name": "default-receiver"}, + { + "name": "pagerduty-receiver", + "pagerduty_configs": []any{map[string]any{ + "send_resolved": false, + "service_key": "test", + "url": "https://events.pagerduty.com/v2/enqueue", + "client": "{{ template \"pagerduty.default.client\" . }}", + "client_url": "{{ template \"pagerduty.default.clientURL\" . }}", + "description": "{{ template \"pagerduty.default.description\" .}}", + "source": "{{ template \"pagerduty.default.client\" . 
}}", + "details": map[string]any{ + "firing": "{{ template \"pagerduty.default.instances\" .Alerts.Firing }}", + "num_firing": "{{ .Alerts.Firing | len }}", + "num_resolved": "{{ .Alerts.Resolved | len }}", + "resolved": "{{ template \"pagerduty.default.instances\" .Alerts.Resolved }}", + }, + "http_config": map[string]any{ + "tls_config": map[string]any{"insecure_skip_verify": false}, + "follow_redirects": true, + "enable_http2": true, + "proxy_url": nil, + }, + }}, + }, + }, }, { name: "OnePagerdutyAndOneSlackChannel", @@ -67,8 +135,59 @@ func TestNewConfigFromChannels(t *testing.T) { Data: `{"name":"slack-receiver","slack_configs":[{"channel":"#alerts","api_url":"https://slack.com/api/test","send_resolved":true}]}`, }, }, - expectedRoutes: []map[string]any{{"receiver": "pagerduty-receiver", "continue": true}, {"receiver": "slack-receiver", "continue": true}}, - expectedReceivers: []map[string]any{{"name": "default-receiver"}, {"name": "pagerduty-receiver", "pagerduty_configs": []any{map[string]any{"send_resolved": false, "http_config": map[string]any{"tls_config": map[string]any{"insecure_skip_verify": false}, "follow_redirects": true, "enable_http2": true, "proxy_url": nil}, "service_key": "test", "url": "https://events.pagerduty.com/v2/enqueue"}}}, {"name": "slack-receiver", "slack_configs": []any{map[string]any{"send_resolved": true, "http_config": map[string]any{"tls_config": map[string]any{"insecure_skip_verify": false}, "follow_redirects": true, "enable_http2": true, "proxy_url": nil}, "api_url": "https://slack.com/api/test", "channel": "#alerts"}}}}, + expectedRoutes: []map[string]any{{"receiver": "pagerduty-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}, {"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}}, + expectedReceivers: []map[string]any{ + {"name": "default-receiver"}, + { + "name": "pagerduty-receiver", + "pagerduty_configs": []any{map[string]any{ + "send_resolved": false, + "service_key": 
"test", + "url": "https://events.pagerduty.com/v2/enqueue", + "client": "{{ template \"pagerduty.default.client\" . }}", + "client_url": "{{ template \"pagerduty.default.clientURL\" . }}", + "description": "{{ template \"pagerduty.default.description\" .}}", + "source": "{{ template \"pagerduty.default.client\" . }}", + "details": map[string]any{ + "firing": "{{ template \"pagerduty.default.instances\" .Alerts.Firing }}", + "num_firing": "{{ .Alerts.Firing | len }}", + "num_resolved": "{{ .Alerts.Resolved | len }}", + "resolved": "{{ template \"pagerduty.default.instances\" .Alerts.Resolved }}", + }, + "http_config": map[string]any{ + "tls_config": map[string]any{"insecure_skip_verify": false}, + "follow_redirects": true, + "enable_http2": true, + "proxy_url": nil, + }, + }}, + }, + { + "name": "slack-receiver", + "slack_configs": []any{map[string]any{ + "send_resolved": true, + "api_url": "https://slack.com/api/test", + "channel": "#alerts", + "callback_id": "{{ template \"slack.default.callbackid\" . }}", + "color": "{{ if eq .Status \"firing\" }}danger{{ else }}good{{ end }}", + "fallback": "{{ template \"slack.default.fallback\" . }}", + "footer": "{{ template \"slack.default.footer\" . }}", + "icon_emoji": "{{ template \"slack.default.iconemoji\" . }}", + "icon_url": "{{ template \"slack.default.iconurl\" . }}", + "pretext": "{{ template \"slack.default.pretext\" . }}", + "text": "{{ template \"slack.default.text\" . }}", + "title": "{{ template \"slack.default.title\" . }}", + "title_link": "{{ template \"slack.default.titlelink\" . }}", + "username": "{{ template \"slack.default.username\" . 
}}", + "http_config": map[string]any{ + "tls_config": map[string]any{"insecure_skip_verify": false}, + "follow_redirects": true, + "enable_http2": true, + "proxy_url": nil, + }, + }}, + }, + }, }, } diff --git a/pkg/types/alertmanagertypes/config.go b/pkg/types/alertmanagertypes/config.go index ae6a93aee3..167b0520ab 100644 --- a/pkg/types/alertmanagertypes/config.go +++ b/pkg/types/alertmanagertypes/config.go @@ -10,7 +10,6 @@ import ( "dario.cat/mergo" "github.com/prometheus/alertmanager/config" - "github.com/prometheus/alertmanager/pkg/labels" commoncfg "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/uptrace/bun" @@ -33,10 +32,10 @@ type ( ) type RouteConfig struct { - GroupByStr []string - GroupInterval time.Duration - GroupWait time.Duration - RepeatInterval time.Duration + GroupByStr []string `mapstructure:"group_by"` + GroupInterval time.Duration `mapstructure:"group_interval"` + GroupWait time.Duration `mapstructure:"group_wait"` + RepeatInterval time.Duration `mapstructure:"repeat_interval"` } type StoreableConfig struct { @@ -91,15 +90,14 @@ func NewDefaultConfig(globalConfig GlobalConfig, routeConfig RouteConfig, orgID return nil, err } + route, err := NewRouteFromRouteConfig(nil, routeConfig) + if err != nil { + return nil, err + } + return NewConfig(&config.Config{ - Global: &globalConfig, - Route: &config.Route{ - Receiver: DefaultReceiverName, - GroupByStr: routeConfig.GroupByStr, - GroupInterval: (*model.Duration)(&routeConfig.GroupInterval), - GroupWait: (*model.Duration)(&routeConfig.GroupWait), - RepeatInterval: (*model.Duration)(&routeConfig.RepeatInterval), - }, + Global: &globalConfig, + Route: route, Receivers: []config.Receiver{{Name: DefaultReceiverName}}, }, orgID), nil } @@ -111,6 +109,20 @@ func newConfigFromString(s string) (*config.Config, error) { return nil, err } + for i, receiver := range config.Receivers { + bytes, err := json.Marshal(receiver) + if err != nil { + return nil, err + } + + 
receiver, err := NewReceiver(string(bytes)) + if err != nil { + return nil, err + } + + config.Receivers[i] = receiver + } + return config, nil } @@ -146,37 +158,32 @@ func (c *Config) CopyWithReset() (*Config, error) { return newConfig, nil } -func (c *Config) SetGlobalConfig(globalConfig GlobalConfig) { +func (c *Config) SetGlobalConfig(globalConfig GlobalConfig) error { + err := mergo.Merge(&globalConfig, config.DefaultGlobalConfig()) + if err != nil { + return err + } + c.alertmanagerConfig.Global = &globalConfig c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig)) c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config)) c.storeableConfig.UpdatedAt = time.Now() + + return nil } -func (c *Config) SetRouteConfig(routeConfig RouteConfig) { - c.alertmanagerConfig.Route = &config.Route{ - Receiver: DefaultReceiverName, - GroupByStr: routeConfig.GroupByStr, - GroupInterval: (*model.Duration)(&routeConfig.GroupInterval), - GroupWait: (*model.Duration)(&routeConfig.GroupWait), - RepeatInterval: (*model.Duration)(&routeConfig.RepeatInterval), - } - c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig)) - c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config)) - c.storeableConfig.UpdatedAt = time.Now() -} - -func (c *Config) UpdateRouteConfig(routeConfig RouteConfig) { - for _, route := range c.alertmanagerConfig.Route.Routes { - route.GroupByStr = routeConfig.GroupByStr - route.GroupInterval = (*model.Duration)(&routeConfig.GroupInterval) - route.GroupWait = (*model.Duration)(&routeConfig.GroupWait) - route.RepeatInterval = (*model.Duration)(&routeConfig.RepeatInterval) +func (c *Config) SetRouteConfig(routeConfig RouteConfig) error { + route, err := NewRouteFromRouteConfig(c.alertmanagerConfig.Route, routeConfig) + if err != nil { + return err } + c.alertmanagerConfig.Route = route c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig)) 
c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config)) c.storeableConfig.UpdatedAt = time.Now() + + return nil } func (c *Config) AlertmanagerConfig() *config.Config { @@ -188,10 +195,6 @@ func (c *Config) StoreableConfig() *StoreableConfig { } func (c *Config) CreateReceiver(receiver config.Receiver) error { - if receiver.Name == "" { - return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "receiver is mandatory in route and receiver") - } - // check that receiver name is not already used for _, existingReceiver := range c.alertmanagerConfig.Receivers { if existingReceiver.Name == receiver.Name { @@ -199,7 +202,12 @@ func (c *Config) CreateReceiver(receiver config.Receiver) error { } } - c.alertmanagerConfig.Route.Routes = append(c.alertmanagerConfig.Route.Routes, newRouteFromReceiver(receiver)) + route, err := NewRouteFromReceiver(receiver) + if err != nil { + return err + } + + c.alertmanagerConfig.Route.Routes = append(c.alertmanagerConfig.Route.Routes, route) c.alertmanagerConfig.Receivers = append(c.alertmanagerConfig.Receivers, receiver) if err := c.alertmanagerConfig.UnmarshalYAML(func(i interface{}) error { return nil }); err != nil { @@ -223,10 +231,6 @@ func (c *Config) GetReceiver(name string) (Receiver, error) { } func (c *Config) UpdateReceiver(receiver config.Receiver) error { - if receiver.Name == "" { - return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "receiver is mandatory in route and receiver") - } - // find and update receiver for i, existingReceiver := range c.alertmanagerConfig.Receivers { if existingReceiver.Name == receiver.Name { @@ -235,6 +239,10 @@ func (c *Config) UpdateReceiver(receiver config.Receiver) error { } } + if err := c.alertmanagerConfig.UnmarshalYAML(func(i interface{}) error { return nil }); err != nil { + return err + } + c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig)) c.storeableConfig.Hash = fmt.Sprintf("%x", 
newConfigHash(c.storeableConfig.Config)) c.storeableConfig.UpdatedAt = time.Now() @@ -274,15 +282,11 @@ func (c *Config) CreateRuleIDMatcher(ruleID string, receiverNames []string) erro return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "route is nil") } - routes := c.alertmanagerConfig.Route.Routes - for i, route := range routes { + for _, route := range c.alertmanagerConfig.Route.Routes { if slices.Contains(receiverNames, route.Receiver) { - matcher, err := labels.NewMatcher(labels.MatchEqual, "ruleId", ruleID) - if err != nil { + if err := addRuleIDToRoute(route, ruleID); err != nil { return err } - - c.alertmanagerConfig.Route.Routes[i].Matchers = append(c.alertmanagerConfig.Route.Routes[i].Matchers, matcher) } } @@ -303,13 +307,9 @@ func (c *Config) UpdateRuleIDMatcher(ruleID string, receiverNames []string) erro } func (c *Config) DeleteRuleIDMatcher(ruleID string) error { - routes := c.alertmanagerConfig.Route.Routes - for i, r := range routes { - j := slices.IndexFunc(r.Matchers, func(m *labels.Matcher) bool { - return m.Name == "ruleId" && m.Value == ruleID - }) - if j != -1 { - c.alertmanagerConfig.Route.Routes[i].Matchers = slices.Delete(r.Matchers, j, j+1) + for i := range c.alertmanagerConfig.Route.Routes { + if err := removeRuleIDFromRoute(c.alertmanagerConfig.Route.Routes[i], ruleID); err != nil { + return err } } @@ -320,18 +320,16 @@ func (c *Config) DeleteRuleIDMatcher(ruleID string) error { return nil } -func (c *Config) ReceiverNamesFromRuleID(ruleID string) ([]string, error) { +func (c *Config) ReceiverNamesFromRuleID(ruleID string) []string { receiverNames := make([]string, 0) routes := c.alertmanagerConfig.Route.Routes - for _, r := range routes { - for _, m := range r.Matchers { - if m.Name == "ruleId" && m.Value == ruleID { - receiverNames = append(receiverNames, r.Receiver) - } + for _, route := range routes { + if ok := matcherContainsRuleID(route.Matchers, ruleID); ok { + receiverNames = append(receiverNames, 
route.Receiver) } } - return receiverNames, nil + return receiverNames } type storeOptions struct { @@ -397,4 +395,5 @@ type ConfigStore interface { func init() { commoncfg.MarshalSecretValue = true config.MarshalSecretValue = true + model.NameValidationScheme = model.UTF8Validation } diff --git a/pkg/types/alertmanagertypes/config_test.go b/pkg/types/alertmanagertypes/config_test.go index cfa6dec135..c9c832c8dc 100644 --- a/pkg/types/alertmanagertypes/config_test.go +++ b/pkg/types/alertmanagertypes/config_test.go @@ -2,14 +2,14 @@ package alertmanagertypes import ( "encoding/json" - "fmt" "net/url" "testing" + "time" "github.com/prometheus/alertmanager/config" + "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/tidwall/gjson" ) func TestCreateRuleIDMatcher(t *testing.T) { @@ -35,7 +35,7 @@ func TestCreateRuleIDMatcher(t *testing.T) { }, }, ruleIDToReceivers: map[string][]string{"test-rule": {"slack-receiver"}}, - expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=\"test-rule\""}}}, + expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1|test-rule\""}}}, }, { name: "SlackAndEmailReceivers", @@ -60,7 +60,7 @@ func TestCreateRuleIDMatcher(t *testing.T) { }, }, ruleIDToReceivers: map[string][]string{"test-rule": {"slack-receiver", "email-receiver"}}, - expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=\"test-rule\""}}, {"receiver": "email-receiver", "continue": true, "matchers": []any{"ruleId=\"test-rule\""}}}, + expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1|test-rule\""}}, {"receiver": "email-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1|test-rule\""}}}, }, { name: "ReceiverDoesNotExist", @@ -77,7 +77,7 @@ func TestCreateRuleIDMatcher(t 
*testing.T) { }, }, ruleIDToReceivers: map[string][]string{"test-rule": {"does-not-exist"}}, - expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true}}, + expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}}, }, { name: "MultipleAlertsOnOneSlackReceiver", @@ -94,13 +94,17 @@ func TestCreateRuleIDMatcher(t *testing.T) { }, }, ruleIDToReceivers: map[string][]string{"test-rule-1": {"slack-receiver", "does-not-exist"}, "test-rule-2": {"slack-receiver"}}, - expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=\"test-rule-1\"", "ruleId=\"test-rule-2\""}}}, + expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1|test-rule-1|test-rule-2\""}}}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - cfg, err := NewDefaultConfig(GlobalConfig{SMTPSmarthost: config.HostPort{Host: "localhost", Port: "25"}, SMTPFrom: "test@example.com"}, RouteConfig{}, tc.orgID) + cfg, err := NewDefaultConfig( + GlobalConfig{SMTPSmarthost: config.HostPort{Host: "localhost", Port: "25"}, SMTPFrom: "test@example.com"}, + RouteConfig{GroupInterval: 1 * time.Minute, GroupWait: 1 * time.Minute, RepeatInterval: 1 * time.Minute}, + tc.orgID, + ) require.NoError(t, err) for _, receiver := range tc.receivers { @@ -113,15 +117,12 @@ func TestCreateRuleIDMatcher(t *testing.T) { assert.NoError(t, err) } - actualRoutes, err := json.Marshal(cfg.alertmanagerConfig.Route.Routes) + routes, err := json.Marshal(cfg.alertmanagerConfig.Route.Routes) require.NoError(t, err) - expectedRoutes, err := json.Marshal(tc.expectedRoutes) + var actualRoutes []map[string]any + err = json.Unmarshal(routes, &actualRoutes) require.NoError(t, err) - - for i := range len(tc.expectedRoutes) { - assert.Equal(t, gjson.GetBytes(expectedRoutes, fmt.Sprintf("$[%d].receiver", i)).String(), 
gjson.GetBytes(actualRoutes, fmt.Sprintf("$[%d].receiver", i)).String()) - assert.ElementsMatch(t, gjson.GetBytes(expectedRoutes, fmt.Sprintf("$[%d].matchers", i)).Array(), gjson.GetBytes(actualRoutes, fmt.Sprintf("$[%d].matchers", i)).Array()) - } + assert.ElementsMatch(t, tc.expectedRoutes, actualRoutes) }) } } @@ -159,10 +160,10 @@ func TestDeleteRuleIDMatcher(t *testing.T) { }, ruleIDToReceivers: map[string][]string{"test-rule": {"email-receiver", "slack-receiver"}}, ruleIDsToDelete: []string{"test-rule"}, - expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true}, {"receiver": "email-receiver", "continue": true}}, + expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}, {"receiver": "email-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1\""}}}, }, { - name: "AlertNameDoesNotExist", + name: "RuleIDDoesNotExist", orgID: "1", receivers: []config.Receiver{ { @@ -185,13 +186,17 @@ func TestDeleteRuleIDMatcher(t *testing.T) { }, ruleIDToReceivers: map[string][]string{"test-rule": {"email-receiver", "slack-receiver"}}, ruleIDsToDelete: []string{"does-not-exist"}, - expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=\"test-rule\""}}, {"receiver": "email-receiver", "continue": true, "matchers": []any{"ruleId=\"test-rule\""}}}, + expectedRoutes: []map[string]any{{"receiver": "slack-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1|test-rule\""}}, {"receiver": "email-receiver", "continue": true, "matchers": []any{"ruleId=~\"-1|test-rule\""}}}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - cfg, err := NewDefaultConfig(GlobalConfig{SMTPSmarthost: config.HostPort{Host: "localhost", Port: "25"}, SMTPFrom: "test@example.com"}, RouteConfig{}, tc.orgID) + cfg, err := NewDefaultConfig( + GlobalConfig{SMTPSmarthost: config.HostPort{Host: "localhost", Port: "25"}, SMTPFrom: 
"test@example.com"}, + RouteConfig{GroupInterval: 1 * time.Minute, GroupWait: 1 * time.Minute, RepeatInterval: 1 * time.Minute}, + tc.orgID, + ) require.NoError(t, err) for _, receiver := range tc.receivers { @@ -218,3 +223,60 @@ func TestDeleteRuleIDMatcher(t *testing.T) { }) } } + +func TestSetRouteConfigWithNilRoute(t *testing.T) { + cfg := NewConfig(&config.Config{}, "1") + err := cfg.SetRouteConfig(RouteConfig{GroupByStr: []string{"alertname"}, GroupInterval: 1 * time.Minute, GroupWait: 1 * time.Minute, RepeatInterval: 1 * time.Minute}) + require.NoError(t, err) + + assert.NotNil(t, cfg.alertmanagerConfig.Route) + assert.Equal(t, DefaultReceiverName, cfg.alertmanagerConfig.Route.Receiver) + assert.Equal(t, []string{"alertname"}, cfg.alertmanagerConfig.Route.GroupByStr) + assert.Equal(t, model.Duration(1*time.Minute), *cfg.alertmanagerConfig.Route.GroupInterval) + assert.Equal(t, model.Duration(1*time.Minute), *cfg.alertmanagerConfig.Route.GroupWait) + assert.Equal(t, model.Duration(1*time.Minute), *cfg.alertmanagerConfig.Route.RepeatInterval) +} + +func TestSetRouteConfigWithNonNilRoute(t *testing.T) { + cfg := NewConfig(&config.Config{Route: &config.Route{Receiver: "test-receiver"}}, "1") + err := cfg.SetRouteConfig(RouteConfig{GroupByStr: []string{"testgroupby"}, GroupInterval: 5 * time.Minute, GroupWait: 5 * time.Minute, RepeatInterval: 5 * time.Minute}) + require.NoError(t, err) + + assert.NotNil(t, cfg.alertmanagerConfig.Route) + assert.Equal(t, "test-receiver", cfg.alertmanagerConfig.Route.Receiver) + assert.Equal(t, []string{"testgroupby"}, cfg.alertmanagerConfig.Route.GroupByStr) + assert.Equal(t, model.Duration(5*time.Minute), *cfg.alertmanagerConfig.Route.GroupInterval) + assert.Equal(t, model.Duration(5*time.Minute), *cfg.alertmanagerConfig.Route.GroupWait) + assert.Equal(t, model.Duration(5*time.Minute), *cfg.alertmanagerConfig.Route.RepeatInterval) +} + +func TestUTF8Validation(t *testing.T) { + testCases := []struct { + name string + label string 
+ pass bool + }{ + { + name: "DotLabel", + label: "a.b.c", + pass: true, + }, + { + name: "UnderscoreLabel", + label: "a_b_c", + pass: true, + }, + { + name: "DashLabel", + label: "a-b-c", + pass: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + l := model.LabelName(tc.label) + assert.Equal(t, tc.pass, l.IsValid()) + }) + } +} diff --git a/pkg/types/alertmanagertypes/matcher.go b/pkg/types/alertmanagertypes/matcher.go new file mode 100644 index 0000000000..dd6fd76bb7 --- /dev/null +++ b/pkg/types/alertmanagertypes/matcher.go @@ -0,0 +1,87 @@ +package alertmanagertypes + +import ( + "slices" + "strings" + + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" +) + +const ( + RuleIDMatcherName string = "ruleId" + ruleIDMatcherValueSep string = "|" +) + +var ( + // noRuleIDMatcher is a matcher that matches no ruleId. + // This is used to ensure that when a new receiver is created, it does not start matching any ruleId. 
+ noRuleIDMatcher, _ = labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, "-1") +) + +func addRuleIDToRoute(route *config.Route, ruleID string) error { + matcherIdx := slices.IndexFunc(route.Matchers, func(m *labels.Matcher) bool { + return m.Name == RuleIDMatcherName + }) + + if matcherIdx == -1 { + matcher, err := labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, ruleID) + if err != nil { + return err + } + + route.Matchers = append(route.Matchers, matcher) + return nil + } + + existingRuleIDs := strings.Split(route.Matchers[matcherIdx].Value, ruleIDMatcherValueSep) + if slices.Contains(existingRuleIDs, ruleID) { + return nil + } + + existingRuleIDs = append(existingRuleIDs, ruleID) + newMatcher, err := labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, strings.Join(existingRuleIDs, ruleIDMatcherValueSep)) + if err != nil { + return err + } + route.Matchers = slices.Replace(route.Matchers, matcherIdx, matcherIdx+1, newMatcher) + + return nil +} + +func removeRuleIDFromRoute(route *config.Route, ruleID string) error { + matcherIdx := slices.IndexFunc(route.Matchers, func(m *labels.Matcher) bool { return m.Name == RuleIDMatcherName }) + if matcherIdx == -1 { + return nil + } + + existingRuleIDs := strings.Split(route.Matchers[matcherIdx].Value, ruleIDMatcherValueSep) + existingRuleIDIdx := slices.IndexFunc(existingRuleIDs, func(id string) bool { return id == ruleID }) + if existingRuleIDIdx == -1 { + return nil + } + + existingRuleIDs = slices.Delete(existingRuleIDs, existingRuleIDIdx, existingRuleIDIdx+1) + if len(existingRuleIDs) == 0 { + route.Matchers = slices.Delete(route.Matchers, matcherIdx, matcherIdx+1) + return nil + } + + newMatcher, err := labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, strings.Join(existingRuleIDs, ruleIDMatcherValueSep)) + if err != nil { + return err + } + route.Matchers = slices.Replace(route.Matchers, matcherIdx, matcherIdx+1, newMatcher) + + return nil +} + +func matcherContainsRuleID(matchers 
config.Matchers, ruleID string) bool { + for _, matcher := range matchers { + if matcher.Matches(ruleID) { + return true + } + } + + return false +} diff --git a/pkg/types/alertmanagertypes/matcher_test.go b/pkg/types/alertmanagertypes/matcher_test.go new file mode 100644 index 0000000000..cfe074abf1 --- /dev/null +++ b/pkg/types/alertmanagertypes/matcher_test.go @@ -0,0 +1,201 @@ +package alertmanagertypes + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/tidwall/gjson" +) + +func TestAddRuleIDToRoute(t *testing.T) { + testCases := []struct { + name string + route func() *config.Route + ruleID string + expectedMatchers []any + }{ + { + name: "Simple", + route: func() *config.Route { + route, err := NewRouteFromReceiver(Receiver{Name: "test"}) + require.NoError(t, err) + + return route + }, + ruleID: "1", + expectedMatchers: []any{"ruleId=~\"-1|1\""}, + }, + { + name: "AlreadyExists", + route: func() *config.Route { + route, err := NewRouteFromReceiver(Receiver{Name: "test"}) + require.NoError(t, err) + + err = addRuleIDToRoute(route, "1") + require.NoError(t, err) + + return route + }, + ruleID: "1", + expectedMatchers: []any{"ruleId=~\"-1|1\""}, + }, + { + name: "CreateMatcher", + route: func() *config.Route { + return &config.Route{Receiver: "test"} + }, + ruleID: "1", + expectedMatchers: []any{"ruleId=~\"1\""}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + route := tc.route() + err := addRuleIDToRoute(route, tc.ruleID) + assert.NoError(t, err) + + marshalledRoute, err := json.Marshal(route) + require.NoError(t, err) + + marshalledMatchers := gjson.GetBytes(marshalledRoute, "matchers").Array() + actualMatchers := make([]any, 0, len(marshalledMatchers)) + for _, matcher := range marshalledMatchers { + actualMatchers = 
append(actualMatchers, matcher.String()) + } + + assert.ElementsMatch(t, tc.expectedMatchers, actualMatchers) + }) + } +} + +func TestRemoveRuleIDFromRoute(t *testing.T) { + testCases := []struct { + name string + route func() *config.Route + ruleID string + expectedMatchers []any + }{ + { + name: "Simple", + route: func() *config.Route { + route, err := NewRouteFromReceiver(Receiver{Name: "test"}) + require.NoError(t, err) + + err = addRuleIDToRoute(route, "1") + require.NoError(t, err) + + return route + }, + ruleID: "1", + expectedMatchers: []any{"ruleId=~\"-1\""}, + }, + { + name: "DoesNotExist", + route: func() *config.Route { + route, err := NewRouteFromReceiver(Receiver{Name: "test"}) + require.NoError(t, err) + + return route + }, + ruleID: "1", + expectedMatchers: []any{"ruleId=~\"-1\""}, + }, + { + name: "DeleteMatcher", + route: func() *config.Route { + route, err := NewRouteFromReceiver(Receiver{Name: "test"}) + require.NoError(t, err) + + return route + }, + ruleID: "-1", + expectedMatchers: []any{}, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + route := tc.route() + err := removeRuleIDFromRoute(route, tc.ruleID) + assert.NoError(t, err) + + marshalledRoute, err := json.Marshal(route) + require.NoError(t, err) + + marshalledMatchers := gjson.GetBytes(marshalledRoute, "matchers").Array() + actualMatchers := make([]any, 0, len(marshalledMatchers)) + for _, matcher := range marshalledMatchers { + actualMatchers = append(actualMatchers, matcher.String()) + } + + assert.ElementsMatch(t, tc.expectedMatchers, actualMatchers) + }) + } +} + +func TestMatcherContainsRuleID(t *testing.T) { + testCases := []struct { + name string + matchers func() config.Matchers + ruleID string + expected bool + }{ + { + name: "SimpleTrue", + matchers: func() config.Matchers { + matcher, err := labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, strings.Join([]string{"-1", "1"}, ruleIDMatcherValueSep)) + require.NoError(t, err) + + return 
config.Matchers{matcher} + }, + ruleID: "1", + expected: true, + }, + { + name: "SimpleFalse", + matchers: func() config.Matchers { + matcher, err := labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, strings.Join([]string{"-1", "1"}, ruleIDMatcherValueSep)) + require.NoError(t, err) + + return config.Matchers{matcher} + }, + ruleID: "2", + expected: false, + }, + { + name: "SameCharactersFalse", + matchers: func() config.Matchers { + matcher, err := labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, strings.Join([]string{"-1", "1", "11", "12", "13", "111", "100"}, ruleIDMatcherValueSep)) + require.NoError(t, err) + + return config.Matchers{matcher} + }, + ruleID: "10", + expected: false, + }, + { + name: "SameCharactersTrue", + matchers: func() config.Matchers { + matcher, err := labels.NewMatcher(labels.MatchRegexp, RuleIDMatcherName, strings.Join([]string{"-1", "1", "11", "12", "13", "111"}, ruleIDMatcherValueSep)) + require.NoError(t, err) + + return config.Matchers{matcher} + }, + ruleID: "11", + expected: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + matchers := tc.matchers() + contains := matcherContainsRuleID(matchers, tc.ruleID) + assert.Equal(t, tc.expected, contains) + }) + } +} diff --git a/pkg/types/alertmanagertypes/receiver.go b/pkg/types/alertmanagertypes/receiver.go index 1106e782f5..55fbc48a57 100644 --- a/pkg/types/alertmanagertypes/receiver.go +++ b/pkg/types/alertmanagertypes/receiver.go @@ -10,6 +10,7 @@ import ( "github.com/prometheus/alertmanager/notify" "github.com/prometheus/alertmanager/template" "go.signoz.io/signoz/pkg/errors" + "gopkg.in/yaml.v2" "github.com/prometheus/alertmanager/config" "github.com/prometheus/alertmanager/config/receiver" @@ -20,6 +21,8 @@ type ( Receiver = config.Receiver ) +// Creates a new receiver from a string. The input is initialized with the default values from the upstream alertmanager. 
+// The only default value which is missed is `send_resolved` (as it is a bool) which if not set in the input will always be set to `false`. func NewReceiver(input string) (Receiver, error) { receiver := Receiver{} err := json.Unmarshal([]byte(input), &receiver) @@ -27,11 +30,23 @@ func NewReceiver(input string) (Receiver, error) { return Receiver{}, err } - return receiver, nil -} + // We marshal and unmarshal the receiver to ensure that the receiver is + // initialized with defaults from the upstream alertmanager. + bytes, err := yaml.Marshal(receiver) + if err != nil { + return Receiver{}, err + } -func newRouteFromReceiver(receiver Receiver) *config.Route { - return &config.Route{Receiver: receiver.Name, Continue: true} + receiverWithDefaults := Receiver{} + if err := yaml.Unmarshal(bytes, &receiverWithDefaults); err != nil { + return Receiver{}, err + } + + if err := receiverWithDefaults.UnmarshalYAML(func(i interface{}) error { return nil }); err != nil { + return Receiver{}, err + } + + return receiverWithDefaults, nil } func NewReceiverIntegrations(nc Receiver, tmpl *template.Template, logger *slog.Logger) ([]notify.Integration, error) { @@ -55,6 +70,11 @@ func TestReceiver(ctx context.Context, receiver Receiver, config *Config, tmpl * return err } + receiver, err = testConfig.GetReceiver(receiver.Name) + if err != nil { + return err + } + integrations, err := NewReceiverIntegrations(receiver, tmpl, logger) if err != nil { return err @@ -70,3 +90,27 @@ func TestReceiver(ctx context.Context, receiver Receiver, config *Config, tmpl * return nil } + +// This is needed by the legacy alertmanager to convert the MSTeamsV2Configs to MSTeamsConfigs +func MSTeamsV2ReceiverToMSTeamsReceiver(receiver Receiver) Receiver { + if receiver.MSTeamsV2Configs == nil { + return receiver + } + + var msTeamsConfigs []*config.MSTeamsConfig + for _, cfg := range receiver.MSTeamsV2Configs { + msTeamsConfigs = append(msTeamsConfigs, &config.MSTeamsConfig{ + NotifierConfig: 
cfg.NotifierConfig, + HTTPConfig: cfg.HTTPConfig, + WebhookURL: cfg.WebhookURL, + WebhookURLFile: cfg.WebhookURLFile, + Title: cfg.Title, + Text: cfg.Text, + }) + } + + receiver.MSTeamsV2Configs = nil + receiver.MSTeamsConfigs = msTeamsConfigs + + return receiver +} diff --git a/pkg/types/alertmanagertypes/receiver_test.go b/pkg/types/alertmanagertypes/receiver_test.go new file mode 100644 index 0000000000..ee4f4104ff --- /dev/null +++ b/pkg/types/alertmanagertypes/receiver_test.go @@ -0,0 +1,41 @@ +package alertmanagertypes + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewReceiver(t *testing.T) { + testCases := []struct { + name string + input string + expected string + pass bool + }{ + { + name: "TelegramConfig", + input: `{"name":"telegram","telegram_configs":[{"chat":12345,"token":"1234567890"}]}`, + expected: `{"name":"telegram","telegram_configs":[{"send_resolved":false,"token":"1234567890","chat":12345,"message":"{{ template \"telegram.default.message\" . 
}}","parse_mode":"HTML"}]}`, + pass: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + receiver, err := NewReceiver(tc.input) + if tc.pass { + assert.NoError(t, err) + + bytes, err := json.Marshal(receiver) + require.NoError(t, err) + assert.Equal(t, tc.expected, string(bytes)) + return + } + + assert.Error(t, err) + }) + } +} diff --git a/pkg/types/alertmanagertypes/route.go b/pkg/types/alertmanagertypes/route.go new file mode 100644 index 0000000000..436e8935bc --- /dev/null +++ b/pkg/types/alertmanagertypes/route.go @@ -0,0 +1,38 @@ +package alertmanagertypes + +import ( + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/common/model" +) + +func NewRouteFromRouteConfig(route *config.Route, cfg RouteConfig) (*config.Route, error) { + if route == nil { + route = &config.Route{ + Receiver: DefaultReceiverName, + GroupByStr: cfg.GroupByStr, + GroupInterval: (*model.Duration)(&cfg.GroupInterval), + GroupWait: (*model.Duration)(&cfg.GroupWait), + RepeatInterval: (*model.Duration)(&cfg.RepeatInterval), + } + } else { + route.GroupByStr = cfg.GroupByStr + route.GroupInterval = (*model.Duration)(&cfg.GroupInterval) + route.GroupWait = (*model.Duration)(&cfg.GroupWait) + route.RepeatInterval = (*model.Duration)(&cfg.RepeatInterval) + } + + if err := route.UnmarshalYAML(func(i interface{}) error { return nil }); err != nil { + return nil, err + } + + return route, nil +} + +func NewRouteFromReceiver(receiver Receiver) (*config.Route, error) { + route := &config.Route{Receiver: receiver.Name, Continue: true, Matchers: config.Matchers{noRuleIDMatcher}} + if err := route.UnmarshalYAML(func(i interface{}) error { return nil }); err != nil { + return nil, err + } + + return route, nil +}