signoz/pkg/factory/registry.go
Vibhu Pandey 1f33928bf9
feat(alertmanager): integrate with ruler (#7222)
### Summary

Integrate the new implementations of the alertmanager along with changes to the ruler. This change can be broadly categoried into 3 parts:

#### Frontend
- The earlier `/api/v1/alerts` api was double encoding the response in json and sending it to the frontend. This PR fixes the json response object. 

For instance, we have gone from the response `{
    "status": "success",
    "data": "{\"status\":\"success\",\"data\":[{\"labels\":{\"alertname\":\"[platform][consumer] consumer is above 100% memory utilization\",\"bu\":\"platform\",\"......
}` to the response `{"status":"success","data":[{"labels":{"alertname":"[Metrics] Pod CP......`

- `msteams` has been changed to `msteamsv2` wherever applicable

#### Ruler
The following changes have been done in the ruler component:

- Removal of the old alertmanager and notifier
- The RuleDB methods `Create`, `Edit` and `Delete` have been made transactional
- Introduction of a new `testPrepareNotifyFunc` for sending test notifications
- Integration with the new alertmanager

#### Alertmanager
Although a huge chunk of the alertmanagers have been merged in previous PRs (the list can be found at https://github.com/SigNoz/platform-pod/issues/404), this PR takes care of changes needed in order to incorporate it with the ruler

- Addition of ruleId based matching
- Support for marshalling the global configuration directly from the upstream alertmanager
- Addition of orgId to the legacy alertmanager
- Support for always adding defaults to both routes and receivers while creating them
- Migration to create the required alertmanager tables
- Migration for msteams to msteamsv2 has been added. We will start using msteamv2 config for the new alertmanager and keep using msteams for the old one.

#### Related Issues / PR's

Closes https://github.com/SigNoz/platform-pod/issues/404
Closes https://github.com/SigNoz/platform-pod/issues/176
2025-03-09 20:00:42 +00:00

90 lines
2.0 KiB
Go

package factory
import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"os/signal"
"syscall"
)
type Registry struct {
services NamedMap[NamedService]
logger *slog.Logger
startCh chan error
stopCh chan error
}
// New creates a new registry of services. It needs at least one service in the input.
func NewRegistry(logger *slog.Logger, services ...NamedService) (*Registry, error) {
if logger == nil {
return nil, fmt.Errorf("cannot build registry, logger is required")
}
if len(services) == 0 {
return nil, fmt.Errorf("cannot build registry, at least one service is required")
}
m, err := NewNamedMap(services...)
if err != nil {
return nil, err
}
return &Registry{
logger: logger.With("pkg", "go.signoz.io/pkg/factory"),
services: m,
startCh: make(chan error, 1),
stopCh: make(chan error, len(services)),
}, nil
}
func (r *Registry) Start(ctx context.Context) {
for _, s := range r.services.GetInOrder() {
go func(s NamedService) {
r.logger.InfoContext(ctx, "starting service", "service", s.Name())
err := s.Start(ctx)
r.startCh <- err
}(s)
}
}
func (r *Registry) Wait(ctx context.Context) error {
interrupt := make(chan os.Signal, 1)
signal.Notify(interrupt, syscall.SIGINT, syscall.SIGTERM)
select {
case <-ctx.Done():
r.logger.InfoContext(ctx, "caught context error, exiting", "error", ctx.Err())
case s := <-interrupt:
r.logger.InfoContext(ctx, "caught interrupt signal, exiting", "signal", s)
case err := <-r.startCh:
r.logger.ErrorContext(ctx, "caught service error, exiting", "error", err)
return err
}
return nil
}
func (r *Registry) Stop(ctx context.Context) error {
for _, s := range r.services.GetInOrder() {
go func(s NamedService) {
r.logger.InfoContext(ctx, "stopping service", "service", s.Name())
err := s.Stop(ctx)
r.stopCh <- err
}(s)
}
errs := make([]error, len(r.services.GetInOrder()))
for i := 0; i < len(r.services.GetInOrder()); i++ {
err := <-r.stopCh
if err != nil {
errs = append(errs, err)
}
}
return errors.Join(errs...)
}