mirror of
https://git.mirrors.martin98.com/https://github.com/SigNoz/signoz
synced 2025-08-12 17:19:00 +08:00
chore: make eval delay configurable (#5649)
This commit is contained in:
parent
6f73bb6eca
commit
f031845300
@ -728,6 +728,7 @@ func makeRulesManager(
|
||||
DisableRules: disableRules,
|
||||
FeatureFlags: fm,
|
||||
Reader: ch,
|
||||
EvalDelay: baseconst.GetEvalDelay(),
|
||||
}
|
||||
|
||||
// create Manager
|
||||
|
@ -714,6 +714,7 @@ func makeRulesManager(
|
||||
DisableRules: disableRules,
|
||||
FeatureFlags: fm,
|
||||
Reader: ch,
|
||||
EvalDelay: constants.GetEvalDelay(),
|
||||
}
|
||||
|
||||
// create Manager
|
||||
|
@ -152,6 +152,15 @@ func GetContextTimeoutMaxAllowed() time.Duration {
|
||||
return contextTimeoutDuration
|
||||
}
|
||||
|
||||
func GetEvalDelay() time.Duration {
|
||||
evalDelayStr := GetOrDefaultEnv("RULES_EVAL_DELAY", "2m")
|
||||
evalDelayDuration, err := time.ParseDuration(evalDelayStr)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return evalDelayDuration
|
||||
}
|
||||
|
||||
var ContextTimeoutMaxAllowed = GetContextTimeoutMaxAllowed()
|
||||
|
||||
const (
|
||||
|
@ -63,6 +63,8 @@ type ManagerOptions struct {
|
||||
DisableRules bool
|
||||
FeatureFlags interfaces.FeatureLookup
|
||||
Reader interfaces.Reader
|
||||
|
||||
EvalDelay time.Duration
|
||||
}
|
||||
|
||||
// The Manager manages recording and alerting rules.
|
||||
@ -524,7 +526,9 @@ func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string
|
||||
tr, err := NewThresholdRule(
|
||||
ruleId,
|
||||
r,
|
||||
ThresholdRuleOpts{},
|
||||
ThresholdRuleOpts{
|
||||
EvalDelay: m.opts.EvalDelay,
|
||||
},
|
||||
m.featureFlags,
|
||||
m.reader,
|
||||
)
|
||||
|
@ -75,6 +75,8 @@ type ThresholdRule struct {
|
||||
|
||||
querier interfaces.Querier
|
||||
querierV2 interfaces.Querier
|
||||
|
||||
evalDelay time.Duration
|
||||
}
|
||||
|
||||
type ThresholdRuleOpts struct {
|
||||
@ -86,6 +88,12 @@ type ThresholdRuleOpts struct {
|
||||
// sendAlways will send alert irrespective of resendDelay
|
||||
// or other params
|
||||
SendAlways bool
|
||||
|
||||
// EvalDelay is the time to wait for data to be available
|
||||
// before evaluating the rule. This is useful in scenarios
|
||||
// where data might not be available in the system immediately
|
||||
// after the timestamp.
|
||||
EvalDelay time.Duration
|
||||
}
|
||||
|
||||
func NewThresholdRule(
|
||||
@ -96,6 +104,8 @@ func NewThresholdRule(
|
||||
reader interfaces.Reader,
|
||||
) (*ThresholdRule, error) {
|
||||
|
||||
zap.L().Info("creating new ThresholdRule", zap.String("id", id), zap.Any("opts", opts))
|
||||
|
||||
if p.RuleCondition == nil {
|
||||
return nil, fmt.Errorf("no rule condition")
|
||||
} else if !p.RuleCondition.IsValid() {
|
||||
@ -117,6 +127,7 @@ func NewThresholdRule(
|
||||
typ: p.AlertType,
|
||||
version: p.Version,
|
||||
temporalityMap: make(map[string]map[v3.Temporality]bool),
|
||||
evalDelay: opts.EvalDelay,
|
||||
}
|
||||
|
||||
if int64(t.evalWindow) == 0 {
|
||||
@ -402,7 +413,6 @@ func (r *ThresholdRule) ForEachActiveAlert(f func(*Alert)) {
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) {
|
||||
zap.L().Info("sending alerts", zap.String("rule", r.Name()))
|
||||
alerts := []*Alert{}
|
||||
r.ForEachActiveAlert(func(alert *Alert) {
|
||||
if r.opts.SendAlways || alert.needsSending(ts, resendDelay) {
|
||||
@ -431,11 +441,14 @@ func (r *ThresholdRule) Unit() string {
|
||||
|
||||
func (r *ThresholdRule) prepareQueryRange(ts time.Time) *v3.QueryRangeParamsV3 {
|
||||
|
||||
// todo(srikanthccv): make this configurable
|
||||
// 2 minutes is reasonable time to wait for data to be available
|
||||
// 60 seconds (SDK) + 10 seconds (batch) + rest for n/w + serialization + write to disk etc..
|
||||
start := ts.Add(-time.Duration(r.evalWindow)).UnixMilli() - 2*60*1000
|
||||
end := ts.UnixMilli() - 2*60*1000
|
||||
zap.L().Info("prepareQueryRange", zap.Int64("ts", ts.UnixMilli()), zap.Int64("evalWindow", r.evalWindow.Milliseconds()), zap.Int64("evalDelay", r.evalDelay.Milliseconds()))
|
||||
|
||||
start := ts.Add(-time.Duration(r.evalWindow)).UnixMilli()
|
||||
end := ts.UnixMilli()
|
||||
if r.evalDelay > 0 {
|
||||
start = start - int64(r.evalDelay.Milliseconds())
|
||||
end = end - int64(r.evalDelay.Milliseconds())
|
||||
}
|
||||
// round to minute otherwise we could potentially miss data
|
||||
start = start - (start % (60 * 1000))
|
||||
end = end - (end % (60 * 1000))
|
||||
|
@ -611,7 +611,7 @@ func TestThresholdRuleShouldAlert(t *testing.T) {
|
||||
postableRule.RuleCondition.MatchType = MatchType(c.matchType)
|
||||
postableRule.RuleCondition.Target = &c.target
|
||||
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{}, fm, nil)
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{EvalDelay: 2 * time.Minute}, fm, nil)
|
||||
if err != nil {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
@ -697,7 +697,7 @@ func TestPrepareLinksToLogs(t *testing.T) {
|
||||
}
|
||||
fm := featureManager.StartManager()
|
||||
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{}, fm, nil)
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{EvalDelay: 2 * time.Minute}, fm, nil)
|
||||
if err != nil {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
@ -739,7 +739,7 @@ func TestPrepareLinksToTraces(t *testing.T) {
|
||||
}
|
||||
fm := featureManager.StartManager()
|
||||
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{}, fm, nil)
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{EvalDelay: 2 * time.Minute}, fm, nil)
|
||||
if err != nil {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
@ -815,7 +815,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
|
||||
postableRule.RuleCondition.MatchType = MatchType(c.matchType)
|
||||
postableRule.RuleCondition.Target = &c.target
|
||||
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{}, fm, nil)
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{EvalDelay: 2 * time.Minute}, fm, nil)
|
||||
if err != nil {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
@ -834,6 +834,55 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestThresholdRuleEvalDelay(t *testing.T) {
|
||||
postableRule := PostableRule{
|
||||
AlertName: "Test Eval Delay",
|
||||
AlertType: "METRIC_BASED_ALERT",
|
||||
RuleType: RuleTypeThreshold,
|
||||
EvalWindow: Duration(5 * time.Minute),
|
||||
Frequency: Duration(1 * time.Minute),
|
||||
RuleCondition: &RuleCondition{
|
||||
CompositeQuery: &v3.CompositeQuery{
|
||||
QueryType: v3.QueryTypeClickHouseSQL,
|
||||
ClickHouseQueries: map[string]*v3.ClickHouseQuery{
|
||||
"A": {
|
||||
Query: "SELECT 1 >= {{.start_timestamp_ms}} AND 1 <= {{.end_timestamp_ms}}",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// 01:39:47
|
||||
ts := time.Unix(1717205987, 0)
|
||||
|
||||
cases := []struct {
|
||||
expectedQuery string
|
||||
}{
|
||||
// Test cases for Equals Always
|
||||
{
|
||||
// 01:34:00 - 01:39:00
|
||||
expectedQuery: "SELECT 1 >= 1717205640000 AND 1 <= 1717205940000",
|
||||
},
|
||||
}
|
||||
|
||||
fm := featureManager.StartManager()
|
||||
for idx, c := range cases {
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{}, fm, nil) // no eval delay
|
||||
if err != nil {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
params := rule.prepareQueryRange(ts)
|
||||
|
||||
assert.Equal(t, c.expectedQuery, params.CompositeQuery.ClickHouseQueries["A"].Query, "Test case %d", idx)
|
||||
|
||||
secondTimeParams := rule.prepareQueryRange(ts)
|
||||
|
||||
assert.Equal(t, c.expectedQuery, secondTimeParams.CompositeQuery.ClickHouseQueries["A"].Query, "Test case %d", idx)
|
||||
}
|
||||
}
|
||||
|
||||
func TestThresholdRuleClickHouseTmpl(t *testing.T) {
|
||||
postableRule := PostableRule{
|
||||
AlertName: "Tricky Condition Tests",
|
||||
@ -868,7 +917,7 @@ func TestThresholdRuleClickHouseTmpl(t *testing.T) {
|
||||
|
||||
fm := featureManager.StartManager()
|
||||
for idx, c := range cases {
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{}, fm, nil)
|
||||
rule, err := NewThresholdRule("69", &postableRule, ThresholdRuleOpts{EvalDelay: 2 * time.Minute}, fm, nil)
|
||||
if err != nil {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user