fix: address gaps in alert to notification link (#4573)

Srikanth Chekuri 2024-02-20 10:42:30 +05:30 committed by GitHub
parent e2669eb370
commit aa67b47053
4 changed files with 48 additions and 19 deletions


@@ -50,6 +50,8 @@ function CreateAlertChannels({
 *Summary:* {{ .Annotations.summary }}
 *Description:* {{ .Annotations.description }}
+*RelatedLogs:* {{ .Annotations.related_logs }}
+*RelatedTraces:* {{ .Annotations.related_traces }}
 *Details:*
 {{ range .Labels.SortedPairs }} *{{ .Name }}:* {{ .Value }}
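
For reference, the two added lines are Go template fields that Alertmanager resolves from the alert's annotations; they stay empty unless the backend sets related_logs / related_traces. A minimal, hypothetical sketch of how such a template renders (the alert struct and sample values below are made up, not SigNoz code):

package main

import (
	"os"
	"text/template"
)

// alert is a tiny stand-in for the data Alertmanager passes to notification templates.
type alert struct {
	Annotations map[string]string
}

func main() {
	const msg = "*Summary:* {{ .Annotations.summary }}\n" +
		"*RelatedLogs:* {{ .Annotations.related_logs }}\n" +
		"*RelatedTraces:* {{ .Annotations.related_traces }}\n"
	t := template.Must(template.New("slack").Parse(msg))
	// The related_* keys only render when the rule evaluation sets them.
	_ = t.Execute(os.Stdout, alert{Annotations: map[string]string{
		"summary":        "error rate above threshold",
		"related_logs":   "https://signoz.example.com/logs/logs-explorer?<encoded-filters>",
		"related_traces": "https://signoz.example.com/traces-explorer?<encoded-filters>",
	}})
}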


@@ -225,7 +225,7 @@ func prepareRuleGeneratorURL(ruleId string, source string) string {
 	}
 	// check if source is a valid url
-	_, err := url.Parse(source)
+	parsedSource, err := url.Parse(source)
 	if err != nil {
 		return ""
 	}
@@ -239,5 +239,12 @@ func prepareRuleGeneratorURL(ruleId string, source string) string {
 		return ruleURL
 	}
-	return source
+	// The source contains the encoded query, start and end time
+	// and other parameters. We don't want to include them in the generator URL
+	// mainly to keep the URL short and lower the alert body contents
+	// The generator URL with /alerts/edit?ruleId= is enough
+	if parsedSource.Port() != "" {
+		return fmt.Sprintf("%s://%s:%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), parsedSource.Port(), ruleId)
+	}
+	return fmt.Sprintf("%s://%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), ruleId)
 }
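
To make the new behaviour concrete, here is a small self-contained sketch of the trimmed generator URL; the helper name and the sample source URL are illustrative, not the repository's exact code:

package main

import (
	"fmt"
	"net/url"
)

// generatorURL reduces a (possibly very long) source URL to a short
// scheme://host[:port]/alerts/edit?ruleId=<id> link, as in the hunk above.
func generatorURL(ruleId, source string) string {
	parsedSource, err := url.Parse(source)
	if err != nil {
		return ""
	}
	if parsedSource.Port() != "" {
		return fmt.Sprintf("%s://%s:%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), parsedSource.Port(), ruleId)
	}
	return fmt.Sprintf("%s://%s/alerts/edit?ruleId=%s", parsedSource.Scheme, parsedSource.Hostname(), ruleId)
}

func main() {
	// A long explorer URL with an encoded query collapses to a short edit link.
	fmt.Println(generatorURL("27", "https://signoz.example.com:3301/alerts/new?compositeQuery=abc&startTime=123"))
	// Prints: https://signoz.example.com:3301/alerts/edit?ruleId=27
}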


@@ -16,6 +16,10 @@ type Sample struct {
 	Point
 	Metric labels.Labels
+
+	// Label keys as-is from the result query.
+	// The original labels are used to prepare the related{logs, traces} link in alert notification
+	MetricOrig labels.Labels
 }

 func (s Sample) String() string {


@@ -437,7 +437,13 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
 	}
 	sample := Sample{}
+	// Why do we maintain two labels sets? Alertmanager requires
+	// label keys to follow the model https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
+	// However, our traces and logs explorers support label keys with dot and other namespace characters
+	// Using normalized label keys results in invalid filter criteria.
+	// The original labels are used to prepare the related{logs, traces} link in alert notification
 	lbls := labels.NewBuilder(labels.Labels{})
+	lblsOrig := labels.NewBuilder(labels.Labels{})
 	for i, v := range vars {
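
As a rough illustration of why two label builders are kept: Alertmanager-compatible keys must be normalized, while the explorer links need the original dotted keys to build valid filter criteria. The normalization helper below is a hypothetical stand-in, not the rule package's actual function:

package main

import (
	"fmt"
	"regexp"
)

// Hypothetical stand-in for label-name normalization: Prometheus label names
// may only contain [a-zA-Z0-9_], so dotted keys from query result columns get rewritten.
var invalidLabelChars = regexp.MustCompile(`[^a-zA-Z0-9_]`)

func normalizeLabelName(name string) string {
	return invalidLabelChars.ReplaceAllString(name, "_")
}

func main() {
	colName := "service.name"                // dotted key as it comes back from the query result
	fmt.Println(normalizeLabelName(colName)) // service_name -> stored in sample.Metric for Alertmanager
	fmt.Println(colName)                     // service.name -> stored in sample.MetricOrig for explorer links
}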
@@ -446,6 +452,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
 		switch v := v.(type) {
 		case *string:
 			lbls.Set(colName, *v)
+			lblsOrig.Set(columnNames[i], *v)
 		case *time.Time:
 			timval := *v
@@ -453,6 +460,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
 				sample.Point.T = timval.Unix()
 			} else {
 				lbls.Set(colName, timval.Format("2006-01-02 15:04:05"))
+				lblsOrig.Set(columnNames[i], timval.Format("2006-01-02 15:04:05"))
 			}
 		case *float64:
@@ -460,6 +468,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
 				sample.Point.V = *v
 			} else {
 				lbls.Set(colName, fmt.Sprintf("%f", *v))
+				lblsOrig.Set(columnNames[i], fmt.Sprintf("%f", *v))
 			}
 		case **float64:
 			// ch seems to return this type when column is derived from
@@ -470,6 +479,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
 					sample.Point.V = *floatVal
 				} else {
 					lbls.Set(colName, fmt.Sprintf("%f", *floatVal))
+					lblsOrig.Set(columnNames[i], fmt.Sprintf("%f", *floatVal))
 				}
 			}
 		case *float32:
@@ -478,18 +488,21 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
 				sample.Point.V = float64(float32Val)
 			} else {
 				lbls.Set(colName, fmt.Sprintf("%f", float32Val))
+				lblsOrig.Set(columnNames[i], fmt.Sprintf("%f", float32Val))
 			}
 		case *uint8, *uint64, *uint16, *uint32:
 			if _, ok := constants.ReservedColumnTargetAliases[colName]; ok {
 				sample.Point.V = float64(reflect.ValueOf(v).Elem().Uint())
 			} else {
 				lbls.Set(colName, fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Uint()))
+				lblsOrig.Set(columnNames[i], fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Uint()))
 			}
 		case *int8, *int16, *int32, *int64:
 			if _, ok := constants.ReservedColumnTargetAliases[colName]; ok {
 				sample.Point.V = float64(reflect.ValueOf(v).Elem().Int())
 			} else {
 				lbls.Set(colName, fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Int()))
+				lblsOrig.Set(columnNames[i], fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Int()))
 			}
 		default:
 			zap.S().Errorf("ruleId:", r.ID(), "\t error: invalid var found in query result", v, columnNames[i])
@@ -503,6 +516,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer
 		// capture lables in result
 		sample.Metric = lbls.Labels()
+		sample.MetricOrig = lblsOrig.Labels()
 		labelHash := lbls.Labels().Hash()
@@ -750,7 +764,7 @@ func (r *ThresholdRule) prepareLinksToLogs(ts time.Time, lbls labels.Labels) str
 	}
 	data, _ := json.Marshal(urlData)
-	compositeQuery := url.QueryEscape(url.QueryEscape(string(data)))
+	compositeQuery := url.QueryEscape(string(data))
 	optionsData, _ := json.Marshal(options)
 	urlEncodedOptions := url.QueryEscape(string(optionsData))
@@ -813,8 +827,7 @@ func (r *ThresholdRule) prepareLinksToTraces(ts time.Time, lbls labels.Labels) s
 	}
 	data, _ := json.Marshal(urlData)
-	// We need to double encode the composite query to remain compatible with the UI
-	compositeQuery := url.QueryEscape(url.QueryEscape(string(data)))
+	compositeQuery := url.QueryEscape(string(data))
 	optionsData, _ := json.Marshal(options)
 	urlEncodedOptions := url.QueryEscape(string(optionsData))
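
The switch from double to single encoding is easy to see with net/url directly; this standalone sketch uses a made-up composite query payload:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	data := `{"queryType":"builder","builder":{"queryData":[]}}` // made-up composite query JSON
	single := url.QueryEscape(data)
	double := url.QueryEscape(single)
	// The new related-link code applies QueryEscape once, so a single
	// QueryUnescape recovers the JSON; the old code applied it twice.
	fmt.Println("single:", single)
	fmt.Println("double:", double)
}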
@@ -828,9 +841,9 @@ func (r *ThresholdRule) hostFromSource() string {
 		return ""
 	}
 	if parsedUrl.Port() != "" {
-		return fmt.Sprintf("%s:%s", parsedUrl.Hostname(), parsedUrl.Port())
+		return fmt.Sprintf("%s://%s:%s", parsedUrl.Scheme, parsedUrl.Hostname(), parsedUrl.Port())
 	}
-	return parsedUrl.Hostname()
+	return fmt.Sprintf("%s://%s", parsedUrl.Scheme, parsedUrl.Hostname())
 }

 func (r *ThresholdRule) prepareClickhouseQueries(ts time.Time) (map[string]string, error) {
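
Before this change hostFromSource returned only host (and port), so the related links had no scheme; a quick sketch of the difference, with a hypothetical source URL:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	parsedUrl, _ := url.Parse("https://signoz.example.com:3301/alerts/new") // hypothetical rule source
	// old behaviour: host and port only, no scheme
	fmt.Printf("%s:%s\n", parsedUrl.Hostname(), parsedUrl.Port()) // signoz.example.com:3301
	// new behaviour: scheme included, so the link in the notification is directly clickable
	fmt.Printf("%s://%s:%s\n", parsedUrl.Scheme, parsedUrl.Hostname(), parsedUrl.Port()) // https://signoz.example.com:3301
}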
@@ -1054,23 +1067,26 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie
 		lb.Set(labels.AlertRuleIdLabel, r.ID())
 		lb.Set(labels.RuleSourceLabel, r.GeneratorURL())

-		if r.typ == "TRACES_BASED_ALERT" {
-			link := r.prepareLinksToTraces(ts, smpl.Metric)
-			if link != "" && r.hostFromSource() != "" {
-				lb.Set("RelatedTraces", fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link))
-			}
-		} else if r.typ == "LOGS_BASED_ALERT" {
-			link := r.prepareLinksToLogs(ts, smpl.Metric)
-			if link != "" && r.hostFromSource() != "" {
-				lb.Set("RelatedLogs", fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link))
-			}
-		}
-
 		annotations := make(labels.Labels, 0, len(r.annotations))
 		for _, a := range r.annotations {
 			annotations = append(annotations, labels.Label{Name: normalizeLabelName(a.Name), Value: expand(a.Value)})
 		}

+		// Links with timestamps should go in annotations since labels
+		// is used alert grouping, and we want to group alerts with the same
+		// label set, but different timestamps, together.
+		if r.typ == "TRACES_BASED_ALERT" {
+			link := r.prepareLinksToTraces(ts, smpl.MetricOrig)
+			if link != "" && r.hostFromSource() != "" {
+				annotations = append(annotations, labels.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
+			}
+		} else if r.typ == "LOGS_BASED_ALERT" {
+			link := r.prepareLinksToLogs(ts, smpl.MetricOrig)
+			if link != "" && r.hostFromSource() != "" {
+				annotations = append(annotations, labels.Label{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link)})
+			}
+		}
+
 		lbs := lb.Labels()
 		h := lbs.Hash()
 		resultFPs[h] = struct{}{}
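
The comment in the hunk above is the key point: Alertmanager groups alerts by their label set, so a link whose query string embeds evaluation timestamps would make every evaluation look like a distinct alert if it lived in labels. A rough sketch of that effect, using a simplified hash in place of the project's labels package:

package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// hash mimics how a label set becomes a grouping key: same labels, same hash.
func hash(lbls map[string]string) uint64 {
	keys := make([]string, 0, len(lbls))
	for k := range lbls {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	h := fnv.New64a()
	for _, k := range keys {
		h.Write([]byte(k))
		h.Write([]byte(lbls[k]))
	}
	return h.Sum64()
}

func main() {
	base := map[string]string{"alertname": "HighErrorRate", "service": "checkout"}
	withLink1 := map[string]string{"alertname": "HighErrorRate", "service": "checkout", "RelatedLogs": "https://signoz.example.com/logs/logs-explorer?start=1708400000"}
	withLink2 := map[string]string{"alertname": "HighErrorRate", "service": "checkout", "RelatedLogs": "https://signoz.example.com/logs/logs-explorer?start=1708400060"}

	fmt.Println(hash(base) == hash(base))           // true: stable grouping key when links stay in annotations
	fmt.Println(hash(withLink1) == hash(withLink2)) // false: timestamped link in labels splits the group
}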