package metricsexplorer import ( "context" "encoding/json" "errors" "sort" "strings" "time" "go.uber.org/zap" "go.signoz.io/signoz/pkg/query-service/app/dashboards" "go.signoz.io/signoz/pkg/query-service/interfaces" "go.signoz.io/signoz/pkg/query-service/model" "go.signoz.io/signoz/pkg/query-service/model/metrics_explorer" v3 "go.signoz.io/signoz/pkg/query-service/model/v3" "go.signoz.io/signoz/pkg/query-service/rules" "go.signoz.io/signoz/pkg/types/authtypes" "golang.org/x/sync/errgroup" ) type SummaryService struct { reader interfaces.Reader rulesManager *rules.Manager } func NewSummaryService(reader interfaces.Reader, alertManager *rules.Manager) *SummaryService { return &SummaryService{reader: reader, rulesManager: alertManager} } func (receiver *SummaryService) FilterKeys(ctx context.Context, params *metrics_explorer.FilterKeyRequest) (*metrics_explorer.FilterKeyResponse, *model.ApiError) { var response metrics_explorer.FilterKeyResponse keys, apiError := receiver.reader.GetAllMetricFilterAttributeKeys( ctx, params, true, ) if apiError != nil { return nil, apiError } response.AttributeKeys = *keys var availableColumnFilter []string for key := range metrics_explorer.AvailableColumnFilterMap { availableColumnFilter = append(availableColumnFilter, key) } response.MetricColumns = availableColumnFilter return &response, nil } func (receiver *SummaryService) FilterValues(ctx context.Context, params *metrics_explorer.FilterValueRequest) (*metrics_explorer.FilterValueResponse, *model.ApiError) { var response metrics_explorer.FilterValueResponse switch params.FilterKey { case "metric_name": var filterValues []string request := v3.AggregateAttributeRequest{DataSource: v3.DataSourceMetrics, SearchText: params.SearchText, Limit: params.Limit} attributes, err := receiver.reader.GetMetricAggregateAttributes(ctx, &request, true) if err != nil { return nil, model.InternalError(err) } for _, item := range attributes.AttributeKeys { filterValues = append(filterValues, item.Key) } response.FilterValues = filterValues return &response, nil case "metric_unit": attributes, err := receiver.reader.GetAllMetricFilterUnits(ctx, params) if err != nil { return nil, err } response.FilterValues = attributes return &response, nil case "metric_type": attributes, err := receiver.reader.GetAllMetricFilterTypes(ctx, params) if err != nil { return nil, err } response.FilterValues = attributes return &response, nil default: attributes, err := receiver.reader.GetAllMetricFilterAttributeValues(ctx, params) if err != nil { return nil, err } response.FilterValues = attributes return &response, nil } } func (receiver *SummaryService) GetMetricsSummary(ctx context.Context, metricName string) (metrics_explorer.MetricDetailsDTO, *model.ApiError) { var metricDetailsDTO metrics_explorer.MetricDetailsDTO g, ctx := errgroup.WithContext(ctx) // Call 1: GetMetricMetadata g.Go(func() error { metadata, err := receiver.reader.GetMetricMetadata(ctx, metricName, metricName) if err != nil { return &model.ApiError{Typ: "ClickHouseError", Err: err} } metricDetailsDTO.Name = metricName metricDetailsDTO.Unit = metadata.Unit metricDetailsDTO.Description = metadata.Description metricDetailsDTO.Type = metadata.Type metricDetailsDTO.Metadata.MetricType = metadata.Type metricDetailsDTO.Metadata.Description = metadata.Description metricDetailsDTO.Metadata.Unit = metadata.Unit return nil }) g.Go(func() error { dataPoints, err := receiver.reader.GetMetricsDataPoints(ctx, metricName) if err != nil { return err } metricDetailsDTO.Samples = dataPoints return nil }) g.Go(func() error { lastReceived, err := receiver.reader.GetMetricsLastReceived(ctx, metricName) if err != nil { return err } metricDetailsDTO.LastReceived = lastReceived return nil }) g.Go(func() error { totalSeries, err := receiver.reader.GetTotalTimeSeriesForMetricName(ctx, metricName) if err != nil { return err } metricDetailsDTO.TimeSeriesTotal = totalSeries return nil }) g.Go(func() error { activeSeries, err := receiver.reader.GetActiveTimeSeriesForMetricName(ctx, metricName, 120*time.Minute) if err != nil { return err } metricDetailsDTO.TimeSeriesActive = activeSeries return nil }) g.Go(func() error { attributes, err := receiver.reader.GetAttributesForMetricName(ctx, metricName, nil, nil) if err != nil { return err } if attributes != nil { metricDetailsDTO.Attributes = *attributes } return nil }) g.Go(func() error { var metricNames []string metricNames = append(metricNames, metricName) claims, ok := authtypes.ClaimsFromContext(ctx) if !ok { return &model.ApiError{Typ: model.ErrorInternal, Err: errors.New("failed to get claims")} } data, err := dashboards.GetDashboardsWithMetricNames(ctx, claims.OrgID, metricNames) if err != nil { return err } if data != nil { jsonData, err := json.Marshal(data) if err != nil { zap.L().Error("Error marshalling data:", zap.Error(err)) return &model.ApiError{Typ: "MarshallingErr", Err: err} } var dashboards map[string][]metrics_explorer.Dashboard err = json.Unmarshal(jsonData, &dashboards) if err != nil { zap.L().Error("Error unmarshalling data:", zap.Error(err)) return &model.ApiError{Typ: "UnMarshallingErr", Err: err} } if _, ok := dashboards[metricName]; ok { metricDetailsDTO.Dashboards = dashboards[metricName] } } return nil }) g.Go(func() error { var metrics []string var metricsAlerts []metrics_explorer.Alert metrics = append(metrics, metricName) data, err := receiver.rulesManager.GetAlertDetailsForMetricNames(ctx, metrics) if err != nil { return err } if rulesLists, ok := data[metricName]; ok { for _, rule := range rulesLists { metricsAlerts = append(metricsAlerts, metrics_explorer.Alert{AlertName: rule.AlertName, AlertID: rule.Id}) } } metricDetailsDTO.Alerts = metricsAlerts return nil }) // Wait for all goroutines and handle any errors if err := g.Wait(); err != nil { var apiErr *model.ApiError if errors.As(err, &apiErr) { return metrics_explorer.MetricDetailsDTO{}, apiErr } return metrics_explorer.MetricDetailsDTO{}, &model.ApiError{Typ: "InternalError", Err: err} } return metricDetailsDTO, nil } func (receiver *SummaryService) ListMetricsWithSummary(ctx context.Context, params *metrics_explorer.SummaryListMetricsRequest) (*metrics_explorer.SummaryListMetricsResponse, *model.ApiError) { return receiver.reader.ListSummaryMetrics(ctx, params) } func (receiver *SummaryService) GetMetricsTreemap(ctx context.Context, params *metrics_explorer.TreeMapMetricsRequest) (*metrics_explorer.TreeMap, *model.ApiError) { var response metrics_explorer.TreeMap switch params.Treemap { case metrics_explorer.TimeSeriesTeeMap: cardinality, apiError := receiver.reader.GetMetricsTimeSeriesPercentage(ctx, params) if apiError != nil { return nil, apiError } if cardinality != nil { response.TimeSeries = *cardinality } return &response, nil case metrics_explorer.SamplesTreeMap: dataPoints, apiError := receiver.reader.GetMetricsSamplesPercentage(ctx, params) if apiError != nil { return nil, apiError } if dataPoints != nil { response.Samples = *dataPoints } return &response, nil default: return nil, nil } } func (receiver *SummaryService) GetRelatedMetrics(ctx context.Context, params *metrics_explorer.RelatedMetricsRequest) (*metrics_explorer.RelatedMetricsResponse, *model.ApiError) { // Get name similarity scores nameSimilarityScores, err := receiver.reader.GetNameSimilarity(ctx, params) if err != nil { return nil, err } attrCtx, cancel := context.WithTimeout(ctx, 20*time.Second) defer cancel() attrSimilarityScores, err := receiver.reader.GetAttributeSimilarity(attrCtx, params) if err != nil { // If we hit a deadline exceeded error, proceed with only name similarity if errors.Is(err.Err, context.DeadlineExceeded) { zap.L().Warn("Attribute similarity calculation timed out, proceeding with name similarity only") attrSimilarityScores = make(map[string]metrics_explorer.RelatedMetricsScore) } else { return nil, err } } // Combine scores and compute final scores finalScores := make(map[string]float64) relatedMetricsMap := make(map[string]metrics_explorer.RelatedMetricsScore) // Merge name and attribute similarity scores for metric, nameScore := range nameSimilarityScores { attrScore, exists := attrSimilarityScores[metric] if exists { relatedMetricsMap[metric] = metrics_explorer.RelatedMetricsScore{ NameSimilarity: nameScore.NameSimilarity, AttributeSimilarity: attrScore.AttributeSimilarity, Filters: attrScore.Filters, MetricType: attrScore.MetricType, Temporality: attrScore.Temporality, IsMonotonic: attrScore.IsMonotonic, } } else { relatedMetricsMap[metric] = nameScore } finalScores[metric] = nameScore.NameSimilarity*0.7 + relatedMetricsMap[metric].AttributeSimilarity*0.3 } // Handle metrics that are only present in attribute similarity scores for metric, attrScore := range attrSimilarityScores { if _, exists := nameSimilarityScores[metric]; !exists { relatedMetricsMap[metric] = metrics_explorer.RelatedMetricsScore{ AttributeSimilarity: attrScore.AttributeSimilarity, Filters: attrScore.Filters, MetricType: attrScore.MetricType, Temporality: attrScore.Temporality, IsMonotonic: attrScore.IsMonotonic, } finalScores[metric] = attrScore.AttributeSimilarity * 0.3 } } type metricScore struct { Name string Score float64 } var sortedScores []metricScore for metric, score := range finalScores { sortedScores = append(sortedScores, metricScore{ Name: metric, Score: score, }) } sort.Slice(sortedScores, func(i, j int) bool { return sortedScores[i].Score > sortedScores[j].Score }) metricNames := make([]string, len(sortedScores)) for i, ms := range sortedScores { metricNames[i] = ms.Name } // Fetch dashboards and alerts concurrently g, ctx := errgroup.WithContext(ctx) dashboardsRelatedData := make(map[string][]metrics_explorer.Dashboard) alertsRelatedData := make(map[string][]metrics_explorer.Alert) g.Go(func() error { claims, ok := authtypes.ClaimsFromContext(ctx) if !ok { return &model.ApiError{Typ: model.ErrorInternal, Err: errors.New("failed to get claims")} } names, apiError := dashboards.GetDashboardsWithMetricNames(ctx, claims.OrgID, metricNames) if apiError != nil { return apiError } if names != nil { jsonData, err := json.Marshal(names) if err != nil { zap.L().Error("Error marshalling dashboard data", zap.Error(err)) return &model.ApiError{Typ: "MarshallingErr", Err: err} } err = json.Unmarshal(jsonData, &dashboardsRelatedData) if err != nil { zap.L().Error("Error unmarshalling dashboard data", zap.Error(err)) return &model.ApiError{Typ: "UnMarshallingErr", Err: err} } } return nil }) g.Go(func() error { rulesData, apiError := receiver.rulesManager.GetAlertDetailsForMetricNames(ctx, metricNames) if apiError != nil { return apiError } for s, gettableRules := range rulesData { var metricsRules []metrics_explorer.Alert for _, rule := range gettableRules { metricsRules = append(metricsRules, metrics_explorer.Alert{AlertID: rule.Id, AlertName: rule.AlertName}) } alertsRelatedData[s] = metricsRules } return nil }) // Check for context cancellation before waiting if ctx.Err() != nil { return nil, &model.ApiError{Typ: "ContextCanceled", Err: ctx.Err()} } if err := g.Wait(); err != nil { var apiErr *model.ApiError if errors.As(err, &apiErr) { return nil, apiErr } return nil, &model.ApiError{Typ: "InternalError", Err: err} } // Build response var response metrics_explorer.RelatedMetricsResponse for _, ms := range sortedScores { relatedMetric := metrics_explorer.RelatedMetrics{ Name: ms.Name, Query: getQueryRangeForRelateMetricsList(ms.Name, relatedMetricsMap[ms.Name]), } if dashboardsDetails, ok := dashboardsRelatedData[ms.Name]; ok { relatedMetric.Dashboards = dashboardsDetails } if alerts, ok := alertsRelatedData[ms.Name]; ok { relatedMetric.Alerts = alerts } response.RelatedMetrics = append(response.RelatedMetrics, relatedMetric) } return &response, nil } func getQueryRangeForRelateMetricsList(metricName string, scores metrics_explorer.RelatedMetricsScore) *v3.BuilderQuery { var filterItems []v3.FilterItem for _, pair := range scores.Filters { if len(pair) < 2 { continue // Skip invalid filter pairs. } filterItem := v3.FilterItem{ Key: v3.AttributeKey{ Key: pair[0], // Default type, or you can use v3.AttributeKeyTypeUnspecified. IsColumn: false, IsJSON: false, }, Value: pair[1], Operator: v3.FilterOperatorEqual, // Using "=" as the operator. } filterItems = append(filterItems, filterItem) } // If there are any filters, combine them with an "AND" operator. var filters *v3.FilterSet if len(filterItems) > 0 { filters = &v3.FilterSet{ Operator: "AND", Items: filterItems, } } // Create the BuilderQuery. Here we set the QueryName to the metric name. query := v3.BuilderQuery{ QueryName: metricName, DataSource: v3.DataSourceMetrics, Expression: metricName, // Using metric name as expression Filters: filters, } if scores.MetricType == v3.MetricTypeSum && !scores.IsMonotonic && scores.Temporality == v3.Cumulative { scores.MetricType = v3.MetricTypeGauge } switch scores.MetricType { case v3.MetricTypeGauge: query.TimeAggregation = v3.TimeAggregationAvg query.SpaceAggregation = v3.SpaceAggregationAvg case v3.MetricTypeSum: query.TimeAggregation = v3.TimeAggregationRate query.SpaceAggregation = v3.SpaceAggregationSum case v3.MetricTypeHistogram: query.SpaceAggregation = v3.SpaceAggregationPercentile95 } query.AggregateAttribute = v3.AttributeKey{ Key: metricName, Type: v3.AttributeKeyType(scores.MetricType), } query.StepInterval = 60 return &query } func (receiver *SummaryService) GetInspectMetrics(ctx context.Context, params *metrics_explorer.InspectMetricsRequest) (*metrics_explorer.InspectMetricsResponse, *model.ApiError) { // Capture the original context. parentCtx := ctx // Create an errgroup using the original context. g, egCtx := errgroup.WithContext(ctx) var attributes []metrics_explorer.Attribute var resourceAttrs map[string]uint64 // Run the two queries concurrently using the derived context. g.Go(func() error { attrs, apiErr := receiver.reader.GetAttributesForMetricName(egCtx, params.MetricName, ¶ms.Start, ¶ms.End) if apiErr != nil { return apiErr } if attrs != nil { attributes = *attrs } return nil }) g.Go(func() error { resAttrs, apiErr := receiver.reader.GetMetricsAllResourceAttributes(egCtx, params.Start, params.End) if apiErr != nil { return apiErr } if resAttrs != nil { resourceAttrs = resAttrs } return nil }) // Wait for the concurrent operations to complete. if err := g.Wait(); err != nil { return nil, &model.ApiError{Typ: "InternalError", Err: err} } // Use the parentCtx (or create a new context from it) for the rest of the calls. if parentCtx.Err() != nil { return nil, &model.ApiError{Typ: "ContextCanceled", Err: parentCtx.Err()} } // Build a set of attribute keys for O(1) lookup. attributeKeys := make(map[string]struct{}) for _, attr := range attributes { attributeKeys[attr.Key] = struct{}{} } // Filter resource attributes that are present in attributes. var validAttrs []string for attrName := range resourceAttrs { normalizedAttrName := strings.ReplaceAll(attrName, ".", "_") if _, ok := attributeKeys[normalizedAttrName]; ok { validAttrs = append(validAttrs, normalizedAttrName) } } // Get top 3 resource attributes (or use top attributes by valueCount if none match). if len(validAttrs) > 3 { validAttrs = validAttrs[:3] } else if len(validAttrs) == 0 { sort.Slice(attributes, func(i, j int) bool { return attributes[i].ValueCount > attributes[j].ValueCount }) for i := 0; i < len(attributes) && i < 3; i++ { validAttrs = append(validAttrs, attributes[i].Key) } } fingerprints, apiError := receiver.reader.GetInspectMetricsFingerprints(parentCtx, validAttrs, params) if apiError != nil { return nil, apiError } baseResponse, apiErr := receiver.reader.GetInspectMetrics(parentCtx, params, fingerprints) if apiErr != nil { return nil, apiErr } return baseResponse, nil }