aniketio-ctrl 68effaf232
chore: support for non-normalized metrics behind a feature flag (#7919)
feat(7294-services): added dot metrics boolean for services tab
2025-05-30 10:27:29 +00:00

502 lines
14 KiB
Go

package inframetrics
import (
"context"
"math"
"sort"
"github.com/SigNoz/signoz/pkg/query-service/app/metrics/v4/helpers"
"github.com/SigNoz/signoz/pkg/query-service/common"
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
"github.com/SigNoz/signoz/pkg/query-service/model"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
"github.com/SigNoz/signoz/pkg/query-service/postprocess"
"github.com/SigNoz/signoz/pkg/valuer"
"golang.org/x/exp/slices"
)
var (
metricToUseForJobs = GetDotMetrics("k8s_job_desired_successful_pods")
k8sJobNameAttrKey = GetDotMetrics("k8s_job_name")
metricNamesForJobs = map[string]string{
"desired_successful_pods": GetDotMetrics("k8s_job_desired_successful_pods"),
"active_pods": GetDotMetrics("k8s_job_active_pods"),
"failed_pods": GetDotMetrics("k8s_job_failed_pods"),
"successful_pods": GetDotMetrics("k8s_job_successful_pods"),
}
jobAttrsToEnrich = []string{
GetDotMetrics("k8s_job_name"),
GetDotMetrics("k8s_namespace_name"),
GetDotMetrics("k8s_cluster_name"),
}
queryNamesForJobs = map[string][]string{
"cpu": {"A"},
"cpu_request": {"B", "A"},
"cpu_limit": {"C", "A"},
"memory": {"D"},
"memory_request": {"E", "D"},
"memory_limit": {"F", "D"},
"restarts": {"G", "A"},
"desired_successful_pods": {"H"},
"active_pods": {"I"},
"failed_pods": {"J"},
"successful_pods": {"K"},
}
builderQueriesForJobs = map[string]*v3.BuilderQuery{
// desired nodes
"H": {
QueryName: "H",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: GetDotMetrics(metricNamesForJobs["desired_successful_pods"]),
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{},
},
GroupBy: []v3.AttributeKey{},
Expression: "H",
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
SpaceAggregation: v3.SpaceAggregationSum,
Disabled: false,
},
// available nodes
"I": {
QueryName: "I",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: GetDotMetrics(metricNamesForJobs["active_pods"]),
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{},
},
GroupBy: []v3.AttributeKey{},
Expression: "I",
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
SpaceAggregation: v3.SpaceAggregationSum,
Disabled: false,
},
// failed pods
"J": {
QueryName: "J",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: GetDotMetrics(metricNamesForJobs["failed_pods"]),
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{},
},
GroupBy: []v3.AttributeKey{},
Expression: "J",
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
SpaceAggregation: v3.SpaceAggregationSum,
Disabled: false,
},
// successful pods
"K": {
QueryName: "K",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: GetDotMetrics(metricNamesForJobs["successful_pods"]),
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{},
},
GroupBy: []v3.AttributeKey{},
Expression: "K",
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
SpaceAggregation: v3.SpaceAggregationSum,
Disabled: false,
},
}
jobQueryNames = []string{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"}
)
type JobsRepo struct {
reader interfaces.Reader
querierV2 interfaces.Querier
}
func NewJobsRepo(reader interfaces.Reader, querierV2 interfaces.Querier) *JobsRepo {
return &JobsRepo{reader: reader, querierV2: querierV2}
}
func (d *JobsRepo) GetJobAttributeKeys(ctx context.Context, req v3.FilterAttributeKeyRequest) (*v3.FilterAttributeKeyResponse, error) {
// TODO(srikanthccv): remove hardcoded metric name and support keys from any pod metric
req.DataSource = v3.DataSourceMetrics
req.AggregateAttribute = metricToUseForJobs
if req.Limit == 0 {
req.Limit = 50
}
attributeKeysResponse, err := d.reader.GetMetricAttributeKeys(ctx, &req)
if err != nil {
return nil, err
}
// TODO(srikanthccv): only return resource attributes when we have a way to
// distinguish between resource attributes and other attributes.
filteredKeys := []v3.AttributeKey{}
for _, key := range attributeKeysResponse.AttributeKeys {
if slices.Contains(pointAttrsToIgnore, key.Key) {
continue
}
filteredKeys = append(filteredKeys, key)
}
return &v3.FilterAttributeKeyResponse{AttributeKeys: filteredKeys}, nil
}
func (d *JobsRepo) GetJobAttributeValues(ctx context.Context, req v3.FilterAttributeValueRequest) (*v3.FilterAttributeValueResponse, error) {
req.DataSource = v3.DataSourceMetrics
req.AggregateAttribute = metricToUseForJobs
if req.Limit == 0 {
req.Limit = 50
}
attributeValuesResponse, err := d.reader.GetMetricAttributeValues(ctx, &req)
if err != nil {
return nil, err
}
return attributeValuesResponse, nil
}
func (d *JobsRepo) getMetadataAttributes(ctx context.Context, req model.JobListRequest) (map[string]map[string]string, error) {
jobAttrs := map[string]map[string]string{}
for _, key := range jobAttrsToEnrich {
hasKey := false
for _, groupByKey := range req.GroupBy {
if groupByKey.Key == key {
hasKey = true
break
}
}
if !hasKey {
req.GroupBy = append(req.GroupBy, v3.AttributeKey{Key: key})
}
}
mq := v3.BuilderQuery{
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricToUseForJobs,
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
GroupBy: req.GroupBy,
}
query, err := helpers.PrepareTimeseriesFilterQuery(req.Start, req.End, &mq)
if err != nil {
return nil, err
}
query = localQueryToDistributedQuery(query)
attrsListResponse, err := d.reader.GetListResultV3(ctx, query)
if err != nil {
return nil, err
}
for _, row := range attrsListResponse {
stringData := map[string]string{}
for key, value := range row.Data {
if str, ok := value.(string); ok {
stringData[key] = str
} else if strPtr, ok := value.(*string); ok {
stringData[key] = *strPtr
}
}
jobName := stringData[k8sJobNameAttrKey]
if _, ok := jobAttrs[jobName]; !ok {
jobAttrs[jobName] = map[string]string{}
}
for _, key := range req.GroupBy {
jobAttrs[jobName][key.Key] = stringData[key.Key]
}
}
return jobAttrs, nil
}
func (d *JobsRepo) getTopJobGroups(ctx context.Context, orgID valuer.UUID, req model.JobListRequest, q *v3.QueryRangeParamsV3) ([]map[string]string, []map[string]string, error) {
step, timeSeriesTableName, samplesTableName := getParamsForTopJobs(req)
queryNames := queryNamesForJobs[req.OrderBy.ColumnName]
topJobGroupsQueryRangeParams := &v3.QueryRangeParamsV3{
Start: req.Start,
End: req.End,
Step: step,
CompositeQuery: &v3.CompositeQuery{
BuilderQueries: map[string]*v3.BuilderQuery{},
QueryType: v3.QueryTypeBuilder,
PanelType: v3.PanelTypeTable,
},
}
for _, queryName := range queryNames {
query := q.CompositeQuery.BuilderQueries[queryName].Clone()
query.StepInterval = step
query.MetricTableHints = &v3.MetricTableHints{
TimeSeriesTableName: timeSeriesTableName,
SamplesTableName: samplesTableName,
}
if req.Filters != nil && len(req.Filters.Items) > 0 {
if query.Filters == nil {
query.Filters = &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}
}
query.Filters.Items = append(query.Filters.Items, req.Filters.Items...)
}
topJobGroupsQueryRangeParams.CompositeQuery.BuilderQueries[queryName] = query
}
queryResponse, _, err := d.querierV2.QueryRange(ctx, orgID, topJobGroupsQueryRangeParams)
if err != nil {
return nil, nil, err
}
formattedResponse, err := postprocess.PostProcessResult(queryResponse, topJobGroupsQueryRangeParams)
if err != nil {
return nil, nil, err
}
if len(formattedResponse) == 0 || len(formattedResponse[0].Series) == 0 {
return nil, nil, nil
}
if req.OrderBy.Order == v3.DirectionDesc {
sort.Slice(formattedResponse[0].Series, func(i, j int) bool {
return formattedResponse[0].Series[i].Points[0].Value > formattedResponse[0].Series[j].Points[0].Value
})
} else {
sort.Slice(formattedResponse[0].Series, func(i, j int) bool {
return formattedResponse[0].Series[i].Points[0].Value < formattedResponse[0].Series[j].Points[0].Value
})
}
limit := math.Min(float64(req.Offset+req.Limit), float64(len(formattedResponse[0].Series)))
paginatedTopJobGroupsSeries := formattedResponse[0].Series[req.Offset:int(limit)]
topJobGroups := []map[string]string{}
for _, series := range paginatedTopJobGroupsSeries {
topJobGroups = append(topJobGroups, series.Labels)
}
allJobGroups := []map[string]string{}
for _, series := range formattedResponse[0].Series {
allJobGroups = append(allJobGroups, series.Labels)
}
return topJobGroups, allJobGroups, nil
}
func (d *JobsRepo) GetJobList(ctx context.Context, orgID valuer.UUID, req model.JobListRequest) (model.JobListResponse, error) {
resp := model.JobListResponse{}
if req.Limit == 0 {
req.Limit = 10
}
if req.OrderBy == nil {
req.OrderBy = &v3.OrderBy{ColumnName: GetDotMetrics("desired_pods"), Order: v3.DirectionDesc}
}
if req.GroupBy == nil {
req.GroupBy = []v3.AttributeKey{{Key: k8sJobNameAttrKey}}
resp.Type = model.ResponseTypeList
} else {
resp.Type = model.ResponseTypeGroupedList
}
step := int64(math.Max(float64(common.MinAllowedStepInterval(req.Start, req.End)), 60))
query := WorkloadTableListQuery.Clone()
query.Start = req.Start
query.End = req.End
query.Step = step
// add additional queries for jobs
for _, jobQuery := range builderQueriesForJobs {
query.CompositeQuery.BuilderQueries[jobQuery.QueryName] = jobQuery.Clone()
}
for _, query := range query.CompositeQuery.BuilderQueries {
query.StepInterval = step
if req.Filters != nil && len(req.Filters.Items) > 0 {
if query.Filters == nil {
query.Filters = &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}
}
query.Filters.Items = append(query.Filters.Items, req.Filters.Items...)
}
query.GroupBy = req.GroupBy
// make sure we only get records for jobs
query.Filters.Items = append(query.Filters.Items, v3.FilterItem{
Key: v3.AttributeKey{Key: k8sJobNameAttrKey},
Operator: v3.FilterOperatorExists,
})
}
jobAttrs, err := d.getMetadataAttributes(ctx, req)
if err != nil {
return resp, err
}
topJobGroups, allJobGroups, err := d.getTopJobGroups(ctx, orgID, req, query)
if err != nil {
return resp, err
}
groupFilters := map[string][]string{}
for _, topJobGroup := range topJobGroups {
for k, v := range topJobGroup {
groupFilters[k] = append(groupFilters[k], v)
}
}
for groupKey, groupValues := range groupFilters {
hasGroupFilter := false
if req.Filters != nil && len(req.Filters.Items) > 0 {
for _, filter := range req.Filters.Items {
if filter.Key.Key == groupKey {
hasGroupFilter = true
break
}
}
}
if !hasGroupFilter {
for _, query := range query.CompositeQuery.BuilderQueries {
query.Filters.Items = append(query.Filters.Items, v3.FilterItem{
Key: v3.AttributeKey{Key: groupKey},
Value: groupValues,
Operator: v3.FilterOperatorIn,
})
}
}
}
queryResponse, _, err := d.querierV2.QueryRange(ctx, orgID, query)
if err != nil {
return resp, err
}
formattedResponse, err := postprocess.PostProcessResult(queryResponse, query)
if err != nil {
return resp, err
}
records := []model.JobListRecord{}
for _, result := range formattedResponse {
for _, row := range result.Table.Rows {
record := model.JobListRecord{
JobName: "",
CPUUsage: -1,
CPURequest: -1,
CPULimit: -1,
MemoryUsage: -1,
MemoryRequest: -1,
MemoryLimit: -1,
DesiredSuccessfulPods: -1,
ActivePods: -1,
FailedPods: -1,
SuccessfulPods: -1,
}
if jobName, ok := row.Data[k8sJobNameAttrKey].(string); ok {
record.JobName = jobName
}
if cpu, ok := row.Data["A"].(float64); ok {
record.CPUUsage = cpu
}
if cpuRequest, ok := row.Data["B"].(float64); ok {
record.CPURequest = cpuRequest
}
if cpuLimit, ok := row.Data["C"].(float64); ok {
record.CPULimit = cpuLimit
}
if memory, ok := row.Data["D"].(float64); ok {
record.MemoryUsage = memory
}
if memoryRequest, ok := row.Data["E"].(float64); ok {
record.MemoryRequest = memoryRequest
}
if memoryLimit, ok := row.Data["F"].(float64); ok {
record.MemoryLimit = memoryLimit
}
if restarts, ok := row.Data["G"].(float64); ok {
record.Restarts = int(restarts)
}
if desiredSuccessfulPods, ok := row.Data["H"].(float64); ok {
record.DesiredSuccessfulPods = int(desiredSuccessfulPods)
}
if activePods, ok := row.Data["I"].(float64); ok {
record.ActivePods = int(activePods)
}
if failedPods, ok := row.Data["J"].(float64); ok {
record.FailedPods = int(failedPods)
}
if successfulPods, ok := row.Data["K"].(float64); ok {
record.SuccessfulPods = int(successfulPods)
}
record.Meta = map[string]string{}
if _, ok := jobAttrs[record.JobName]; ok && record.JobName != "" {
record.Meta = jobAttrs[record.JobName]
}
for k, v := range row.Data {
if slices.Contains(jobQueryNames, k) {
continue
}
if labelValue, ok := v.(string); ok {
record.Meta[k] = labelValue
}
}
records = append(records, record)
}
}
resp.Total = len(allJobGroups)
resp.Records = records
resp.SortBy(req.OrderBy)
return resp, nil
}