From 105216de3e7a29c6e06df3282723edf45847697c Mon Sep 17 00:00:00 2001 From: Srikanth Chekuri Date: Sat, 30 Dec 2023 22:53:09 +0530 Subject: [PATCH] chore: add prepare query for delta/unspecified timeseries (#4167) * chore: update BuilderQuery struct and add PrepareTimeseriesFilterQuery * chore: add prepare query for cumulative/unspecified timeseries * chore: add prepare query for delta/unspecified timeseries * chore: update group by to work with 23.11+ * chore: fix test --------- Co-authored-by: Nityananda Gohain --- .../app/metrics/v4/delta/helper.go | 61 +++++ .../app/metrics/v4/delta/time_series_test.go | 229 ++++++++++++++++++ .../app/metrics/v4/delta/timeseries.go | 120 +++++++++ 3 files changed, 410 insertions(+) create mode 100644 pkg/query-service/app/metrics/v4/delta/helper.go create mode 100644 pkg/query-service/app/metrics/v4/delta/time_series_test.go create mode 100644 pkg/query-service/app/metrics/v4/delta/timeseries.go diff --git a/pkg/query-service/app/metrics/v4/delta/helper.go b/pkg/query-service/app/metrics/v4/delta/helper.go new file mode 100644 index 0000000000..972120fc15 --- /dev/null +++ b/pkg/query-service/app/metrics/v4/delta/helper.go @@ -0,0 +1,61 @@ +package delta + +import ( + "fmt" + "strings" + + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +// groupingSets returns a string of comma separated tags for group by clause +// `ts` is always added to the group by clause +func groupingSets(tags ...string) string { + withTs := append(tags, "ts") + if len(withTs) > 1 { + return fmt.Sprintf(`GROUPING SETS ( (%s), (%s) )`, strings.Join(withTs, ", "), strings.Join(tags, ", ")) + } else { + return strings.Join(withTs, ", ") + } +} + +// groupingSetsByAttributeKeyTags returns a string of comma separated tags for group by clause +func groupingSetsByAttributeKeyTags(tags ...v3.AttributeKey) string { + groupTags := []string{} + for _, tag := range tags { + groupTags = append(groupTags, tag.Key) + } + return groupingSets(groupTags...) +} + +// groupBy returns a string of comma separated tags for group by clause +func groupByAttributeKeyTags(tags ...v3.AttributeKey) string { + groupTags := []string{} + for _, tag := range tags { + groupTags = append(groupTags, tag.Key) + } + groupTags = append(groupTags, "ts") + return strings.Join(groupTags, ", ") +} + +// orderBy returns a string of comma separated tags for order by clause +// if the order is not specified, it defaults to ASC +func orderByAttributeKeyTags(items []v3.OrderBy, tags []v3.AttributeKey) string { + var orderBy []string + for _, tag := range tags { + found := false + for _, item := range items { + if item.ColumnName == tag.Key { + found = true + orderBy = append(orderBy, fmt.Sprintf("%s %s", item.ColumnName, item.Order)) + break + } + } + if !found { + orderBy = append(orderBy, fmt.Sprintf("%s ASC", tag.Key)) + } + } + + orderBy = append(orderBy, "ts ASC") + + return strings.Join(orderBy, ", ") +} diff --git a/pkg/query-service/app/metrics/v4/delta/time_series_test.go b/pkg/query-service/app/metrics/v4/delta/time_series_test.go new file mode 100644 index 0000000000..d22aa12961 --- /dev/null +++ b/pkg/query-service/app/metrics/v4/delta/time_series_test.go @@ -0,0 +1,229 @@ +package delta + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +func TestPrepareTimeAggregationSubQuery(t *testing.T) { + // The time aggregation is performed for each unique series - since the fingerprint represents the + // unique hash of label set, we always group by fingerprint regardless of the GroupBy + // This sub result is then aggregated on dimensions using the provided GroupBy clause keys + testCases := []struct { + name string + builderQuery *v3.BuilderQuery + start int64 + end int64 + expectedQueryContains string + }{ + { + name: "test time aggregation = avg, temporality = delta", + builderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + DataSource: v3.DataSourceMetrics, + AggregateAttribute: v3.AttributeKey{ + Key: "http_requests", + DataType: v3.AttributeKeyDataTypeFloat64, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, + IsJSON: false, + }, + Temporality: v3.Delta, + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service_name", + Type: v3.AttributeKeyTypeTag, + DataType: v3.AttributeKeyDataTypeString, + }, + Operator: v3.FilterOperatorNotEqual, + Value: "payment_service", + }, + { + Key: v3.AttributeKey{ + Key: "endpoint", + Type: v3.AttributeKeyTypeTag, + DataType: v3.AttributeKeyDataTypeString, + }, + Operator: v3.FilterOperatorIn, + Value: []interface{}{"/paycallback", "/payme", "/paypal"}, + }, + }, + }, + GroupBy: []v3.AttributeKey{{ + Key: "service_name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeTag, + }}, + Expression: "A", + Disabled: false, + TimeAggregation: v3.TimeAggregationAvg, + }, + start: 1701794980000, + end: 1701796780000, + expectedQueryContains: "SELECT fingerprint, any(service_name) as service_name, toStartOfInterval(toDateTime(intDiv(timestamp_ms, 1000)), INTERVAL 60 SECOND) as ts, avg(value) as per_series_value FROM signoz_metrics.distributed_samples_v2 INNER JOIN (SELECT DISTINCT JSONExtractString(labels, 'service_name') as service_name, fingerprint FROM signoz_metrics.time_series_v2 WHERE metric_name = 'http_requests' AND temporality = 'Delta' AND JSONExtractString(labels, 'service_name') != 'payment_service' AND JSONExtractString(labels, 'endpoint') IN ['/paycallback','/payme','/paypal']) as filtered_time_series USING fingerprint WHERE metric_name = 'http_requests' AND timestamp_ms >= 1701794980000 AND timestamp_ms <= 1701796780000 GROUP BY fingerprint, ts ORDER BY fingerprint, ts", + }, + { + name: "test time aggregation = rate, temporality = delta", + builderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + DataSource: v3.DataSourceMetrics, + AggregateAttribute: v3.AttributeKey{ + Key: "http_requests", + DataType: v3.AttributeKeyDataTypeFloat64, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, + IsJSON: false, + }, + Temporality: v3.Delta, + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service_name", + Type: v3.AttributeKeyTypeTag, + DataType: v3.AttributeKeyDataTypeString, + }, + Operator: v3.FilterOperatorContains, + Value: "payment_service", + }, + }, + }, + GroupBy: []v3.AttributeKey{{ + Key: "service_name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeTag, + }}, + Expression: "A", + Disabled: false, + TimeAggregation: v3.TimeAggregationRate, + }, + start: 1701794980000, + end: 1701796780000, + expectedQueryContains: "SELECT fingerprint, any(service_name) as service_name, toStartOfInterval(toDateTime(intDiv(timestamp_ms, 1000)), INTERVAL 60 SECOND) as ts, sum(value)/60 as per_series_value FROM signoz_metrics.distributed_samples_v2 INNER JOIN (SELECT DISTINCT JSONExtractString(labels, 'service_name') as service_name, fingerprint FROM signoz_metrics.time_series_v2 WHERE metric_name = 'http_requests' AND temporality = 'Delta' AND like(JSONExtractString(labels, 'service_name'), '%payment_service%')) as filtered_time_series USING fingerprint WHERE metric_name = 'http_requests' AND timestamp_ms >= 1701794980000 AND timestamp_ms <= 1701796780000 GROUP BY fingerprint, ts ORDER BY fingerprint, ts", + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + query, err := prepareTimeAggregationSubQueryTimeSeries( + testCase.start, + testCase.end, + testCase.builderQuery.StepInterval, + testCase.builderQuery, + ) + assert.Nil(t, err) + assert.Contains(t, query, testCase.expectedQueryContains) + }) + } +} +func TestPrepareTimeseriesQuery(t *testing.T) { + testCases := []struct { + name string + builderQuery *v3.BuilderQuery + start int64 + end int64 + expectedQueryContains string + }{ + { + name: "test time aggregation = avg, space aggregation = sum, temporality = unspecified", + builderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + DataSource: v3.DataSourceMetrics, + AggregateAttribute: v3.AttributeKey{ + Key: "system_memory_usage", + DataType: v3.AttributeKeyDataTypeFloat64, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, + IsJSON: false, + }, + Temporality: v3.Unspecified, + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "state", + Type: v3.AttributeKeyTypeTag, + DataType: v3.AttributeKeyDataTypeString, + }, + Operator: v3.FilterOperatorNotEqual, + Value: "idle", + }, + }, + }, + GroupBy: []v3.AttributeKey{}, + Expression: "A", + Disabled: false, + TimeAggregation: v3.TimeAggregationAvg, + SpaceAggregation: v3.SpaceAggregationSum, + }, + start: 1701794980000, + end: 1701796780000, + expectedQueryContains: "SELECT ts, sum(per_series_value) as value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(timestamp_ms, 1000)), INTERVAL 60 SECOND) as ts, avg(value) as per_series_value FROM signoz_metrics.distributed_samples_v2 INNER JOIN (SELECT DISTINCT fingerprint FROM signoz_metrics.time_series_v2 WHERE metric_name = 'system_memory_usage' AND temporality = 'Unspecified' AND JSONExtractString(labels, 'state') != 'idle') as filtered_time_series USING fingerprint WHERE metric_name = 'system_memory_usage' AND timestamp_ms >= 1701794980000 AND timestamp_ms <= 1701796780000 GROUP BY fingerprint, ts ORDER BY fingerprint, ts) WHERE isNaN(per_series_value) = 0 GROUP BY ts ORDER BY ts ASC", + }, + { + name: "test time aggregation = rate, space aggregation = sum, temporality = delta", + builderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + DataSource: v3.DataSourceMetrics, + AggregateAttribute: v3.AttributeKey{ + Key: "http_requests", + DataType: v3.AttributeKeyDataTypeFloat64, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, + IsJSON: false, + }, + Temporality: v3.Delta, + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service_name", + Type: v3.AttributeKeyTypeTag, + DataType: v3.AttributeKeyDataTypeString, + }, + Operator: v3.FilterOperatorContains, + Value: "payment_service", + }, + }, + }, + GroupBy: []v3.AttributeKey{{ + Key: "service_name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeTag, + }}, + Expression: "A", + Disabled: false, + TimeAggregation: v3.TimeAggregationRate, + SpaceAggregation: v3.SpaceAggregationSum, + }, + start: 1701794980000, + end: 1701796780000, + expectedQueryContains: "SELECT service_name, ts, sum(per_series_value) as value FROM (SELECT fingerprint, any(service_name) as service_name, toStartOfInterval(toDateTime(intDiv(timestamp_ms, 1000)), INTERVAL 60 SECOND) as ts, sum(value)/60 as per_series_value FROM signoz_metrics.distributed_samples_v2 INNER JOIN (SELECT DISTINCT JSONExtractString(labels, 'service_name') as service_name, fingerprint FROM signoz_metrics.time_series_v2 WHERE metric_name = 'http_requests' AND temporality = 'Delta' AND like(JSONExtractString(labels, 'service_name'), '%payment_service%')) as filtered_time_series USING fingerprint WHERE metric_name = 'http_requests' AND timestamp_ms >= 1701794980000 AND timestamp_ms <= 1701796780000 GROUP BY fingerprint, ts ORDER BY fingerprint, ts) WHERE isNaN(per_series_value) = 0 GROUP BY GROUPING SETS ( (service_name, ts), (service_name) ) ORDER BY service_name ASC, ts ASC", + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + query, err := prepareMetricQueryDeltaTimeSeries( + testCase.start, + testCase.end, + testCase.builderQuery.StepInterval, + testCase.builderQuery, + ) + assert.Nil(t, err) + assert.Contains(t, query, testCase.expectedQueryContains) + }) + } +} diff --git a/pkg/query-service/app/metrics/v4/delta/timeseries.go b/pkg/query-service/app/metrics/v4/delta/timeseries.go new file mode 100644 index 0000000000..f9a9e265c3 --- /dev/null +++ b/pkg/query-service/app/metrics/v4/delta/timeseries.go @@ -0,0 +1,120 @@ +package delta + +import ( + "fmt" + + v4 "go.signoz.io/signoz/pkg/query-service/app/metrics/v4" + "go.signoz.io/signoz/pkg/query-service/constants" + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" + "go.signoz.io/signoz/pkg/query-service/utils" +) + +// prepareTimeAggregationSubQueryTimeSeries builds the sub-query to be used for temporal aggregation +func prepareTimeAggregationSubQueryTimeSeries(start, end, step int64, mq *v3.BuilderQuery) (string, error) { + + var subQuery string + + timeSeriesSubQuery, err := v4.PrepareTimeseriesFilterQuery(mq) + if err != nil { + return "", err + } + + samplesTableFilter := fmt.Sprintf("metric_name = %s AND timestamp_ms >= %d AND timestamp_ms <= %d", utils.ClickHouseFormattedValue(mq.AggregateAttribute.Key), start, end) + + // Select the aggregate value for interval + queryTmpl := + "SELECT fingerprint, %s" + + " toStartOfInterval(toDateTime(intDiv(timestamp_ms, 1000)), INTERVAL %d SECOND) as ts," + + " %s as per_series_value" + + " FROM " + constants.SIGNOZ_METRIC_DBNAME + "." + constants.SIGNOZ_SAMPLES_TABLENAME + + " INNER JOIN" + + " (%s) as filtered_time_series" + + " USING fingerprint" + + " WHERE " + samplesTableFilter + + " GROUP BY fingerprint, ts" + + " ORDER BY fingerprint, ts" + + var selectLabelsAny string + for _, tag := range mq.GroupBy { + selectLabelsAny += fmt.Sprintf("any(%s) as %s,", tag.Key, tag.Key) + } + + var selectLabels string + for _, tag := range mq.GroupBy { + selectLabels += tag.Key + "," + } + + switch mq.TimeAggregation { + case v3.TimeAggregationAvg: + op := "avg(value)" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationSum: + op := "sum(value)" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationMin: + op := "min(value)" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationMax: + op := "max(value)" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationCount: + op := "count(value)" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationCountDistinct: + op := "count(distinct(value))" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationAnyLast: + op := "anyLast(value)" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationRate: + op := fmt.Sprintf("sum(value)/%d", step) + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + case v3.TimeAggregationIncrease: + op := "sum(value)" + subQuery = fmt.Sprintf(queryTmpl, selectLabelsAny, step, op, timeSeriesSubQuery) + } + return subQuery, nil +} + +// prepareMetricQueryDeltaTimeSeries builds the query to be used for fetching metrics +func prepareMetricQueryDeltaTimeSeries(start, end, step int64, mq *v3.BuilderQuery) (string, error) { + + var query string + + temporalAggSubQuery, err := prepareTimeAggregationSubQueryTimeSeries(start, end, step, mq) + if err != nil { + return "", err + } + + groupBy := groupingSetsByAttributeKeyTags(mq.GroupBy...) + orderBy := orderByAttributeKeyTags(mq.OrderBy, mq.GroupBy) + selectLabels := groupByAttributeKeyTags(mq.GroupBy...) + + queryTmpl := + "SELECT %s," + + " %s as value" + + " FROM (%s)" + + " WHERE isNaN(per_series_value) = 0" + + " GROUP BY %s" + + " ORDER BY %s" + + switch mq.SpaceAggregation { + case v3.SpaceAggregationAvg: + op := "avg(per_series_value)" + query = fmt.Sprintf(queryTmpl, selectLabels, op, temporalAggSubQuery, groupBy, orderBy) + case v3.SpaceAggregationSum: + op := "sum(per_series_value)" + query = fmt.Sprintf(queryTmpl, selectLabels, op, temporalAggSubQuery, groupBy, orderBy) + case v3.SpaceAggregationMin: + op := "min(per_series_value)" + query = fmt.Sprintf(queryTmpl, selectLabels, op, temporalAggSubQuery, groupBy, orderBy) + case v3.SpaceAggregationMax: + op := "max(per_series_value)" + query = fmt.Sprintf(queryTmpl, selectLabels, op, temporalAggSubQuery, groupBy, orderBy) + case v3.SpaceAggregationCount: + op := "count(per_series_value)" + query = fmt.Sprintf(queryTmpl, selectLabels, op, temporalAggSubQuery, groupBy, orderBy) + } + + return query, nil +}