diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index a921052273..bb2a84b487 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -4098,44 +4098,65 @@ func (r *ClickHouseReader) GetQBFilterSuggestionsForLogs( func (r *ClickHouseReader) getValuesForLogAttributes( ctx context.Context, attributes []v3.AttributeKey, limit uint64, ) ([][]any, *model.ApiError) { - // query top `limit` distinct values seen for `tagKey`s of interest - // ordered by timestamp when the value was seen + /* + The query used here needs to be as cheap as possible, and while uncommon, it is possible for + a tag to have 100s of millions of values (eg: message, request_id) - // we added the settings max_rows_to_group_by=100, group_by_overflow_mode = 'break' - // to avoid query from taking up all the resources when value is high cardinality. - query := fmt.Sprintf( - ` - select tagKey, stringTagValue, int64TagValue, float64TagValue - from ( - select - tagKey, - stringTagValue, - int64TagValue, - float64TagValue, - row_number() over (partition by tagKey order by ts desc) as rank - from ( - select - tagKey, - stringTagValue, - int64TagValue, - float64TagValue, - max(timestamp) as ts - from %s.%s - where tagKey in $1 - group by (tagKey, stringTagValue, int64TagValue, float64TagValue) SETTINGS max_rows_to_group_by = 100, group_by_overflow_mode = 'break', max_threads=4 - ) + Construct a query to UNION the result of querying first `limit` values for each attribute. For example: + ``` + select * from ( + ( + select tagKey, stringTagValue, int64TagValue, float64TagValue + from signoz_logs.distributed_tag_attributes + where tagKey = $1 and ( + stringTagValue != '' or int64TagValue is not null or float64TagValue is not null + ) + limit 2 + ) UNION DISTINCT ( + select tagKey, stringTagValue, int64TagValue, float64TagValue + from signoz_logs.distributed_tag_attributes + where tagKey = $2 and ( + stringTagValue != '' or int64TagValue is not null or float64TagValue is not null + ) + limit 2 + ) + ) settings max_threads=2 + ``` + Since tag_attributes table uses ReplacingMergeTree, the values would be distinct and no order by + is being used to ensure the `limit` clause minimizes the amount of data scanned. + + This query scanned ~30k rows per attribute on fiscalnote-v2 for attributes like `message` and `time` + that had >~110M values each + */ + + if len(attributes) > 10 { + zap.L().Error( + "log attribute values requested for too many attributes. This can lead to slow and costly queries", + zap.Int("count", len(attributes)), ) - where rank <= %d - `, - r.logsDB, r.logsTagAttributeTable, limit, - ) - - attribNames := []string{} - for _, attrib := range attributes { - attribNames = append(attribNames, attrib.Key) + attributes = attributes[:10] } - rows, err := r.db.Query(ctx, query, attribNames) + tagQueries := []string{} + tagKeyQueryArgs := []any{} + for idx, attrib := range attributes { + tagQueries = append(tagQueries, fmt.Sprintf(`( + select tagKey, stringTagValue, int64TagValue, float64TagValue + from %s.%s + where tagKey = $%d and ( + stringTagValue != '' or int64TagValue is not null or float64TagValue is not null + ) + limit %d + )`, r.logsDB, r.logsTagAttributeTable, idx+1, limit)) + + tagKeyQueryArgs = append(tagKeyQueryArgs, attrib.Key) + } + + query := fmt.Sprintf(`select * from ( + %s + ) settings max_threads=2`, strings.Join(tagQueries, " UNION DISTINCT ")) + + rows, err := r.db.Query(ctx, query, tagKeyQueryArgs...) if err != nil { zap.L().Error("couldn't query attrib values for suggestions", zap.Error(err)) return nil, model.InternalError(fmt.Errorf( diff --git a/pkg/query-service/tests/integration/filter_suggestions_test.go b/pkg/query-service/tests/integration/filter_suggestions_test.go index 6859a6ac2f..a1f56115c5 100644 --- a/pkg/query-service/tests/integration/filter_suggestions_test.go +++ b/pkg/query-service/tests/integration/filter_suggestions_test.go @@ -186,7 +186,7 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse( {Type: "Nullable(Float64)", Name: "float64TagValue"}, } - expectedAttribKeysInQuery := []string{} + expectedAttribKeysInQuery := []any{} mockResultRows := [][]any{} for idx, attrib := range expectedAttribs { expectedAttribKeysInQuery = append(expectedAttribKeysInQuery, attrib.Key) @@ -198,8 +198,8 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse( } tb.mockClickhouse.ExpectQuery( - "select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey.*in.*", - ).WithArgs(expectedAttribKeysInQuery).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows)) + "select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey", + ).WithArgs(expectedAttribKeysInQuery...).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows)) } type FilterSuggestionsTestBed struct {