mirror of
https://git.mirrors.martin98.com/https://github.com/SigNoz/signoz
synced 2025-08-15 01:36:10 +08:00
Fix: cheaper query for fetching log attribute values for filter suggestions (#5989)
* chore: change query for fetching multiple log attribs to make sure it is always cheap * chore: get filter suggestions tests passing
This commit is contained in:
parent
49dd5f2ef7
commit
8c891f0e87
@ -4098,44 +4098,65 @@ func (r *ClickHouseReader) GetQBFilterSuggestionsForLogs(
|
||||
func (r *ClickHouseReader) getValuesForLogAttributes(
|
||||
ctx context.Context, attributes []v3.AttributeKey, limit uint64,
|
||||
) ([][]any, *model.ApiError) {
|
||||
// query top `limit` distinct values seen for `tagKey`s of interest
|
||||
// ordered by timestamp when the value was seen
|
||||
/*
|
||||
The query used here needs to be as cheap as possible, and while uncommon, it is possible for
|
||||
a tag to have 100s of millions of values (eg: message, request_id)
|
||||
|
||||
// we added the settings max_rows_to_group_by=100, group_by_overflow_mode = 'break'
|
||||
// to avoid query from taking up all the resources when value is high cardinality.
|
||||
query := fmt.Sprintf(
|
||||
`
|
||||
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
||||
from (
|
||||
select
|
||||
tagKey,
|
||||
stringTagValue,
|
||||
int64TagValue,
|
||||
float64TagValue,
|
||||
row_number() over (partition by tagKey order by ts desc) as rank
|
||||
from (
|
||||
select
|
||||
tagKey,
|
||||
stringTagValue,
|
||||
int64TagValue,
|
||||
float64TagValue,
|
||||
max(timestamp) as ts
|
||||
from %s.%s
|
||||
where tagKey in $1
|
||||
group by (tagKey, stringTagValue, int64TagValue, float64TagValue) SETTINGS max_rows_to_group_by = 100, group_by_overflow_mode = 'break', max_threads=4
|
||||
)
|
||||
Construct a query to UNION the result of querying first `limit` values for each attribute. For example:
|
||||
```
|
||||
select * from (
|
||||
(
|
||||
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
||||
from signoz_logs.distributed_tag_attributes
|
||||
where tagKey = $1 and (
|
||||
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
|
||||
)
|
||||
limit 2
|
||||
) UNION DISTINCT (
|
||||
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
||||
from signoz_logs.distributed_tag_attributes
|
||||
where tagKey = $2 and (
|
||||
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
|
||||
)
|
||||
limit 2
|
||||
)
|
||||
) settings max_threads=2
|
||||
```
|
||||
Since tag_attributes table uses ReplacingMergeTree, the values would be distinct and no order by
|
||||
is being used to ensure the `limit` clause minimizes the amount of data scanned.
|
||||
|
||||
This query scanned ~30k rows per attribute on fiscalnote-v2 for attributes like `message` and `time`
|
||||
that had >~110M values each
|
||||
*/
|
||||
|
||||
if len(attributes) > 10 {
|
||||
zap.L().Error(
|
||||
"log attribute values requested for too many attributes. This can lead to slow and costly queries",
|
||||
zap.Int("count", len(attributes)),
|
||||
)
|
||||
where rank <= %d
|
||||
`,
|
||||
r.logsDB, r.logsTagAttributeTable, limit,
|
||||
)
|
||||
|
||||
attribNames := []string{}
|
||||
for _, attrib := range attributes {
|
||||
attribNames = append(attribNames, attrib.Key)
|
||||
attributes = attributes[:10]
|
||||
}
|
||||
|
||||
rows, err := r.db.Query(ctx, query, attribNames)
|
||||
tagQueries := []string{}
|
||||
tagKeyQueryArgs := []any{}
|
||||
for idx, attrib := range attributes {
|
||||
tagQueries = append(tagQueries, fmt.Sprintf(`(
|
||||
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
||||
from %s.%s
|
||||
where tagKey = $%d and (
|
||||
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
|
||||
)
|
||||
limit %d
|
||||
)`, r.logsDB, r.logsTagAttributeTable, idx+1, limit))
|
||||
|
||||
tagKeyQueryArgs = append(tagKeyQueryArgs, attrib.Key)
|
||||
}
|
||||
|
||||
query := fmt.Sprintf(`select * from (
|
||||
%s
|
||||
) settings max_threads=2`, strings.Join(tagQueries, " UNION DISTINCT "))
|
||||
|
||||
rows, err := r.db.Query(ctx, query, tagKeyQueryArgs...)
|
||||
if err != nil {
|
||||
zap.L().Error("couldn't query attrib values for suggestions", zap.Error(err))
|
||||
return nil, model.InternalError(fmt.Errorf(
|
||||
|
@ -186,7 +186,7 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
|
||||
{Type: "Nullable(Float64)", Name: "float64TagValue"},
|
||||
}
|
||||
|
||||
expectedAttribKeysInQuery := []string{}
|
||||
expectedAttribKeysInQuery := []any{}
|
||||
mockResultRows := [][]any{}
|
||||
for idx, attrib := range expectedAttribs {
|
||||
expectedAttribKeysInQuery = append(expectedAttribKeysInQuery, attrib.Key)
|
||||
@ -198,8 +198,8 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
|
||||
}
|
||||
|
||||
tb.mockClickhouse.ExpectQuery(
|
||||
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey.*in.*",
|
||||
).WithArgs(expectedAttribKeysInQuery).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
|
||||
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey",
|
||||
).WithArgs(expectedAttribKeysInQuery...).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
|
||||
}
|
||||
|
||||
type FilterSuggestionsTestBed struct {
|
||||
|
Loading…
x
Reference in New Issue
Block a user