mirror of
https://git.mirrors.martin98.com/https://github.com/SigNoz/signoz
synced 2025-08-15 17:55:56 +08:00
Fix: cheaper query for fetching log attribute values for filter suggestions (#5989)
* chore: change query for fetching multiple log attribs to make sure it is always cheap
* chore: get filter suggestions tests passing
This commit is contained in:
parent
49dd5f2ef7
commit
8c891f0e87
@ -4098,44 +4098,65 @@ func (r *ClickHouseReader) GetQBFilterSuggestionsForLogs(
|
|||||||
func (r *ClickHouseReader) getValuesForLogAttributes(
|
func (r *ClickHouseReader) getValuesForLogAttributes(
|
||||||
ctx context.Context, attributes []v3.AttributeKey, limit uint64,
|
ctx context.Context, attributes []v3.AttributeKey, limit uint64,
|
||||||
) ([][]any, *model.ApiError) {
|
) ([][]any, *model.ApiError) {
|
||||||
// query top `limit` distinct values seen for `tagKey`s of interest
|
/*
|
||||||
// ordered by timestamp when the value was seen
|
The query used here needs to be as cheap as possible, and while uncommon, it is possible for
|
||||||
|
a tag to have 100s of millions of values (e.g., message, request_id)
|
||||||
|
|
||||||
// we added the settings max_rows_to_group_by=100, group_by_overflow_mode = 'break'
|
Construct a query to UNION the result of querying first `limit` values for each attribute. For example:
|
||||||
// to avoid query from taking up all the resources when value is high cardinality.
|
```
|
||||||
query := fmt.Sprintf(
|
select * from (
|
||||||
`
|
(
|
||||||
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
||||||
from (
|
from signoz_logs.distributed_tag_attributes
|
||||||
select
|
where tagKey = $1 and (
|
||||||
tagKey,
|
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
|
||||||
stringTagValue,
|
|
||||||
int64TagValue,
|
|
||||||
float64TagValue,
|
|
||||||
row_number() over (partition by tagKey order by ts desc) as rank
|
|
||||||
from (
|
|
||||||
select
|
|
||||||
tagKey,
|
|
||||||
stringTagValue,
|
|
||||||
int64TagValue,
|
|
||||||
float64TagValue,
|
|
||||||
max(timestamp) as ts
|
|
||||||
from %s.%s
|
|
||||||
where tagKey in $1
|
|
||||||
group by (tagKey, stringTagValue, int64TagValue, float64TagValue) SETTINGS max_rows_to_group_by = 100, group_by_overflow_mode = 'break', max_threads=4
|
|
||||||
)
|
)
|
||||||
|
limit 2
|
||||||
|
) UNION DISTINCT (
|
||||||
|
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
||||||
|
from signoz_logs.distributed_tag_attributes
|
||||||
|
where tagKey = $2 and (
|
||||||
|
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
|
||||||
)
|
)
|
||||||
where rank <= %d
|
limit 2
|
||||||
`,
|
|
||||||
r.logsDB, r.logsTagAttributeTable, limit,
|
|
||||||
)
|
)
|
||||||
|
) settings max_threads=2
|
||||||
|
```
|
||||||
|
Since the tag_attributes table uses ReplacingMergeTree, the values would be distinct, and no `order by`
|
||||||
|
is used, so that the `limit` clause minimizes the amount of data scanned.
|
||||||
|
|
||||||
attribNames := []string{}
|
This query scanned ~30k rows per attribute on fiscalnote-v2 for attributes like `message` and `time`
|
||||||
for _, attrib := range attributes {
|
that had >~110M values each
|
||||||
attribNames = append(attribNames, attrib.Key)
|
*/
|
||||||
|
|
||||||
|
if len(attributes) > 10 {
|
||||||
|
zap.L().Error(
|
||||||
|
"log attribute values requested for too many attributes. This can lead to slow and costly queries",
|
||||||
|
zap.Int("count", len(attributes)),
|
||||||
|
)
|
||||||
|
attributes = attributes[:10]
|
||||||
}
|
}
|
||||||
|
|
||||||
rows, err := r.db.Query(ctx, query, attribNames)
|
tagQueries := []string{}
|
||||||
|
tagKeyQueryArgs := []any{}
|
||||||
|
for idx, attrib := range attributes {
|
||||||
|
tagQueries = append(tagQueries, fmt.Sprintf(`(
|
||||||
|
select tagKey, stringTagValue, int64TagValue, float64TagValue
|
||||||
|
from %s.%s
|
||||||
|
where tagKey = $%d and (
|
||||||
|
stringTagValue != '' or int64TagValue is not null or float64TagValue is not null
|
||||||
|
)
|
||||||
|
limit %d
|
||||||
|
)`, r.logsDB, r.logsTagAttributeTable, idx+1, limit))
|
||||||
|
|
||||||
|
tagKeyQueryArgs = append(tagKeyQueryArgs, attrib.Key)
|
||||||
|
}
|
||||||
|
|
||||||
|
query := fmt.Sprintf(`select * from (
|
||||||
|
%s
|
||||||
|
) settings max_threads=2`, strings.Join(tagQueries, " UNION DISTINCT "))
|
||||||
|
|
||||||
|
rows, err := r.db.Query(ctx, query, tagKeyQueryArgs...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
zap.L().Error("couldn't query attrib values for suggestions", zap.Error(err))
|
zap.L().Error("couldn't query attrib values for suggestions", zap.Error(err))
|
||||||
return nil, model.InternalError(fmt.Errorf(
|
return nil, model.InternalError(fmt.Errorf(
|
||||||
|
@ -186,7 +186,7 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
|
|||||||
{Type: "Nullable(Float64)", Name: "float64TagValue"},
|
{Type: "Nullable(Float64)", Name: "float64TagValue"},
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedAttribKeysInQuery := []string{}
|
expectedAttribKeysInQuery := []any{}
|
||||||
mockResultRows := [][]any{}
|
mockResultRows := [][]any{}
|
||||||
for idx, attrib := range expectedAttribs {
|
for idx, attrib := range expectedAttribs {
|
||||||
expectedAttribKeysInQuery = append(expectedAttribKeysInQuery, attrib.Key)
|
expectedAttribKeysInQuery = append(expectedAttribKeysInQuery, attrib.Key)
|
||||||
@ -198,8 +198,8 @@ func (tb *FilterSuggestionsTestBed) mockAttribValuesQueryResponse(
|
|||||||
}
|
}
|
||||||
|
|
||||||
tb.mockClickhouse.ExpectQuery(
|
tb.mockClickhouse.ExpectQuery(
|
||||||
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey.*in.*",
|
"select.*tagKey.*stringTagValue.*int64TagValue.*float64TagValue.*distributed_tag_attributes.*tagKey",
|
||||||
).WithArgs(expectedAttribKeysInQuery).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
|
).WithArgs(expectedAttribKeysInQuery...).WillReturnRows(mockhouse.NewRows(resultCols, mockResultRows))
|
||||||
}
|
}
|
||||||
|
|
||||||
type FilterSuggestionsTestBed struct {
|
type FilterSuggestionsTestBed struct {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user