From b9c87c13959891a37d42336094f06f9cce19aeec Mon Sep 17 00:00:00 2001 From: Nityananda Gohain Date: Thu, 25 May 2023 09:58:32 +0530 Subject: [PATCH] feat: support for top level keys in attributes in query parser logs v3 (#2753) Co-authored-by: Srikanth Chekuri --- .../app/clickhouseReader/reader.go | 9 +- .../app/logs/v3/query_builder.go | 20 +++-- .../app/logs/v3/query_builder_test.go | 85 ++++++++++++++++++- pkg/query-service/constants/constants.go | 45 +++++----- 4 files changed, 122 insertions(+), 37 deletions(-) diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index 9b9c1b8beb..30d551e4b8 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -3875,10 +3875,9 @@ func (r *ClickHouseReader) GetLogAggregateAttributes(ctx context.Context, req *v } // add other attributes for _, field := range constants.StaticFieldsLogsV3 { - if !stringAllowed && field.DataType == v3.AttributeKeyDataTypeString { + if !stringAllowed && field.DataType == v3.AttributeKeyDataTypeString && (v3.AttributeKey{} == field) { continue } else if len(req.SearchText) == 0 || strings.Contains(field.Key, req.SearchText) { - field.IsColumn = isColumn(statements[0].Statement, field.Key) response.AttributeKeys = append(response.AttributeKeys, field) } } @@ -3933,8 +3932,10 @@ func (r *ClickHouseReader) GetLogAttributeKeys(ctx context.Context, req *v3.Filt // add other attributes for _, f := range constants.StaticFieldsLogsV3 { + if (v3.AttributeKey{} == f) { + continue + } if len(req.SearchText) == 0 || strings.Contains(f.Key, req.SearchText) { - f.IsColumn = isColumn(statements[0].Statement, f.Key) response.AttributeKeys = append(response.AttributeKeys, f) } } @@ -3973,7 +3974,7 @@ func (r *ClickHouseReader) GetLogAttributeValues(ctx context.Context, req *v3.Fi searchText := fmt.Sprintf("%%%s%%", req.SearchText) // check if the tagKey is a topLevelColumn - if _, ok := constants.LogsTopLevelColumnsV3[req.FilterAttributeKey]; ok { + if _, ok := constants.StaticFieldsLogsV3[req.FilterAttributeKey]; ok { // query the column for the last 48 hours filterValueColumnWhere := req.FilterAttributeKey selectKey := req.FilterAttributeKey diff --git a/pkg/query-service/app/logs/v3/query_builder.go b/pkg/query-service/app/logs/v3/query_builder.go index 03592d5e22..ba7b3438b0 100644 --- a/pkg/query-service/app/logs/v3/query_builder.go +++ b/pkg/query-service/app/logs/v3/query_builder.go @@ -55,6 +55,13 @@ var logOperators = map[v3.FilterOperator]string{ func enrichFieldWithMetadata(field v3.AttributeKey, fields map[string]v3.AttributeKey) v3.AttributeKey { if field.Type == "" || field.DataType == "" { + // if type is unknown check if it is a top level key + if v, ok := constants.StaticFieldsLogsV3[field.Key]; ok { + if (v3.AttributeKey{} != v) { + return v + } + } + // check if the field is present in the fields map if existingField, ok := fields[field.Key]; ok { if existingField.IsColumn { @@ -62,11 +69,13 @@ func enrichFieldWithMetadata(field v3.AttributeKey, fields map[string]v3.Attribu } field.Type = existingField.Type field.DataType = existingField.DataType - } else { - // enrich with default values if metadata is not found - field.Type = v3.AttributeKeyTypeTag - field.DataType = v3.AttributeKeyDataTypeString + return field } + + // enrich with default values if metadata is not found + field.Type = v3.AttributeKeyTypeTag + field.DataType = v3.AttributeKeyDataTypeString + } return field } @@ -94,8 +103,7 @@ func getClickhouseColumnName(key v3.AttributeKey, fields map[string]v3.Attribute clickhouseColumn := key.Key //if the key is present in the topLevelColumn then it will be only searched in those columns, //regardless if it is indexed/present again in resource or column attribute - _, isTopLevelCol := constants.LogsTopLevelColumnsV3[key.Key] - if !isTopLevelCol && !key.IsColumn { + if !key.IsColumn { columnType := getClickhouseLogsColumnType(key.Type) columnDataType := getClickhouseLogsColumnDataType(key.DataType) clickhouseColumn = fmt.Sprintf("%s_%s_value[indexOf(%s_%s_key, '%s')]", columnType, columnDataType, columnType, columnDataType, key.Key) diff --git a/pkg/query-service/app/logs/v3/query_builder_test.go b/pkg/query-service/app/logs/v3/query_builder_test.go index 093ec46a31..11b2c20770 100644 --- a/pkg/query-service/app/logs/v3/query_builder_test.go +++ b/pkg/query-service/app/logs/v3/query_builder_test.go @@ -29,13 +29,13 @@ var testGetClickhouseColumnNameData = []struct { ExpectedColumnName: "servicename", }, { - Name: "top level column", + Name: "same name as top level column", AttributeKey: v3.AttributeKey{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, - ExpectedColumnName: "trace_id", + ExpectedColumnName: "attributes_string_value[indexOf(attributes_string_key, 'trace_id')]", }, { - Name: "top level column with isColumn ignored", - AttributeKey: v3.AttributeKey{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: false}, + Name: "top level column", + AttributeKey: v3.AttributeKey{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, ExpectedColumnName: "trace_id", }, } @@ -83,6 +83,18 @@ var testGetSelectLabelsData = []struct { GroupByTags: []v3.AttributeKey{{Key: "host", IsColumn: true}}, SelectLabels: ", host as host", }, + { + Name: "trace_id field with missing meta", + AggregateOperator: v3.AggregateOperatorCount, + GroupByTags: []v3.AttributeKey{{Key: "trace_id"}}, + SelectLabels: ", trace_id as trace_id", + }, + { + Name: "trace_id field as an attribute", + AggregateOperator: v3.AggregateOperatorCount, + GroupByTags: []v3.AttributeKey{{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + SelectLabels: ", attributes_string_value[indexOf(attributes_string_key, 'trace_id')] as trace_id", + }, } func TestGetSelectLabels(t *testing.T) { @@ -221,6 +233,21 @@ var timeSeriesFilterQueryData = []struct { Fields: map[string]v3.AttributeKey{"bytes": {Key: "bytes", DataType: v3.AttributeKeyDataTypeInt64, Type: v3.AttributeKeyTypeTag}}, ExpectedFilter: " AND attributes_int64_value[indexOf(attributes_int64_key, 'bytes')] = 102", }, + { + Name: "Test top level field w/o metadata", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body"}, Value: "%test%", Operator: "like"}, + }}, + ExpectedFilter: " AND body ILIKE '%test%'", + }, + { + Name: "Test top level field with metadata", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "%test%", Operator: "like"}, + }}, + Fields: map[string]v3.AttributeKey{"body": {Key: "body", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + ExpectedFilter: " AND attributes_string_value[indexOf(attributes_string_key, 'body')] ILIKE '%test%'", + }, } func TestBuildLogsTimeSeriesFilterQuery(t *testing.T) { @@ -647,6 +674,56 @@ var testBuildLogsQueryData = []struct { TableName: "logs", ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' group by ts having value > 10 order by ts", }, + { + Name: "Test top level key", + Start: 1680066360726210000, + End: 1680066458000000000, + Step: 60, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, Value: "%test%", Operator: "like"}, + }, + }, + Having: []v3.Having{ + { + ColumnName: "name", + Operator: ">", + Value: 10, + }, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND body ILIKE '%test%' group by ts having value > 10 order by ts", + }, + { + Name: "Test attribute with same name as top level key", + Start: 1680066360726210000, + End: 1680066458000000000, + Step: 60, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "%test%", Operator: "like"}, + }, + }, + Having: []v3.Having{ + { + ColumnName: "name", + Operator: ">", + Value: 10, + }, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'body')] ILIKE '%test%' group by ts having value > 10 order by ts", + }, } func TestBuildLogsQuery(t *testing.T) { diff --git a/pkg/query-service/constants/constants.go b/pkg/query-service/constants/constants.go index db1555f1a3..ee7f858dbc 100644 --- a/pkg/query-service/constants/constants.go +++ b/pkg/query-service/constants/constants.go @@ -248,47 +248,46 @@ var ReservedColumnTargetAliases = map[string]struct{}{ const LogsPPLPfx = "logstransform/pipeline_" // The datatype present here doesn't represent the actual datatype of column in the logs table. -var StaticFieldsLogsV3 = []v3.AttributeKey{ - { + +var StaticFieldsLogsV3 = map[string]v3.AttributeKey{ + "timestamp": {}, + "id": {}, + "trace_id": { Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, - Type: v3.AttributeKeyTypeTag, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, }, - { + "span_id": { Key: "span_id", DataType: v3.AttributeKeyDataTypeString, - Type: v3.AttributeKeyTypeTag, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, }, - { + "trace_flags": { Key: "trace_flags", DataType: v3.AttributeKeyDataTypeInt64, - Type: v3.AttributeKeyTypeTag, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, }, - { + "severity_text": { Key: "severity_text", DataType: v3.AttributeKeyDataTypeString, - Type: v3.AttributeKeyTypeTag, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, }, - { + "severity_number": { Key: "severity_number", DataType: v3.AttributeKeyDataTypeInt64, - Type: v3.AttributeKeyTypeTag, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, }, - { + "body": { Key: "body", DataType: v3.AttributeKeyDataTypeString, - Type: v3.AttributeKeyTypeTag, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, }, } -var LogsTopLevelColumnsV3 = map[string]struct{}{ - "trace_id": {}, - "span_id": {}, - "trace_flags": {}, - "severity_text": {}, - "severity_number": {}, - "timestamp": {}, - "id": {}, -} - const SigNozOrderByValue = "#SIGNOZ_VALUE"