From 17ae197bc340348dc1be9b10b8219cdb6dfd48bd Mon Sep 17 00:00:00 2001 From: Nityananda Gohain Date: Wed, 13 Sep 2023 13:46:06 +0530 Subject: [PATCH] feat: support for filtering json inside body (#3534) * feat: support for json query on body * feat: json filter parser updated for scalar values --- pkg/query-service/app/logs/v3/enrich_query.go | 6 + .../app/logs/v3/enrich_query_test.go | 18 ++ pkg/query-service/app/logs/v3/json_filter.go | 122 ++++++++ .../app/logs/v3/json_filter_test.go | 260 ++++++++++++++++++ .../app/logs/v3/query_builder.go | 10 +- .../app/logs/v3/query_builder_test.go | 68 +++++ pkg/query-service/model/v3/v3.go | 19 +- 7 files changed, 496 insertions(+), 7 deletions(-) create mode 100644 pkg/query-service/app/logs/v3/json_filter.go create mode 100644 pkg/query-service/app/logs/v3/json_filter_test.go diff --git a/pkg/query-service/app/logs/v3/enrich_query.go b/pkg/query-service/app/logs/v3/enrich_query.go index 5cf15618e2..c9a133ce0f 100644 --- a/pkg/query-service/app/logs/v3/enrich_query.go +++ b/pkg/query-service/app/logs/v3/enrich_query.go @@ -27,6 +27,9 @@ func EnrichmentRequired(params *v3.QueryRangeParamsV3) bool { // check filter attribute if query.Filters != nil && len(query.Filters.Items) != 0 { for _, item := range query.Filters.Items { + if item.Key.IsJSON { + continue + } if !isEnriched(item.Key) { return true } @@ -97,6 +100,9 @@ func enrichLogsQuery(query *v3.BuilderQuery, fields map[string]v3.AttributeKey) // enrich filter attribute if query.Filters != nil && len(query.Filters.Items) != 0 { for i := 0; i < len(query.Filters.Items); i++ { + if query.Filters.Items[i].Key.IsJSON { + continue + } query.Filters.Items[i].Key = enrichFieldWithMetadata(query.Filters.Items[i].Key, fields) } } diff --git a/pkg/query-service/app/logs/v3/enrich_query_test.go b/pkg/query-service/app/logs/v3/enrich_query_test.go index 16d5e74404..6d1ba3953b 100644 --- a/pkg/query-service/app/logs/v3/enrich_query_test.go +++ b/pkg/query-service/app/logs/v3/enrich_query_test.go @@ -86,6 +86,24 @@ var testEnrichmentRequiredData = []struct { }, EnrichmentRequired: true, }, + { + Name: "filter enrichment not required required json", + Params: v3.QueryRangeParamsV3{ + CompositeQuery: &v3.CompositeQuery{ + BuilderQueries: map[string]*v3.BuilderQuery{ + "test": { + QueryName: "test", + Expression: "test", + DataSource: v3.DataSourceLogs, + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "user_name", IsJSON: true}, Value: "john", Operator: "="}, + }}, + }, + }, + }, + }, + EnrichmentRequired: false, + }, { Name: "groupBy enrichment not required", Params: v3.QueryRangeParamsV3{ diff --git a/pkg/query-service/app/logs/v3/json_filter.go b/pkg/query-service/app/logs/v3/json_filter.go new file mode 100644 index 0000000000..f15b101a8c --- /dev/null +++ b/pkg/query-service/app/logs/v3/json_filter.go @@ -0,0 +1,122 @@ +package v3 + +import ( + "fmt" + "strings" + + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" + "go.signoz.io/signoz/pkg/query-service/utils" +) + +const ( + STRING = "String" + INT64 = "Int64" + FLOAT64 = "Float64" + ARRAY_STRING = "Array(String)" + ARRAY_INT64 = "Array(Int64)" + ARRAY_FLOAT64 = "Array(Float64)" +) + +var dataTypeMapping = map[string]string{ + "string": STRING, + "int64": INT64, + "float64": FLOAT64, + "array(string)": ARRAY_STRING, + "array(int64)": ARRAY_INT64, + "array(float64)": ARRAY_FLOAT64, +} + +var arrayValueTypeMapping = map[string]string{ + "array(string)": "string", + "array(int64)": "int64", + "array(float64)": "float64", +} + +var jsonLogOperators = map[v3.FilterOperator]string{ + v3.FilterOperatorEqual: "=", + v3.FilterOperatorNotEqual: "!=", + v3.FilterOperatorLessThan: "<", + v3.FilterOperatorLessThanOrEq: "<=", + v3.FilterOperatorGreaterThan: ">", + v3.FilterOperatorGreaterThanOrEq: ">=", + v3.FilterOperatorLike: "ILIKE", + v3.FilterOperatorNotLike: "NOT ILIKE", + v3.FilterOperatorContains: "ILIKE", + v3.FilterOperatorNotContains: "NOT ILIKE", + v3.FilterOperatorRegex: "match(%s, %s)", + v3.FilterOperatorNotRegex: "NOT match(%s, %s)", + v3.FilterOperatorIn: "IN", + v3.FilterOperatorNotIn: "NOT IN", + v3.FilterOperatorHas: "has(%s, %s)", + v3.FilterOperatorNotHas: "NOT has(%s, %s)", +} + +func getJSONFilterKey(key v3.AttributeKey, isArray bool) (string, error) { + keyArr := strings.Split(key.Key, ".") + if len(keyArr) < 2 { + return "", fmt.Errorf("incorrect key, should contain at least 2 parts") + } + + // only body is supported as of now + if strings.Compare(keyArr[0], "body") != 0 { + return "", fmt.Errorf("only body can be the root key") + } + + var dataType string + var ok bool + if dataType, ok = dataTypeMapping[string(key.DataType)]; !ok { + return "", fmt.Errorf("unsupported dataType for JSON: %s", key.DataType) + } + + if isArray { + return fmt.Sprintf("JSONExtract(JSON_QUERY(%s, '$.%s'), '%s')", keyArr[0], strings.Join(keyArr[1:], "."), dataType), nil + } + + // for non array + keyname := fmt.Sprintf("JSON_VALUE(%s, '$.%s')", keyArr[0], strings.Join(keyArr[1:], ".")) + if dataType != "String" { + keyname = fmt.Sprintf("JSONExtract(%s, '%s')", keyname, dataType) + } + + return keyname, nil +} + +func GetJSONFilter(item v3.FilterItem) (string, error) { + + dataType := item.Key.DataType + isArray := false + // check if its an array and handle it + if val, ok := arrayValueTypeMapping[string(item.Key.DataType)]; ok { + if item.Operator != v3.FilterOperatorHas && item.Operator != v3.FilterOperatorNotHas { + return "", fmt.Errorf("only has operator is supported for array") + } + isArray = true + dataType = v3.AttributeKeyDataType(val) + } + + key, err := getJSONFilterKey(item.Key, isArray) + if err != nil { + return "", err + } + + // non array + value, err := utils.ValidateAndCastValue(item.Value, dataType) + if err != nil { + return "", fmt.Errorf("failed to validate and cast value for %s: %v", item.Key.Key, err) + } + + op := v3.FilterOperator(strings.ToLower(strings.TrimSpace(string(item.Operator)))) + if logsOp, ok := jsonLogOperators[op]; ok { + switch op { + case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex, v3.FilterOperatorHas, v3.FilterOperatorNotHas: + fmtVal := utils.ClickHouseFormattedValue(value) + return fmt.Sprintf(logsOp, key, fmtVal), nil + case v3.FilterOperatorContains, v3.FilterOperatorNotContains: + return fmt.Sprintf("%s %s '%%%s%%'", key, logsOp, item.Value), nil + default: + fmtVal := utils.ClickHouseFormattedValue(value) + return fmt.Sprintf("%s %s %s", key, logsOp, fmtVal), nil + } + } + return "", fmt.Errorf("unsupported operator: %s", op) +} diff --git a/pkg/query-service/app/logs/v3/json_filter_test.go b/pkg/query-service/app/logs/v3/json_filter_test.go new file mode 100644 index 0000000000..6509bf5d16 --- /dev/null +++ b/pkg/query-service/app/logs/v3/json_filter_test.go @@ -0,0 +1,260 @@ +package v3 + +import ( + "testing" + + . "github.com/smartystreets/goconvey/convey" + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +var testGetJSONFilterKeyData = []struct { + Name string + Key v3.AttributeKey + IsArray bool + ClickhouseKey string + Error bool +}{ + { + Name: "Incorrect Key", + Key: v3.AttributeKey{ + Key: "requestor_list[*]", + DataType: "array(string)", + IsJSON: true, + }, + IsArray: true, + Error: true, + }, + { + Name: "Using anything other than body", + Key: v3.AttributeKey{ + Key: "trace_id.requestor_list[*]", + DataType: "array(string)", + IsJSON: true, + }, + IsArray: true, + Error: true, + }, + { + Name: "Array String", + Key: v3.AttributeKey{ + Key: "body.requestor_list[*]", + DataType: "array(string)", + IsJSON: true, + }, + IsArray: true, + ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.requestor_list[*]'), '" + ARRAY_STRING + "')", + }, + { + Name: "Array String Nested", + Key: v3.AttributeKey{ + Key: "body.nested[*].key[*]", + DataType: "array(string)", + IsJSON: true, + }, + IsArray: true, + ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.nested[*].key[*]'), '" + ARRAY_STRING + "')", + }, + { + Name: "Array Int", + Key: v3.AttributeKey{ + Key: "body.int_numbers[*]", + DataType: "array(int64)", + IsJSON: true, + }, + IsArray: true, + ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.int_numbers[*]'), '" + ARRAY_INT64 + "')", + }, + { + Name: "Array Float", + Key: v3.AttributeKey{ + Key: "body.nested_num[*].float_nums[*]", + DataType: "array(float64)", + IsJSON: true, + }, + IsArray: true, + ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.nested_num[*].float_nums[*]'), '" + ARRAY_FLOAT64 + "')", + }, + { + Name: "String", + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + IsArray: false, + ClickhouseKey: "JSON_VALUE(body, '$.message')", + }, + { + Name: "Int", + Key: v3.AttributeKey{ + Key: "body.status", + DataType: "int64", + IsJSON: true, + }, + IsArray: false, + ClickhouseKey: "JSONExtract(JSON_VALUE(body, '$.status'), '" + INT64 + "')", + }, + { + Name: "Float", + Key: v3.AttributeKey{ + Key: "body.fraction", + DataType: "float64", + IsJSON: true, + }, + IsArray: false, + ClickhouseKey: "JSONExtract(JSON_VALUE(body, '$.fraction'), '" + FLOAT64 + "')", + }, +} + +func TestGetJSONFilterKey(t *testing.T) { + for _, tt := range testGetJSONFilterKeyData { + Convey("testgetKey", t, func() { + columnName, err := getJSONFilterKey(tt.Key, tt.IsArray) + if tt.Error { + So(err, ShouldNotBeNil) + } else { + So(err, ShouldBeNil) + So(columnName, ShouldEqual, tt.ClickhouseKey) + } + }) + } +} + +var testGetJSONFilterData = []struct { + Name string + FilterItem v3.FilterItem + Filter string + Error bool +}{ + { + Name: "Array membership string", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.requestor_list[*]", + DataType: "array(string)", + IsJSON: true, + }, + Operator: "has", + Value: "index_service", + }, + Filter: "has(JSONExtract(JSON_QUERY(body, '$.requestor_list[*]'), 'Array(String)'), 'index_service')", + }, + { + Name: "Array membership int64", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.int_numbers[*]", + DataType: "array(int64)", + IsJSON: true, + }, + Operator: "has", + Value: 2, + }, + Filter: "has(JSONExtract(JSON_QUERY(body, '$.int_numbers[*]'), '" + ARRAY_INT64 + "'), 2)", + }, + { + Name: "Array membership float64", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.nested_num[*].float_nums[*]", + DataType: "array(float64)", + IsJSON: true, + }, + Operator: "nhas", + Value: 2.2, + }, + Filter: "NOT has(JSONExtract(JSON_QUERY(body, '$.nested_num[*].float_nums[*]'), '" + ARRAY_FLOAT64 + "'), 2.200000)", + }, + { + Name: "eq operator", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "=", + Value: "hello", + }, + Filter: "JSON_VALUE(body, '$.message') = 'hello'", + }, + { + Name: "eq operator number", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.status", + DataType: "int64", + IsJSON: true, + }, + Operator: "=", + Value: 1, + }, + Filter: "JSONExtract(JSON_VALUE(body, '$.status'), '" + INT64 + "') = 1", + }, + { + Name: "neq operator number", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.status", + DataType: "float64", + IsJSON: true, + }, + Operator: "=", + Value: 1.1, + }, + Filter: "JSONExtract(JSON_VALUE(body, '$.status'), '" + FLOAT64 + "') = 1.100000", + }, + { + Name: "greater than operator", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.status", + DataType: "int64", + IsJSON: true, + }, + Operator: ">", + Value: 1, + }, + Filter: "JSONExtract(JSON_VALUE(body, '$.status'), '" + INT64 + "') > 1", + }, + { + Name: "regex operator", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "regex", + Value: "a*", + }, + Filter: "match(JSON_VALUE(body, '$.message'), 'a*')", + }, + { + Name: "contains operator", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "contains", + Value: "a", + }, + Filter: "JSON_VALUE(body, '$.message') ILIKE '%a%'", + }, +} + +func TestGetJSONFilter(t *testing.T) { + for _, tt := range testGetJSONFilterData { + Convey("testGetJSONFilter", t, func() { + filter, err := GetJSONFilter(tt.FilterItem) + if tt.Error { + So(err, ShouldNotBeNil) + } else { + So(err, ShouldBeNil) + So(filter, ShouldEqual, tt.Filter) + } + }) + } +} diff --git a/pkg/query-service/app/logs/v3/query_builder.go b/pkg/query-service/app/logs/v3/query_builder.go index b083a04c4e..5deb2c26f3 100644 --- a/pkg/query-service/app/logs/v3/query_builder.go +++ b/pkg/query-service/app/logs/v3/query_builder.go @@ -49,7 +49,6 @@ var logOperators = map[v3.FilterOperator]string{ v3.FilterOperatorNotIn: "NOT IN", v3.FilterOperatorExists: "has(%s_%s_key, '%s')", v3.FilterOperatorNotExists: "not has(%s_%s_key, '%s')", - // (todo) check contains/not contains/ } func getClickhouseLogsColumnType(columnType v3.AttributeKeyType) string { @@ -161,6 +160,15 @@ func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey, if fs != nil && len(fs.Items) != 0 { for _, item := range fs.Items { + if item.Key.IsJSON { + filter, err := GetJSONFilter(item) + if err != nil { + return "", err + } + conditions = append(conditions, filter) + continue + } + op := v3.FilterOperator(strings.ToLower(strings.TrimSpace(string(item.Operator)))) var value interface{} diff --git a/pkg/query-service/app/logs/v3/query_builder_test.go b/pkg/query-service/app/logs/v3/query_builder_test.go index 9d3dc649ae..79f103a0a3 100644 --- a/pkg/query-service/app/logs/v3/query_builder_test.go +++ b/pkg/query-service/app/logs/v3/query_builder_test.go @@ -876,6 +876,74 @@ var testBuildLogsQueryData = []struct { TableName: "logs", ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as name, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND indexOf(attributes_string_key, 'name') > 0 group by name order by name DESC", }, + { + Name: "TABLE: Test count with JSON Filter, groupBy, orderBy", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "contains", + Value: "a", + }, + }, + }, + GroupBy: []v3.AttributeKey{ + {Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + OrderBy: []v3.OrderBy{ + {ColumnName: "name", Order: "DESC"}, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as name, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND JSON_VALUE(body, '$.message') ILIKE '%a%' AND indexOf(attributes_string_key, 'name') > 0 group by name order by name DESC", + }, + { + Name: "TABLE: Test count with JSON Filter Array, groupBy, orderBy", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "body.requestor_list[*]", + DataType: "array(string)", + IsJSON: true, + }, + Operator: "has", + Value: "index_service", + }, + }, + }, + GroupBy: []v3.AttributeKey{ + {Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + OrderBy: []v3.OrderBy{ + {ColumnName: "name", Order: "DESC"}, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as name, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(JSONExtract(JSON_QUERY(body, '$.requestor_list[*]'), 'Array(String)'), 'index_service') AND indexOf(attributes_string_key, 'name') > 0 group by name order by name DESC", + }, } func TestBuildLogsQuery(t *testing.T) { diff --git a/pkg/query-service/model/v3/v3.go b/pkg/query-service/model/v3/v3.go index d46c644eb4..a8c03066b8 100644 --- a/pkg/query-service/model/v3/v3.go +++ b/pkg/query-service/model/v3/v3.go @@ -234,11 +234,14 @@ type FilterAttributeKeyRequest struct { type AttributeKeyDataType string const ( - AttributeKeyDataTypeUnspecified AttributeKeyDataType = "" - AttributeKeyDataTypeString AttributeKeyDataType = "string" - AttributeKeyDataTypeInt64 AttributeKeyDataType = "int64" - AttributeKeyDataTypeFloat64 AttributeKeyDataType = "float64" - AttributeKeyDataTypeBool AttributeKeyDataType = "bool" + AttributeKeyDataTypeUnspecified AttributeKeyDataType = "" + AttributeKeyDataTypeString AttributeKeyDataType = "string" + AttributeKeyDataTypeInt64 AttributeKeyDataType = "int64" + AttributeKeyDataTypeFloat64 AttributeKeyDataType = "float64" + AttributeKeyDataTypeBool AttributeKeyDataType = "bool" + AttributeKeyDataTypeArrayString AttributeKeyDataType = "array(string)" + AttributeKeyDataTypeArrayInt64 AttributeKeyDataType = "array(int64)" + AttributeKeyDataTypeArrayFloat64 AttributeKeyDataType = "array(float64)" ) func (q AttributeKeyDataType) Validate() error { @@ -285,6 +288,7 @@ type AttributeKey struct { DataType AttributeKeyDataType `json:"dataType"` Type AttributeKeyType `json:"type"` IsColumn bool `json:"isColumn"` + IsJSON bool `json:"isJSON"` } func (a AttributeKey) CacheKey() string { @@ -293,7 +297,7 @@ func (a AttributeKey) CacheKey() string { func (a AttributeKey) Validate() error { switch a.DataType { - case AttributeKeyDataTypeBool, AttributeKeyDataTypeInt64, AttributeKeyDataTypeFloat64, AttributeKeyDataTypeString, AttributeKeyDataTypeUnspecified: + case AttributeKeyDataTypeBool, AttributeKeyDataTypeInt64, AttributeKeyDataTypeFloat64, AttributeKeyDataTypeString, AttributeKeyDataTypeArrayFloat64, AttributeKeyDataTypeArrayString, AttributeKeyDataTypeArrayInt64, AttributeKeyDataTypeUnspecified: break default: return fmt.Errorf("invalid attribute dataType: %s", a.DataType) @@ -545,6 +549,9 @@ const ( FilterOperatorExists FilterOperator = "exists" FilterOperatorNotExists FilterOperator = "nexists" + + FilterOperatorHas FilterOperator = "has" + FilterOperatorNotHas FilterOperator = "nhas" ) type FilterItem struct {