feat: support for filtering json inside body (#3534)

* feat: support for json query on body

* feat: json filter parser updated for scalar values
This commit is contained in:
Nityananda Gohain 2023-09-13 13:46:06 +05:30 committed by GitHub
parent 27cda7a437
commit 17ae197bc3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 496 additions and 7 deletions

View File

@ -27,6 +27,9 @@ func EnrichmentRequired(params *v3.QueryRangeParamsV3) bool {
// check filter attribute
if query.Filters != nil && len(query.Filters.Items) != 0 {
for _, item := range query.Filters.Items {
if item.Key.IsJSON {
continue
}
if !isEnriched(item.Key) {
return true
}
@ -97,6 +100,9 @@ func enrichLogsQuery(query *v3.BuilderQuery, fields map[string]v3.AttributeKey)
// enrich filter attribute
if query.Filters != nil && len(query.Filters.Items) != 0 {
for i := 0; i < len(query.Filters.Items); i++ {
if query.Filters.Items[i].Key.IsJSON {
continue
}
query.Filters.Items[i].Key = enrichFieldWithMetadata(query.Filters.Items[i].Key, fields)
}
}

View File

@ -86,6 +86,24 @@ var testEnrichmentRequiredData = []struct {
},
EnrichmentRequired: true,
},
{
Name: "filter enrichment not required required json",
Params: v3.QueryRangeParamsV3{
CompositeQuery: &v3.CompositeQuery{
BuilderQueries: map[string]*v3.BuilderQuery{
"test": {
QueryName: "test",
Expression: "test",
DataSource: v3.DataSourceLogs,
Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{
{Key: v3.AttributeKey{Key: "user_name", IsJSON: true}, Value: "john", Operator: "="},
}},
},
},
},
},
EnrichmentRequired: false,
},
{
Name: "groupBy enrichment not required",
Params: v3.QueryRangeParamsV3{

View File

@ -0,0 +1,122 @@
package v3
import (
"fmt"
"strings"
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
"go.signoz.io/signoz/pkg/query-service/utils"
)
const (
STRING = "String"
INT64 = "Int64"
FLOAT64 = "Float64"
ARRAY_STRING = "Array(String)"
ARRAY_INT64 = "Array(Int64)"
ARRAY_FLOAT64 = "Array(Float64)"
)
var dataTypeMapping = map[string]string{
"string": STRING,
"int64": INT64,
"float64": FLOAT64,
"array(string)": ARRAY_STRING,
"array(int64)": ARRAY_INT64,
"array(float64)": ARRAY_FLOAT64,
}
var arrayValueTypeMapping = map[string]string{
"array(string)": "string",
"array(int64)": "int64",
"array(float64)": "float64",
}
var jsonLogOperators = map[v3.FilterOperator]string{
v3.FilterOperatorEqual: "=",
v3.FilterOperatorNotEqual: "!=",
v3.FilterOperatorLessThan: "<",
v3.FilterOperatorLessThanOrEq: "<=",
v3.FilterOperatorGreaterThan: ">",
v3.FilterOperatorGreaterThanOrEq: ">=",
v3.FilterOperatorLike: "ILIKE",
v3.FilterOperatorNotLike: "NOT ILIKE",
v3.FilterOperatorContains: "ILIKE",
v3.FilterOperatorNotContains: "NOT ILIKE",
v3.FilterOperatorRegex: "match(%s, %s)",
v3.FilterOperatorNotRegex: "NOT match(%s, %s)",
v3.FilterOperatorIn: "IN",
v3.FilterOperatorNotIn: "NOT IN",
v3.FilterOperatorHas: "has(%s, %s)",
v3.FilterOperatorNotHas: "NOT has(%s, %s)",
}
func getJSONFilterKey(key v3.AttributeKey, isArray bool) (string, error) {
keyArr := strings.Split(key.Key, ".")
if len(keyArr) < 2 {
return "", fmt.Errorf("incorrect key, should contain at least 2 parts")
}
// only body is supported as of now
if strings.Compare(keyArr[0], "body") != 0 {
return "", fmt.Errorf("only body can be the root key")
}
var dataType string
var ok bool
if dataType, ok = dataTypeMapping[string(key.DataType)]; !ok {
return "", fmt.Errorf("unsupported dataType for JSON: %s", key.DataType)
}
if isArray {
return fmt.Sprintf("JSONExtract(JSON_QUERY(%s, '$.%s'), '%s')", keyArr[0], strings.Join(keyArr[1:], "."), dataType), nil
}
// for non array
keyname := fmt.Sprintf("JSON_VALUE(%s, '$.%s')", keyArr[0], strings.Join(keyArr[1:], "."))
if dataType != "String" {
keyname = fmt.Sprintf("JSONExtract(%s, '%s')", keyname, dataType)
}
return keyname, nil
}
func GetJSONFilter(item v3.FilterItem) (string, error) {
dataType := item.Key.DataType
isArray := false
// check if its an array and handle it
if val, ok := arrayValueTypeMapping[string(item.Key.DataType)]; ok {
if item.Operator != v3.FilterOperatorHas && item.Operator != v3.FilterOperatorNotHas {
return "", fmt.Errorf("only has operator is supported for array")
}
isArray = true
dataType = v3.AttributeKeyDataType(val)
}
key, err := getJSONFilterKey(item.Key, isArray)
if err != nil {
return "", err
}
// non array
value, err := utils.ValidateAndCastValue(item.Value, dataType)
if err != nil {
return "", fmt.Errorf("failed to validate and cast value for %s: %v", item.Key.Key, err)
}
op := v3.FilterOperator(strings.ToLower(strings.TrimSpace(string(item.Operator))))
if logsOp, ok := jsonLogOperators[op]; ok {
switch op {
case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex, v3.FilterOperatorHas, v3.FilterOperatorNotHas:
fmtVal := utils.ClickHouseFormattedValue(value)
return fmt.Sprintf(logsOp, key, fmtVal), nil
case v3.FilterOperatorContains, v3.FilterOperatorNotContains:
return fmt.Sprintf("%s %s '%%%s%%'", key, logsOp, item.Value), nil
default:
fmtVal := utils.ClickHouseFormattedValue(value)
return fmt.Sprintf("%s %s %s", key, logsOp, fmtVal), nil
}
}
return "", fmt.Errorf("unsupported operator: %s", op)
}

View File

@ -0,0 +1,260 @@
package v3
import (
"testing"
. "github.com/smartystreets/goconvey/convey"
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
)
var testGetJSONFilterKeyData = []struct {
Name string
Key v3.AttributeKey
IsArray bool
ClickhouseKey string
Error bool
}{
{
Name: "Incorrect Key",
Key: v3.AttributeKey{
Key: "requestor_list[*]",
DataType: "array(string)",
IsJSON: true,
},
IsArray: true,
Error: true,
},
{
Name: "Using anything other than body",
Key: v3.AttributeKey{
Key: "trace_id.requestor_list[*]",
DataType: "array(string)",
IsJSON: true,
},
IsArray: true,
Error: true,
},
{
Name: "Array String",
Key: v3.AttributeKey{
Key: "body.requestor_list[*]",
DataType: "array(string)",
IsJSON: true,
},
IsArray: true,
ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.requestor_list[*]'), '" + ARRAY_STRING + "')",
},
{
Name: "Array String Nested",
Key: v3.AttributeKey{
Key: "body.nested[*].key[*]",
DataType: "array(string)",
IsJSON: true,
},
IsArray: true,
ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.nested[*].key[*]'), '" + ARRAY_STRING + "')",
},
{
Name: "Array Int",
Key: v3.AttributeKey{
Key: "body.int_numbers[*]",
DataType: "array(int64)",
IsJSON: true,
},
IsArray: true,
ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.int_numbers[*]'), '" + ARRAY_INT64 + "')",
},
{
Name: "Array Float",
Key: v3.AttributeKey{
Key: "body.nested_num[*].float_nums[*]",
DataType: "array(float64)",
IsJSON: true,
},
IsArray: true,
ClickhouseKey: "JSONExtract(JSON_QUERY(body, '$.nested_num[*].float_nums[*]'), '" + ARRAY_FLOAT64 + "')",
},
{
Name: "String",
Key: v3.AttributeKey{
Key: "body.message",
DataType: "string",
IsJSON: true,
},
IsArray: false,
ClickhouseKey: "JSON_VALUE(body, '$.message')",
},
{
Name: "Int",
Key: v3.AttributeKey{
Key: "body.status",
DataType: "int64",
IsJSON: true,
},
IsArray: false,
ClickhouseKey: "JSONExtract(JSON_VALUE(body, '$.status'), '" + INT64 + "')",
},
{
Name: "Float",
Key: v3.AttributeKey{
Key: "body.fraction",
DataType: "float64",
IsJSON: true,
},
IsArray: false,
ClickhouseKey: "JSONExtract(JSON_VALUE(body, '$.fraction'), '" + FLOAT64 + "')",
},
}
func TestGetJSONFilterKey(t *testing.T) {
for _, tt := range testGetJSONFilterKeyData {
Convey("testgetKey", t, func() {
columnName, err := getJSONFilterKey(tt.Key, tt.IsArray)
if tt.Error {
So(err, ShouldNotBeNil)
} else {
So(err, ShouldBeNil)
So(columnName, ShouldEqual, tt.ClickhouseKey)
}
})
}
}
var testGetJSONFilterData = []struct {
Name string
FilterItem v3.FilterItem
Filter string
Error bool
}{
{
Name: "Array membership string",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.requestor_list[*]",
DataType: "array(string)",
IsJSON: true,
},
Operator: "has",
Value: "index_service",
},
Filter: "has(JSONExtract(JSON_QUERY(body, '$.requestor_list[*]'), 'Array(String)'), 'index_service')",
},
{
Name: "Array membership int64",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.int_numbers[*]",
DataType: "array(int64)",
IsJSON: true,
},
Operator: "has",
Value: 2,
},
Filter: "has(JSONExtract(JSON_QUERY(body, '$.int_numbers[*]'), '" + ARRAY_INT64 + "'), 2)",
},
{
Name: "Array membership float64",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.nested_num[*].float_nums[*]",
DataType: "array(float64)",
IsJSON: true,
},
Operator: "nhas",
Value: 2.2,
},
Filter: "NOT has(JSONExtract(JSON_QUERY(body, '$.nested_num[*].float_nums[*]'), '" + ARRAY_FLOAT64 + "'), 2.200000)",
},
{
Name: "eq operator",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.message",
DataType: "string",
IsJSON: true,
},
Operator: "=",
Value: "hello",
},
Filter: "JSON_VALUE(body, '$.message') = 'hello'",
},
{
Name: "eq operator number",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.status",
DataType: "int64",
IsJSON: true,
},
Operator: "=",
Value: 1,
},
Filter: "JSONExtract(JSON_VALUE(body, '$.status'), '" + INT64 + "') = 1",
},
{
Name: "neq operator number",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.status",
DataType: "float64",
IsJSON: true,
},
Operator: "=",
Value: 1.1,
},
Filter: "JSONExtract(JSON_VALUE(body, '$.status'), '" + FLOAT64 + "') = 1.100000",
},
{
Name: "greater than operator",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.status",
DataType: "int64",
IsJSON: true,
},
Operator: ">",
Value: 1,
},
Filter: "JSONExtract(JSON_VALUE(body, '$.status'), '" + INT64 + "') > 1",
},
{
Name: "regex operator",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.message",
DataType: "string",
IsJSON: true,
},
Operator: "regex",
Value: "a*",
},
Filter: "match(JSON_VALUE(body, '$.message'), 'a*')",
},
{
Name: "contains operator",
FilterItem: v3.FilterItem{
Key: v3.AttributeKey{
Key: "body.message",
DataType: "string",
IsJSON: true,
},
Operator: "contains",
Value: "a",
},
Filter: "JSON_VALUE(body, '$.message') ILIKE '%a%'",
},
}
func TestGetJSONFilter(t *testing.T) {
for _, tt := range testGetJSONFilterData {
Convey("testGetJSONFilter", t, func() {
filter, err := GetJSONFilter(tt.FilterItem)
if tt.Error {
So(err, ShouldNotBeNil)
} else {
So(err, ShouldBeNil)
So(filter, ShouldEqual, tt.Filter)
}
})
}
}

View File

@ -49,7 +49,6 @@ var logOperators = map[v3.FilterOperator]string{
v3.FilterOperatorNotIn: "NOT IN",
v3.FilterOperatorExists: "has(%s_%s_key, '%s')",
v3.FilterOperatorNotExists: "not has(%s_%s_key, '%s')",
// (todo) check contains/not contains/
}
func getClickhouseLogsColumnType(columnType v3.AttributeKeyType) string {
@ -161,6 +160,15 @@ func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey,
if fs != nil && len(fs.Items) != 0 {
for _, item := range fs.Items {
if item.Key.IsJSON {
filter, err := GetJSONFilter(item)
if err != nil {
return "", err
}
conditions = append(conditions, filter)
continue
}
op := v3.FilterOperator(strings.ToLower(strings.TrimSpace(string(item.Operator))))
var value interface{}

View File

@ -876,6 +876,74 @@ var testBuildLogsQueryData = []struct {
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as name, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND indexOf(attributes_string_key, 'name') > 0 group by name order by name DESC",
},
{
Name: "TABLE: Test count with JSON Filter, groupBy, orderBy",
PanelType: v3.PanelTypeTable,
Start: 1680066360726210000,
End: 1680066458000000000,
BuilderQuery: &v3.BuilderQuery{
QueryName: "A",
StepInterval: 60,
AggregateOperator: v3.AggregateOperatorCount,
Expression: "A",
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "body.message",
DataType: "string",
IsJSON: true,
},
Operator: "contains",
Value: "a",
},
},
},
GroupBy: []v3.AttributeKey{
{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag},
},
OrderBy: []v3.OrderBy{
{ColumnName: "name", Order: "DESC"},
},
},
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as name, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND JSON_VALUE(body, '$.message') ILIKE '%a%' AND indexOf(attributes_string_key, 'name') > 0 group by name order by name DESC",
},
{
Name: "TABLE: Test count with JSON Filter Array, groupBy, orderBy",
PanelType: v3.PanelTypeTable,
Start: 1680066360726210000,
End: 1680066458000000000,
BuilderQuery: &v3.BuilderQuery{
QueryName: "A",
StepInterval: 60,
AggregateOperator: v3.AggregateOperatorCount,
Expression: "A",
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "body.requestor_list[*]",
DataType: "array(string)",
IsJSON: true,
},
Operator: "has",
Value: "index_service",
},
},
},
GroupBy: []v3.AttributeKey{
{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag},
},
OrderBy: []v3.OrderBy{
{ColumnName: "name", Order: "DESC"},
},
},
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as name, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(JSONExtract(JSON_QUERY(body, '$.requestor_list[*]'), 'Array(String)'), 'index_service') AND indexOf(attributes_string_key, 'name') > 0 group by name order by name DESC",
},
}
func TestBuildLogsQuery(t *testing.T) {

View File

@ -239,6 +239,9 @@ const (
AttributeKeyDataTypeInt64 AttributeKeyDataType = "int64"
AttributeKeyDataTypeFloat64 AttributeKeyDataType = "float64"
AttributeKeyDataTypeBool AttributeKeyDataType = "bool"
AttributeKeyDataTypeArrayString AttributeKeyDataType = "array(string)"
AttributeKeyDataTypeArrayInt64 AttributeKeyDataType = "array(int64)"
AttributeKeyDataTypeArrayFloat64 AttributeKeyDataType = "array(float64)"
)
func (q AttributeKeyDataType) Validate() error {
@ -285,6 +288,7 @@ type AttributeKey struct {
DataType AttributeKeyDataType `json:"dataType"`
Type AttributeKeyType `json:"type"`
IsColumn bool `json:"isColumn"`
IsJSON bool `json:"isJSON"`
}
func (a AttributeKey) CacheKey() string {
@ -293,7 +297,7 @@ func (a AttributeKey) CacheKey() string {
func (a AttributeKey) Validate() error {
switch a.DataType {
case AttributeKeyDataTypeBool, AttributeKeyDataTypeInt64, AttributeKeyDataTypeFloat64, AttributeKeyDataTypeString, AttributeKeyDataTypeUnspecified:
case AttributeKeyDataTypeBool, AttributeKeyDataTypeInt64, AttributeKeyDataTypeFloat64, AttributeKeyDataTypeString, AttributeKeyDataTypeArrayFloat64, AttributeKeyDataTypeArrayString, AttributeKeyDataTypeArrayInt64, AttributeKeyDataTypeUnspecified:
break
default:
return fmt.Errorf("invalid attribute dataType: %s", a.DataType)
@ -545,6 +549,9 @@ const (
FilterOperatorExists FilterOperator = "exists"
FilterOperatorNotExists FilterOperator = "nexists"
FilterOperatorHas FilterOperator = "has"
FilterOperatorNotHas FilterOperator = "nhas"
)
type FilterItem struct {