diff --git a/pkg/query-service/app/logs/v3/json_filter.go b/pkg/query-service/app/logs/v3/json_filter.go index a9acdeaab3..887baaab4c 100644 --- a/pkg/query-service/app/logs/v3/json_filter.go +++ b/pkg/query-service/app/logs/v3/json_filter.go @@ -17,6 +17,7 @@ const ( ARRAY_INT64 = "Array(Int64)" ARRAY_FLOAT64 = "Array(Float64)" ARRAY_BOOL = "Array(Bool)" + NGRAM_SIZE = 4 ) var dataTypeMapping = map[string]string{ @@ -72,6 +73,7 @@ func getPath(keyArr []string) string { func getJSONFilterKey(key v3.AttributeKey, op v3.FilterOperator, isArray bool) (string, error) { keyArr := strings.Split(key.Key, ".") + // i.e. it should be at least body.name, and not something like body if len(keyArr) < 2 { return "", fmt.Errorf("incorrect key, should contain at least 2 parts") } @@ -106,6 +108,29 @@ func getJSONFilterKey(key v3.AttributeKey, op v3.FilterOperator, isArray bool) ( return keyname, nil } +// getPathIndexFilter takes the path and generates a where clause on lower(body) for better usage of the index +func getPathIndexFilter(path string) string { + filters := []string{} + keyArr := strings.Split(path, ".") + if len(keyArr) < 2 { + return "" + } + + for i, key := range keyArr { + if i == 0 { + continue + } + key = strings.TrimSuffix(key, "[*]") + if len(key) >= NGRAM_SIZE { + filters = append(filters, strings.ToLower(key)) + } + } + if len(filters) > 0 { + return fmt.Sprintf("lower(body) like lower('%%%s%%')", strings.Join(filters, "%")) + } + return "" +} + func GetJSONFilter(item v3.FilterItem) (string, error) { dataType := item.Key.DataType @@ -154,11 +179,28 @@ func GetJSONFilter(item v3.FilterItem) (string, error) { return "", fmt.Errorf("unsupported operator: %s", op) } + filters := []string{} + + pathFilter := getPathIndexFilter(item.Key.Key) + if pathFilter != "" { + filters = append(filters, pathFilter) + } + if op == v3.FilterOperatorContains || + op == v3.FilterOperatorEqual || + op == v3.FilterOperatorHas { + val, ok := item.Value.(string) + if ok && len(val) >= NGRAM_SIZE { 
filters = append(filters, fmt.Sprintf("lower(body) like lower('%%%s%%')", utils.QuoteEscapedString(strings.ToLower(val)))) + } + } + // add exists check for non array items as default values of int/float/bool will corrupt the results if !isArray && !(item.Operator == v3.FilterOperatorExists || item.Operator == v3.FilterOperatorNotExists) { existsFilter := fmt.Sprintf("JSON_EXISTS(body, '$.%s')", getPath(strings.Split(item.Key.Key, ".")[1:])) filter = fmt.Sprintf("%s AND %s", existsFilter, filter) } - return filter, nil + filters = append(filters, filter) + + return strings.Join(filters, " AND "), nil } diff --git a/pkg/query-service/app/logs/v3/json_filter_test.go b/pkg/query-service/app/logs/v3/json_filter_test.go index ac9d8edbf4..0a71cd67b2 100644 --- a/pkg/query-service/app/logs/v3/json_filter_test.go +++ b/pkg/query-service/app/logs/v3/json_filter_test.go @@ -168,7 +168,7 @@ var testGetJSONFilterData = []struct { Operator: "has", Value: "index_service", }, - Filter: "has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service')", + Filter: "lower(body) like lower('%requestor_list%') AND lower(body) like lower('%index_service%') AND has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service')", }, { Name: "Array membership int64", @@ -181,7 +181,7 @@ var testGetJSONFilterData = []struct { Operator: "has", Value: 2, }, - Filter: "has(JSONExtract(JSON_QUERY(body, '$.\"int_numbers\"[*]'), '" + ARRAY_INT64 + "'), 2)", + Filter: "lower(body) like lower('%int_numbers%') AND has(JSONExtract(JSON_QUERY(body, '$.\"int_numbers\"[*]'), '" + ARRAY_INT64 + "'), 2)", }, { Name: "Array membership float64", @@ -194,7 +194,7 @@ var testGetJSONFilterData = []struct { Operator: "nhas", Value: 2.2, }, - Filter: "NOT has(JSONExtract(JSON_QUERY(body, '$.\"nested_num\"[*].\"float_nums\"[*]'), '" + ARRAY_FLOAT64 + "'), 2.200000)", + Filter: "lower(body) like lower('%nested_num%float_nums%') AND NOT 
has(JSONExtract(JSON_QUERY(body, '$.\"nested_num\"[*].\"float_nums\"[*]'), '" + ARRAY_FLOAT64 + "'), 2.200000)", }, { Name: "Array membership bool", @@ -207,7 +207,7 @@ var testGetJSONFilterData = []struct { Operator: "has", Value: true, }, - Filter: "has(JSONExtract(JSON_QUERY(body, '$.\"bool\"[*]'), '" + ARRAY_BOOL + "'), true)", + Filter: "lower(body) like lower('%bool%') AND has(JSONExtract(JSON_QUERY(body, '$.\"bool\"[*]'), '" + ARRAY_BOOL + "'), true)", }, { Name: "eq operator", @@ -220,7 +220,7 @@ var testGetJSONFilterData = []struct { Operator: "=", Value: "hello", }, - Filter: "JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') = 'hello'", + Filter: "lower(body) like lower('%message%') AND lower(body) like lower('%hello%') AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') = 'hello'", }, { Name: "eq operator number", @@ -233,7 +233,7 @@ var testGetJSONFilterData = []struct { Operator: "=", Value: 1, }, - Filter: "JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + INT64 + "') = 1", + Filter: "lower(body) like lower('%status%') AND JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + INT64 + "') = 1", }, { Name: "neq operator number", @@ -246,7 +246,7 @@ var testGetJSONFilterData = []struct { Operator: "=", Value: 1.1, }, - Filter: "JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + FLOAT64 + "') = 1.100000", + Filter: "lower(body) like lower('%status%') AND JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + FLOAT64 + "') = 1.100000", }, { Name: "eq operator bool", @@ -259,7 +259,7 @@ var testGetJSONFilterData = []struct { Operator: "=", Value: true, }, - Filter: "JSON_EXISTS(body, '$.\"boolkey\"') AND JSONExtract(JSON_VALUE(body, '$.\"boolkey\"'), '" + BOOL + "') = true", + Filter: "lower(body) like lower('%boolkey%') AND JSON_EXISTS(body, '$.\"boolkey\"') AND 
JSONExtract(JSON_VALUE(body, '$.\"boolkey\"'), '" + BOOL + "') = true", }, { Name: "greater than operator", @@ -272,7 +272,7 @@ var testGetJSONFilterData = []struct { Operator: ">", Value: 1, }, - Filter: "JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + INT64 + "') > 1", + Filter: "lower(body) like lower('%status%') AND JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + INT64 + "') > 1", }, { Name: "regex operator", @@ -285,7 +285,7 @@ var testGetJSONFilterData = []struct { Operator: "regex", Value: "a*", }, - Filter: "JSON_EXISTS(body, '$.\"message\"') AND match(JSON_VALUE(body, '$.\"message\"'), 'a*')", + Filter: "lower(body) like lower('%message%') AND JSON_EXISTS(body, '$.\"message\"') AND match(JSON_VALUE(body, '$.\"message\"'), 'a*')", }, { Name: "contains operator", @@ -298,7 +298,7 @@ var testGetJSONFilterData = []struct { Operator: "contains", Value: "a", }, - Filter: "JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%a%'", + Filter: "lower(body) like lower('%message%') AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%a%'", }, { Name: "contains operator with quotes", @@ -311,7 +311,7 @@ var testGetJSONFilterData = []struct { Operator: "contains", Value: "hello 'world'", }, - Filter: "JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%hello \\'world\\'%'", + Filter: "lower(body) like lower('%message%') AND lower(body) like lower('%hello \\'world\\'%') AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%hello \\'world\\'%'", }, { Name: "exists", @@ -324,7 +324,7 @@ var testGetJSONFilterData = []struct { Operator: "exists", Value: "", }, - Filter: "JSON_EXISTS(body, '$.\"message\"')", + Filter: "lower(body) like lower('%message%') AND JSON_EXISTS(body, '$.\"message\"')", }, } diff --git a/pkg/query-service/app/logs/v3/query_builder.go 
b/pkg/query-service/app/logs/v3/query_builder.go index 8319f96384..2aa56002ff 100644 --- a/pkg/query-service/app/logs/v3/query_builder.go +++ b/pkg/query-service/app/logs/v3/query_builder.go @@ -51,6 +51,8 @@ var logOperators = map[v3.FilterOperator]string{ v3.FilterOperatorNotExists: "not has(%s_%s_key, '%s')", } +const BODY = "body" + func getClickhouseLogsColumnType(columnType v3.AttributeKeyType) string { if columnType == v3.AttributeKeyTypeTag { return "attributes" @@ -193,10 +195,24 @@ func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey, case v3.FilterOperatorContains, v3.FilterOperatorNotContains: columnName := getClickhouseColumnName(item.Key) val := utils.QuoteEscapedString(fmt.Sprintf("%v", item.Value)) - conditions = append(conditions, fmt.Sprintf("%s %s '%%%s%%'", columnName, logsOp, val)) + if columnName == BODY { + logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike + conditions = append(conditions, fmt.Sprintf("lower(%s) %s lower('%%%s%%')", columnName, logsOp, val)) + } else { + conditions = append(conditions, fmt.Sprintf("%s %s '%%%s%%'", columnName, logsOp, val)) + } default: columnName := getClickhouseColumnName(item.Key) fmtVal := utils.ClickHouseFormattedValue(value) + + // use lower() with LIKE for like and nlike on the body column + if op == v3.FilterOperatorLike || op == v3.FilterOperatorNotLike { + if columnName == BODY { + logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike + columnName = fmt.Sprintf("lower(%s)", columnName) + fmtVal = fmt.Sprintf("lower(%s)", fmtVal) + } + } conditions = append(conditions, fmt.Sprintf("%s %s %s", columnName, logsOp, fmtVal)) } } else { diff --git a/pkg/query-service/app/logs/v3/query_builder_test.go b/pkg/query-service/app/logs/v3/query_builder_test.go index dc41ce8c66..db57cb2549 100644 --- a/pkg/query-service/app/logs/v3/query_builder_test.go +++ b/pkg/query-service/app/logs/v3/query_builder_test.go @@ -130,6 +130,14 @@ var 
timeSeriesFilterQueryData = []struct { }}, ExpectedFilter: "attributes_string_value[indexOf(attributes_string_key, 'user_name')] = 'john' AND resources_string_value[indexOf(resources_string_key, 'k8s_namespace')] != 'my_service'", }, + { + Name: "Test attribute and resource attribute with different case", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "%JoHn%", Operator: "like"}, + {Key: v3.AttributeKey{Key: "k8s_namespace", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, Value: "%MyService%", Operator: "nlike"}, + }}, + ExpectedFilter: "attributes_string_value[indexOf(attributes_string_key, 'user_name')] ILIKE '%JoHn%' AND resources_string_value[indexOf(resources_string_key, 'k8s_namespace')] NOT ILIKE '%MyService%'", + }, { Name: "Test materialized column", FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ @@ -287,6 +295,22 @@ var timeSeriesFilterQueryData = []struct { }}, ExpectedFilter: "`attribute_int64_status_exists`=false", }, + { + Name: "Test for body contains and ncontains", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "contains", Value: "test"}, + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "ncontains", Value: "test1"}, + }}, + ExpectedFilter: "lower(body) LIKE lower('%test%') AND lower(body) NOT LIKE lower('%test1%')", + }, + { + Name: "Test for body like and nlike", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "like", Value: "test"}, + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "nlike", Value: 
"test1"}, + }}, + ExpectedFilter: "lower(body) LIKE lower('test') AND lower(body) NOT LIKE lower('test1')", + }, } func TestBuildLogsTimeSeriesFilterQuery(t *testing.T) { @@ -851,7 +875,7 @@ var testBuildLogsQueryData = []struct { }, }, TableName: "logs", - ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND body ILIKE '%test%' AND has(attributes_string_key, 'name') group by ts having value > 10 order by value DESC", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND lower(body) LIKE lower('%test%') AND has(attributes_string_key, 'name') group by ts having value > 10 order by value DESC", }, { Name: "Test attribute with same name as top level key", @@ -981,7 +1005,7 @@ var testBuildLogsQueryData = []struct { }, }, TableName: "logs", - ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%a%' AND has(attributes_string_key, 'name') group by `name` order by `name` DESC", + ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND lower(body) like lower('%message%') 
AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%a%' AND has(attributes_string_key, 'name') group by `name` order by `name` DESC", }, { Name: "TABLE: Test count with JSON Filter Array, groupBy, orderBy", @@ -1015,7 +1039,7 @@ var testBuildLogsQueryData = []struct { }, }, TableName: "logs", - ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service') AND has(attributes_string_key, 'name') group by `name` order by `name` DESC", + ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND lower(body) like lower('%requestor_list%') AND lower(body) like lower('%index_service%') AND has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service') AND has(attributes_string_key, 'name') group by `name` order by `name` DESC", }, } diff --git a/pkg/query-service/model/v3/v3.go b/pkg/query-service/model/v3/v3.go index 7facd2ff50..e6ac8441d6 100644 --- a/pkg/query-service/model/v3/v3.go +++ b/pkg/query-service/model/v3/v3.go @@ -907,7 +907,8 @@ const ( FilterOperatorNotContains FilterOperator = "ncontains" FilterOperatorRegex FilterOperator = "regex" FilterOperatorNotRegex FilterOperator = "nregex" - // (I)LIKE is faster than REGEX and supports index + // (I)LIKE is faster than REGEX + // ilike doesn't support index so internally we use lower(body) like for query FilterOperatorLike FilterOperator = "like" FilterOperatorNotLike FilterOperator = "nlike"