feat: support for case insensitive for contains and like queries (#6045)

* feat: support for case insensitive for contains and like queries

* fix: make index filter lowercase for like and not like
This commit is contained in:
Nityananda Gohain 2024-09-23 12:27:14 +05:30 committed by GitHub
parent f9ac41b865
commit 3866f89d3e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 78 additions and 31 deletions

View File

@ -17,10 +17,10 @@ var logOperators = map[v3.FilterOperator]string{
v3.FilterOperatorLessThanOrEq: "<=", v3.FilterOperatorLessThanOrEq: "<=",
v3.FilterOperatorGreaterThan: ">", v3.FilterOperatorGreaterThan: ">",
v3.FilterOperatorGreaterThanOrEq: ">=", v3.FilterOperatorGreaterThanOrEq: ">=",
v3.FilterOperatorLike: "LIKE", v3.FilterOperatorLike: "ILIKE",
v3.FilterOperatorNotLike: "NOT LIKE", v3.FilterOperatorNotLike: "NOT ILIKE",
v3.FilterOperatorContains: "LIKE", v3.FilterOperatorContains: "ILIKE",
v3.FilterOperatorNotContains: "NOT LIKE", v3.FilterOperatorNotContains: "NOT ILIKE",
v3.FilterOperatorRegex: "match(%s, %s)", v3.FilterOperatorRegex: "match(%s, %s)",
v3.FilterOperatorNotRegex: "NOT match(%s, %s)", v3.FilterOperatorNotRegex: "NOT match(%s, %s)",
v3.FilterOperatorIn: "IN", v3.FilterOperatorIn: "IN",
@ -150,6 +150,7 @@ func buildAttributeFilter(item v3.FilterItem) (string, error) {
val := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", item.Value)) val := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", item.Value))
// for body the contains is case insensitive // for body the contains is case insensitive
if keyName == BODY { if keyName == BODY {
logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike
return fmt.Sprintf("lower(%s) %s lower('%%%s%%')", keyName, logsOp, val), nil return fmt.Sprintf("lower(%s) %s lower('%%%s%%')", keyName, logsOp, val), nil
} else { } else {
return fmt.Sprintf("%s %s '%%%s%%'", keyName, logsOp, val), nil return fmt.Sprintf("%s %s '%%%s%%'", keyName, logsOp, val), nil
@ -158,6 +159,7 @@ func buildAttributeFilter(item v3.FilterItem) (string, error) {
// for body use lower for like and ilike // for body use lower for like and ilike
val := utils.QuoteEscapedString(fmt.Sprintf("%s", item.Value)) val := utils.QuoteEscapedString(fmt.Sprintf("%s", item.Value))
if keyName == BODY { if keyName == BODY {
logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike
return fmt.Sprintf("lower(%s) %s lower('%s')", keyName, logsOp, val), nil return fmt.Sprintf("lower(%s) %s lower('%s')", keyName, logsOp, val), nil
} else { } else {
return fmt.Sprintf("%s %s '%s'", keyName, logsOp, val), nil return fmt.Sprintf("%s %s '%s'", keyName, logsOp, val), nil

View File

@ -250,7 +250,7 @@ func Test_buildAttributeFilter(t *testing.T) {
Value: "test", Value: "test",
}, },
}, },
want: "resources_string['service.name'] LIKE '%test%'", want: "resources_string['service.name'] ILIKE '%test%'",
}, },
{ {
name: "build attribute filter contains- body", name: "build attribute filter contains- body",
@ -280,7 +280,7 @@ func Test_buildAttributeFilter(t *testing.T) {
Value: "test%", Value: "test%",
}, },
}, },
want: "resources_string['service.name'] LIKE 'test%'", want: "resources_string['service.name'] ILIKE 'test%'",
}, },
{ {
name: "build attribute filter like-body", name: "build attribute filter like-body",
@ -956,7 +956,7 @@ func TestPrepareLogsQuery(t *testing.T) {
}, },
want: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body, attributes_string, attributes_number, attributes_bool, resources_string from " + want: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body, attributes_string, attributes_number, attributes_bool, resources_string from " +
"signoz_logs.distributed_logs_v2 where attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND " + "signoz_logs.distributed_logs_v2 where attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND " +
"(resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE simpleJSONExtractString(labels, 'service.name') LIKE '%app%' AND labels like '%service.name%app%' AND ", "(resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE simpleJSONExtractString(lower(labels), 'service.name') LIKE '%app%' AND lower(labels) like '%service.name%app%' AND ",
}, },
{ {
name: "Live Tail Query W/O filter", name: "Live Tail Query W/O filter",

View File

@ -8,12 +8,37 @@ import (
"go.signoz.io/signoz/pkg/query-service/utils" "go.signoz.io/signoz/pkg/query-service/utils"
) )
// resourceLogOperators maps each supported filter operator to the ClickHouse
// operator (or format template) used when building filters on resource labels.
// Like/contains use plain LIKE here, whereas logOperators uses ILIKE for them.
var resourceLogOperators = map[v3.FilterOperator]string{
	// existence checks (format templates)
	v3.FilterOperatorExists:    "mapContains(%s_%s, '%s')",
	v3.FilterOperatorNotExists: "not mapContains(%s_%s, '%s')",

	// set membership
	v3.FilterOperatorIn:    "IN",
	v3.FilterOperatorNotIn: "NOT IN",

	// regular expressions (format templates)
	v3.FilterOperatorRegex:    "match(%s, %s)",
	v3.FilterOperatorNotRegex: "NOT match(%s, %s)",

	// pattern matching; contains shares the LIKE operator
	v3.FilterOperatorLike:        "LIKE",
	v3.FilterOperatorNotLike:     "NOT LIKE",
	v3.FilterOperatorContains:    "LIKE",
	v3.FilterOperatorNotContains: "NOT LIKE",

	// equality and ordering comparisons
	v3.FilterOperatorEqual:           "=",
	v3.FilterOperatorNotEqual:        "!=",
	v3.FilterOperatorLessThan:        "<",
	v3.FilterOperatorLessThanOrEq:    "<=",
	v3.FilterOperatorGreaterThan:     ">",
	v3.FilterOperatorGreaterThanOrEq: ">=",
}
// buildResourceFilter builds a clickhouse filter string for resource labels // buildResourceFilter builds a clickhouse filter string for resource labels
func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string { func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string {
// for all operators except contains and like
searchKey := fmt.Sprintf("simpleJSONExtractString(labels, '%s')", key) searchKey := fmt.Sprintf("simpleJSONExtractString(labels, '%s')", key)
// for contains and like it will be case insensitive
lowerSearchKey := fmt.Sprintf("simpleJSONExtractString(lower(labels), '%s')", key)
chFmtVal := utils.ClickHouseFormattedValue(value) chFmtVal := utils.ClickHouseFormattedValue(value)
lowerValue := strings.ToLower(fmt.Sprintf("%s", value))
switch op { switch op {
case v3.FilterOperatorExists: case v3.FilterOperatorExists:
return fmt.Sprintf("simpleJSONHas(labels, '%s')", key) return fmt.Sprintf("simpleJSONHas(labels, '%s')", key)
@ -24,20 +49,20 @@ func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value
case v3.FilterOperatorContains, v3.FilterOperatorNotContains: case v3.FilterOperatorContains, v3.FilterOperatorNotContains:
// this is required as clickhouseFormattedValue add's quotes to the string // this is required as clickhouseFormattedValue add's quotes to the string
// we also want to treat %, _ as literals for contains // we also want to treat %, _ as literals for contains
escapedStringValue := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value)) escapedStringValue := utils.QuoteEscapedStringForContains(lowerValue)
return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, escapedStringValue) return fmt.Sprintf("%s %s '%%%s%%'", lowerSearchKey, logsOp, escapedStringValue)
case v3.FilterOperatorLike, v3.FilterOperatorNotLike: case v3.FilterOperatorLike, v3.FilterOperatorNotLike:
// this is required as clickhouseFormattedValue add's quotes to the string // this is required as clickhouseFormattedValue add's quotes to the string
escapedStringValue := utils.QuoteEscapedString(fmt.Sprintf("%s", value)) escapedStringValue := utils.QuoteEscapedString(lowerValue)
return fmt.Sprintf("%s %s '%s'", searchKey, logsOp, escapedStringValue) return fmt.Sprintf("%s %s '%s'", lowerSearchKey, logsOp, escapedStringValue)
default: default:
return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtVal) return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtVal)
} }
} }
// buildIndexFilterForInOperator builds a clickhouse filter string for in operator // buildIndexFilterForInOperator builds a clickhouse filter string for in operator
// example:= x in a,b,c = (labels like '%x%a%' or labels like '%"x":"b"%' or labels like '%"x"="c"%') // example:= x in a,b,c = (labels like '%"x"%"a"%' or labels like '%"x":"b"%' or labels like '%"x"="c"%')
// example:= x nin a,b,c = (labels nlike '%x%a%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%') // example:= x nin a,b,c = (labels nlike '%"x"%"a"%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%')
func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string { func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string {
conditions := []string{} conditions := []string{}
separator := " OR " separator := " OR "
@ -77,24 +102,35 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter
// buildResourceIndexFilter builds a clickhouse filter string for resource labels // buildResourceIndexFilter builds a clickhouse filter string for resource labels
// example:= x like '%john%' = labels like '%x%john%' // example:= x like '%john%' = labels like '%x%john%'
// we have two indexes for resource attributes: one is lowercased and one is normal.
// for all operators other than like/contains we will use the normal index
// for like/contains we will use the lower index
// we could use the lower index for =, in etc., but it's difficult to do it for !=, NIN etc.
// e.g. for x != "ABC" we cannot predict something like "not lower(labels) like '%%x%%abc%%'". It has to be "not lower(labels) like '%%x%%ABC%%'"
func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string { func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string {
// not using clickhouseFormattedValue as we don't wan't the quotes // not using clickhouseFormattedValue as we don't wan't the quotes
strVal := fmt.Sprintf("%s", value) strVal := fmt.Sprintf("%s", value)
formattedValueEscapedForContains := utils.QuoteEscapedStringForContains(strVal) formattedValueEscapedForContains := strings.ToLower(utils.QuoteEscapedStringForContains(strVal))
formattedValueEscaped := utils.QuoteEscapedString(strVal) formattedValueEscaped := utils.QuoteEscapedString(strVal)
formattedValueEscapedLower := strings.ToLower(formattedValueEscaped)
// add index filters // add index filters
switch op { switch op {
case v3.FilterOperatorContains: case v3.FilterOperatorContains:
return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscapedForContains) return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscapedForContains)
case v3.FilterOperatorNotContains: case v3.FilterOperatorNotContains:
return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscapedForContains) return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscapedForContains)
case v3.FilterOperatorLike, v3.FilterOperatorEqual: case v3.FilterOperatorLike:
return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscapedLower)
case v3.FilterOperatorNotLike:
return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscapedLower)
case v3.FilterOperatorEqual:
return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscaped) return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscaped)
case v3.FilterOperatorNotLike, v3.FilterOperatorNotEqual: case v3.FilterOperatorNotEqual:
return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscaped) return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscaped)
case v3.FilterOperatorNotRegex: case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex:
return fmt.Sprintf("labels not like '%%%s%%'", key) // don't try to do anything for regex.
return ""
case v3.FilterOperatorIn, v3.FilterOperatorNotIn: case v3.FilterOperatorIn, v3.FilterOperatorNotIn:
return buildIndexFilterForInOperator(key, op, value) return buildIndexFilterForInOperator(key, op, value)
default: default:
@ -137,7 +173,7 @@ func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) {
} }
} }
if logsOp, ok := logOperators[op]; ok { if logsOp, ok := resourceLogOperators[op]; ok {
// the filter // the filter
if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" { if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" {
conditions = append(conditions, resourceFilter) conditions = append(conditions, resourceFilter)

View File

@ -53,7 +53,7 @@ func Test_buildResourceFilter(t *testing.T) {
op: v3.FilterOperatorContains, op: v3.FilterOperatorContains,
value: "Application%_", value: "Application%_",
}, },
want: `simpleJSONExtractString(labels, 'service.name') LIKE '%Application\%\_%'`, want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE '%application\%\_%'`,
}, },
{ {
name: "test eq", name: "test eq",
@ -83,7 +83,7 @@ func Test_buildResourceFilter(t *testing.T) {
op: v3.FilterOperatorLike, op: v3.FilterOperatorLike,
value: "Application%_", value: "Application%_",
}, },
want: `simpleJSONExtractString(labels, 'service.name') LIKE 'Application%_'`, want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE 'application%_'`,
}, },
} }
for _, tt := range tests { for _, tt := range tests {
@ -170,7 +170,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorContains, op: v3.FilterOperatorContains,
value: "application", value: "application",
}, },
want: `labels like '%service.name%application%'`, want: `lower(labels) like '%service.name%application%'`,
}, },
{ {
name: "test not contains", name: "test not contains",
@ -179,7 +179,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorNotContains, op: v3.FilterOperatorNotContains,
value: "application", value: "application",
}, },
want: `labels not like '%service.name%application%'`, want: `lower(labels) not like '%service.name%application%'`,
}, },
{ {
name: "test contains with % and _", name: "test contains with % and _",
@ -188,7 +188,16 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorNotContains, op: v3.FilterOperatorNotContains,
value: "application%_test", value: "application%_test",
}, },
want: `labels not like '%service.name%application\%\_test%'`, want: `lower(labels) not like '%service.name%application\%\_test%'`,
},
{
name: "test like with % and _",
args: args{
key: "service.name",
op: v3.FilterOperatorLike,
value: "Application%_test",
},
want: `lower(labels) like '%service.name%application%_test%'`,
}, },
{ {
name: "test like with % and _", name: "test like with % and _",
@ -197,7 +206,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorLike, op: v3.FilterOperatorLike,
value: "application%_test", value: "application%_test",
}, },
want: `labels like '%service.name%application%_test%'`, want: `lower(labels) like '%service.name%application%_test%'`,
}, },
{ {
name: "test not regex", name: "test not regex",
@ -206,7 +215,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorNotRegex, op: v3.FilterOperatorNotRegex,
value: ".*", value: ".*",
}, },
want: `labels not like '%service.name%'`, want: ``,
}, },
{ {
name: "test in", name: "test in",
@ -318,8 +327,8 @@ func Test_buildResourceFiltersFromFilterItems(t *testing.T) {
want: []string{ want: []string{
"simpleJSONExtractString(labels, 'service.name') = 'test'", "simpleJSONExtractString(labels, 'service.name') = 'test'",
"labels like '%service.name%test%'", "labels like '%service.name%test%'",
"simpleJSONExtractString(labels, 'namespace') LIKE '%test1%'", "simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%'",
"labels like '%namespace%test1%'", "lower(labels) like '%namespace%test1%'",
}, },
wantErr: false, wantErr: false,
}, },
@ -480,7 +489,7 @@ func Test_buildResourceSubQuery(t *testing.T) {
want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " + want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " +
"(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " + "(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " +
"simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " + "simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " +
"AND simpleJSONExtractString(labels, 'namespace') LIKE '%test1%' AND labels like '%namespace%test1%' " + "AND simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%' AND lower(labels) like '%namespace%test1%' " +
"AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " + "AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " +
"( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))", "( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))",
wantErr: false, wantErr: false,