feat: support case-insensitive matching for contains and like queries (#6045)

* feat: support case-insensitive matching for contains and like queries

* fix: make index filter lowercase for like and not like
This commit is contained in:
Nityananda Gohain 2024-09-23 12:27:14 +05:30 committed by GitHub
parent f9ac41b865
commit 3866f89d3e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 78 additions and 31 deletions

View File

@ -17,10 +17,10 @@ var logOperators = map[v3.FilterOperator]string{
v3.FilterOperatorLessThanOrEq: "<=",
v3.FilterOperatorGreaterThan: ">",
v3.FilterOperatorGreaterThanOrEq: ">=",
v3.FilterOperatorLike: "LIKE",
v3.FilterOperatorNotLike: "NOT LIKE",
v3.FilterOperatorContains: "LIKE",
v3.FilterOperatorNotContains: "NOT LIKE",
v3.FilterOperatorLike: "ILIKE",
v3.FilterOperatorNotLike: "NOT ILIKE",
v3.FilterOperatorContains: "ILIKE",
v3.FilterOperatorNotContains: "NOT ILIKE",
v3.FilterOperatorRegex: "match(%s, %s)",
v3.FilterOperatorNotRegex: "NOT match(%s, %s)",
v3.FilterOperatorIn: "IN",
@ -150,6 +150,7 @@ func buildAttributeFilter(item v3.FilterItem) (string, error) {
val := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", item.Value))
// for body the contains is case insensitive
if keyName == BODY {
logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike
return fmt.Sprintf("lower(%s) %s lower('%%%s%%')", keyName, logsOp, val), nil
} else {
return fmt.Sprintf("%s %s '%%%s%%'", keyName, logsOp, val), nil
@ -158,6 +159,7 @@ func buildAttributeFilter(item v3.FilterItem) (string, error) {
// for body use lower for like and ilike
val := utils.QuoteEscapedString(fmt.Sprintf("%s", item.Value))
if keyName == BODY {
logsOp = strings.Replace(logsOp, "ILIKE", "LIKE", 1) // removing i from ilike and not ilike
return fmt.Sprintf("lower(%s) %s lower('%s')", keyName, logsOp, val), nil
} else {
return fmt.Sprintf("%s %s '%s'", keyName, logsOp, val), nil

View File

@ -250,7 +250,7 @@ func Test_buildAttributeFilter(t *testing.T) {
Value: "test",
},
},
want: "resources_string['service.name'] LIKE '%test%'",
want: "resources_string['service.name'] ILIKE '%test%'",
},
{
name: "build attribute filter contains- body",
@ -280,7 +280,7 @@ func Test_buildAttributeFilter(t *testing.T) {
Value: "test%",
},
},
want: "resources_string['service.name'] LIKE 'test%'",
want: "resources_string['service.name'] ILIKE 'test%'",
},
{
name: "build attribute filter like-body",
@ -956,7 +956,7 @@ func TestPrepareLogsQuery(t *testing.T) {
},
want: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body, attributes_string, attributes_number, attributes_bool, resources_string from " +
"signoz_logs.distributed_logs_v2 where attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND " +
"(resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE simpleJSONExtractString(labels, 'service.name') LIKE '%app%' AND labels like '%service.name%app%' AND ",
"(resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE simpleJSONExtractString(lower(labels), 'service.name') LIKE '%app%' AND lower(labels) like '%service.name%app%' AND ",
},
{
name: "Live Tail Query W/O filter",

View File

@ -8,12 +8,37 @@ import (
"go.signoz.io/signoz/pkg/query-service/utils"
)
// resourceLogOperators maps each v3.FilterOperator to the clickhouse operator
// (or format string) used when building filters on resource labels.
// Unlike logOperators, like/contains are kept as plain LIKE / NOT LIKE here:
// buildResourceFilter makes those comparisons case insensitive by lowercasing
// both the labels column and the search value instead of relying on ILIKE.
var resourceLogOperators = map[v3.FilterOperator]string{
v3.FilterOperatorEqual: "=",
v3.FilterOperatorNotEqual: "!=",
v3.FilterOperatorLessThan: "<",
v3.FilterOperatorLessThanOrEq: "<=",
v3.FilterOperatorGreaterThan: ">",
v3.FilterOperatorGreaterThanOrEq: ">=",
v3.FilterOperatorLike: "LIKE",
v3.FilterOperatorNotLike: "NOT LIKE",
v3.FilterOperatorContains: "LIKE",
v3.FilterOperatorNotContains: "NOT LIKE",
v3.FilterOperatorRegex: "match(%s, %s)",
v3.FilterOperatorNotRegex: "NOT match(%s, %s)",
v3.FilterOperatorIn: "IN",
v3.FilterOperatorNotIn: "NOT IN",
v3.FilterOperatorExists: "mapContains(%s_%s, '%s')",
v3.FilterOperatorNotExists: "not mapContains(%s_%s, '%s')",
}
// buildResourceFilter builds a clickhouse filter string for resource labels
func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string {
// for all operators except contains and like
searchKey := fmt.Sprintf("simpleJSONExtractString(labels, '%s')", key)
// for contains and like it will be case insensitive
lowerSearchKey := fmt.Sprintf("simpleJSONExtractString(lower(labels), '%s')", key)
chFmtVal := utils.ClickHouseFormattedValue(value)
lowerValue := strings.ToLower(fmt.Sprintf("%s", value))
switch op {
case v3.FilterOperatorExists:
return fmt.Sprintf("simpleJSONHas(labels, '%s')", key)
@ -24,20 +49,20 @@ func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value
case v3.FilterOperatorContains, v3.FilterOperatorNotContains:
// this is required as clickhouseFormattedValue adds quotes to the string
// we also want to treat %, _ as literals for contains
escapedStringValue := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value))
return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, escapedStringValue)
escapedStringValue := utils.QuoteEscapedStringForContains(lowerValue)
return fmt.Sprintf("%s %s '%%%s%%'", lowerSearchKey, logsOp, escapedStringValue)
case v3.FilterOperatorLike, v3.FilterOperatorNotLike:
// this is required as clickhouseFormattedValue adds quotes to the string
escapedStringValue := utils.QuoteEscapedString(fmt.Sprintf("%s", value))
return fmt.Sprintf("%s %s '%s'", searchKey, logsOp, escapedStringValue)
escapedStringValue := utils.QuoteEscapedString(lowerValue)
return fmt.Sprintf("%s %s '%s'", lowerSearchKey, logsOp, escapedStringValue)
default:
return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtVal)
}
}
// buildIndexFilterForInOperator builds a clickhouse filter string for in operator
// example:= x in a,b,c = (labels like '%x%a%' or labels like '%"x":"b"%' or labels like '%"x"="c"%')
// example:= x nin a,b,c = (labels nlike '%x%a%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%')
// example:= x in a,b,c = (labels like '%"x"%"a"%' or labels like '%"x":"b"%' or labels like '%"x"="c"%')
// example:= x nin a,b,c = (labels nlike '%"x"%"a"%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%')
func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string {
conditions := []string{}
separator := " OR "
@ -77,24 +102,35 @@ func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value inter
// buildResourceIndexFilter builds a clickhouse filter string for resource labels
// example:= x like '%john%' = labels like '%x%john%'
// we have two indexes for resource attributes: one is lower and one is normal.
// for all operators other than like/contains we will use the normal index
// for like/contains we will use the lower index
// we can use the lower index for =, in etc. but it's difficult to do it for !=, NIN etc.
// e.g. for x != "ABC" we cannot predict something like "not lower(labels) like '%%x%%abc%%'". It has to be "not lower(labels) like '%%x%%ABC%%'"
func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string {
// not using clickhouseFormattedValue as we don't want the quotes
strVal := fmt.Sprintf("%s", value)
formattedValueEscapedForContains := utils.QuoteEscapedStringForContains(strVal)
formattedValueEscapedForContains := strings.ToLower(utils.QuoteEscapedStringForContains(strVal))
formattedValueEscaped := utils.QuoteEscapedString(strVal)
formattedValueEscapedLower := strings.ToLower(formattedValueEscaped)
// add index filters
switch op {
case v3.FilterOperatorContains:
return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscapedForContains)
return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscapedForContains)
case v3.FilterOperatorNotContains:
return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscapedForContains)
case v3.FilterOperatorLike, v3.FilterOperatorEqual:
return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscapedForContains)
case v3.FilterOperatorLike:
return fmt.Sprintf("lower(labels) like '%%%s%%%s%%'", key, formattedValueEscapedLower)
case v3.FilterOperatorNotLike:
return fmt.Sprintf("lower(labels) not like '%%%s%%%s%%'", key, formattedValueEscapedLower)
case v3.FilterOperatorEqual:
return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscaped)
case v3.FilterOperatorNotLike, v3.FilterOperatorNotEqual:
case v3.FilterOperatorNotEqual:
return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscaped)
case v3.FilterOperatorNotRegex:
return fmt.Sprintf("labels not like '%%%s%%'", key)
case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex:
// don't try to do anything for regex.
return ""
case v3.FilterOperatorIn, v3.FilterOperatorNotIn:
return buildIndexFilterForInOperator(key, op, value)
default:
@ -137,7 +173,7 @@ func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) {
}
}
if logsOp, ok := logOperators[op]; ok {
if logsOp, ok := resourceLogOperators[op]; ok {
// the filter
if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" {
conditions = append(conditions, resourceFilter)

View File

@ -53,7 +53,7 @@ func Test_buildResourceFilter(t *testing.T) {
op: v3.FilterOperatorContains,
value: "Application%_",
},
want: `simpleJSONExtractString(labels, 'service.name') LIKE '%Application\%\_%'`,
want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE '%application\%\_%'`,
},
{
name: "test eq",
@ -83,7 +83,7 @@ func Test_buildResourceFilter(t *testing.T) {
op: v3.FilterOperatorLike,
value: "Application%_",
},
want: `simpleJSONExtractString(labels, 'service.name') LIKE 'Application%_'`,
want: `simpleJSONExtractString(lower(labels), 'service.name') LIKE 'application%_'`,
},
}
for _, tt := range tests {
@ -170,7 +170,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorContains,
value: "application",
},
want: `labels like '%service.name%application%'`,
want: `lower(labels) like '%service.name%application%'`,
},
{
name: "test not contains",
@ -179,7 +179,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorNotContains,
value: "application",
},
want: `labels not like '%service.name%application%'`,
want: `lower(labels) not like '%service.name%application%'`,
},
{
name: "test contains with % and _",
@ -188,7 +188,16 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorNotContains,
value: "application%_test",
},
want: `labels not like '%service.name%application\%\_test%'`,
want: `lower(labels) not like '%service.name%application\%\_test%'`,
},
{
name: "test like with % and _",
args: args{
key: "service.name",
op: v3.FilterOperatorLike,
value: "Application%_test",
},
want: `lower(labels) like '%service.name%application%_test%'`,
},
{
name: "test like with % and _",
@ -197,7 +206,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorLike,
value: "application%_test",
},
want: `labels like '%service.name%application%_test%'`,
want: `lower(labels) like '%service.name%application%_test%'`,
},
{
name: "test not regex",
@ -206,7 +215,7 @@ func Test_buildResourceIndexFilter(t *testing.T) {
op: v3.FilterOperatorNotRegex,
value: ".*",
},
want: `labels not like '%service.name%'`,
want: ``,
},
{
name: "test in",
@ -318,8 +327,8 @@ func Test_buildResourceFiltersFromFilterItems(t *testing.T) {
want: []string{
"simpleJSONExtractString(labels, 'service.name') = 'test'",
"labels like '%service.name%test%'",
"simpleJSONExtractString(labels, 'namespace') LIKE '%test1%'",
"labels like '%namespace%test1%'",
"simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%'",
"lower(labels) like '%namespace%test1%'",
},
wantErr: false,
},
@ -480,7 +489,7 @@ func Test_buildResourceSubQuery(t *testing.T) {
want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " +
"(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " +
"simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " +
"AND simpleJSONExtractString(labels, 'namespace') LIKE '%test1%' AND labels like '%namespace%test1%' " +
"AND simpleJSONExtractString(lower(labels), 'namespace') LIKE '%test1%' AND lower(labels) like '%namespace%test1%' " +
"AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " +
"( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))",
wantErr: false,