feat: logsV4 resource table query builder (#5872)

* feat: logsV4 resource table query builder

* fix: address pr comments

* fix: escape %, _ for contains queries

* fix: resource attribute filtering case sensitive

---------

Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com>
This commit is contained in:
Nityananda Gohain 2024-09-08 14:14:13 +05:30 committed by GitHub
parent 7b5ff54f47
commit 12f2f80958
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 722 additions and 0 deletions

View File

@ -0,0 +1,31 @@
package v4
import (
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
)
// logOperators maps each supported v3 filter operator to the text used when
// rendering it as SQL. Entries without fmt verbs ("=", "LIKE", "IN", ...) are
// infix operators placed between the left-hand expression and the value;
// entries containing %s verbs are fmt templates filled in by the caller.
// An operator that is missing from this map is treated as unsupported by
// buildResourceFiltersFromFilterItems.
var logOperators = map[v3.FilterOperator]string{
v3.FilterOperatorEqual: "=",
v3.FilterOperatorNotEqual: "!=",
v3.FilterOperatorLessThan: "<",
v3.FilterOperatorLessThanOrEq: "<=",
v3.FilterOperatorGreaterThan: ">",
v3.FilterOperatorGreaterThanOrEq: ">=",
v3.FilterOperatorLike: "LIKE",
v3.FilterOperatorNotLike: "NOT LIKE",
// Contains / NotContains reuse LIKE; buildResourceFilter wraps the escaped
// value in % wildcards itself.
v3.FilterOperatorContains: "LIKE",
v3.FilterOperatorNotContains: "NOT LIKE",
// Two-verb templates: buildResourceFilter passes the search expression and
// the formatted value, in that order.
v3.FilterOperatorRegex: "match(%s, %s)",
v3.FilterOperatorNotRegex: "NOT match(%s, %s)",
v3.FilterOperatorIn: "IN",
v3.FilterOperatorNotIn: "NOT IN",
// Three-verb templates. buildResourceFilter does not use them (it emits
// simpleJSONHas for exists checks); presumably they are consumed by a
// non-resource query builder — TODO confirm.
v3.FilterOperatorExists: "mapContains(%s_%s, '%s')",
v3.FilterOperatorNotExists: "not mapContains(%s_%s, '%s')",
}
// Shared names and values for the v4 logs query builder.
const (
// BODY is the literal "body"; presumably the name of the log body column —
// not referenced in the code visible here, confirm against callers.
BODY = "body"
// DISTRIBUTED_LOGS_V2 is presumably the distributed logs table name — not
// referenced in the code visible here, confirm against callers.
DISTRIBUTED_LOGS_V2 = "distributed_logs_v2"
// DISTRIBUTED_LOGS_V2_RESOURCE is the table, inside the signoz_logs
// database, that buildResourceSubQuery selects fingerprints from.
DISTRIBUTED_LOGS_V2_RESOURCE = "distributed_logs_v2_resource"
// NANOSECOND equals 1e9; presumably the number of nanoseconds in one
// second, used for timestamp conversion — TODO confirm.
NANOSECOND = 1000000000
)

View File

@ -0,0 +1,201 @@
package v4
import (
"fmt"
"strings"
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
"go.signoz.io/signoz/pkg/query-service/utils"
)
// buildResourceFilter renders the ClickHouse predicate that tests a single
// resource label, read out of the serialized `labels` column, against value.
//
// logsOp is the logOperators entry for op: an infix operator for most
// operators, or a two-verb fmt template for the regex operators. It is
// ignored for the exists / not-exists operators.
//
// NOTE(review): key is placed inside a single-quoted SQL literal verbatim;
// a key containing a quote would break out of the literal. Confirm that
// callers only pass trusted / validated keys.
func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string {
	// Formatted up front for every operator, exactly as the helper has always
	// been invoked, even though the exists branches do not use the result.
	quotedValue := utils.ClickHouseFormattedValue(value)
	labelExpr := "simpleJSONExtractString(labels, '" + key + "')"

	if op == v3.FilterOperatorExists {
		return "simpleJSONHas(labels, '" + key + "')"
	}
	if op == v3.FilterOperatorNotExists {
		return "not simpleJSONHas(labels, '" + key + "')"
	}
	if op == v3.FilterOperatorRegex || op == v3.FilterOperatorNotRegex {
		// logsOp is a template here, e.g. "match(%s, %s)".
		return fmt.Sprintf(logsOp, labelExpr, quotedValue)
	}
	if op == v3.FilterOperatorContains || op == v3.FilterOperatorNotContains {
		// The quoted form cannot be reused because the wildcards must sit
		// inside the quotes, so the raw value is escaped and quoted here.
		needle := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value))
		return labelExpr + " " + logsOp + " '%" + needle + "%'"
	}
	return labelExpr + " " + logsOp + " " + quotedValue
}
// buildIndexFilterForInOperator builds the extra `labels` LIKE predicate that
// accompanies an IN / NOT IN resource filter.
//
// Each candidate value v becomes a pattern matching the serialized pair
// "key":"v" anywhere in labels. Patterns are OR-ed for IN and, using
// `not like`, AND-ed for NOT IN:
//
//	x in (a, b)  => (labels like '%"x":"a"%' OR labels like '%"x":"b"%')
//	x nin (a, b) => (labels not like '%"x":"a"%' AND labels not like '%"x":"b"%')
//
// value may be a single string or a []interface{}; non-string elements are
// skipped. When no usable value remains the result is the empty string.
func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string {
	joiner, likeOp := " OR ", "like"
	if op == v3.FilterOperatorNotIn {
		joiner, likeOp = " AND ", "not like"
	}

	// Normalise value into a flat list of strings.
	var candidates []string
	switch typed := value.(type) {
	case string:
		candidates = []string{typed}
	case []interface{}:
		for _, elem := range typed {
			// resource attribute values are always strings; drop anything else
			if s, isString := elem.(string); isString {
				candidates = append(candidates, s)
			}
		}
	}

	// nothing to filter on
	if len(candidates) == 0 {
		return ""
	}

	clauses := make([]string, len(candidates))
	for i, candidate := range candidates {
		escaped := utils.QuoteEscapedStringForContains(candidate)
		clauses[i] = fmt.Sprintf("labels %s '%%\"%s\":\"%s\"%%'", likeOp, key, escaped)
	}
	return "(" + strings.Join(clauses, joiner) + ")"
}
// buildResourceIndexFilter builds the additional predicate on the raw
// `labels` column that is emitted next to the precise resource filter so the
// index on labels can be used.
//
// Examples:
//
//	x = 'john'        => labels like '%x%john%'
//	x contains 'john' => labels like '%x%john%'
//	x like '%john%'   => labels like '%x%%john%%'   (same as '%x%john%')
//	x not exists      => labels not like '%x%'
//
// For equality and contains the value is a literal, so LIKE wildcards inside
// it are escaped. For LIKE the value is already a pattern written by the
// caller: its % and _ must keep their wildcard meaning, otherwise this
// predicate would reject rows that the main LIKE predicate accepts.
//
// NOTE(review): the negated value forms (`labels not like '%key%value%'`)
// also reject rows where value appears after key in some other label;
// confirm this over-filtering is acceptable.
func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string {
	switch op {
	case v3.FilterOperatorContains, v3.FilterOperatorEqual:
		// not using ClickHouseFormattedValue as we don't want the quotes
		literal := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value))
		return fmt.Sprintf("labels like '%%%s%%%s%%'", key, literal)
	case v3.FilterOperatorLike:
		// Only quote-escape: the user's wildcards must stay active.
		pattern := utils.QuoteEscapedString(fmt.Sprintf("%s", value))
		return fmt.Sprintf("labels like '%%%s%%%s%%'", key, pattern)
	case v3.FilterOperatorNotContains, v3.FilterOperatorNotEqual, v3.FilterOperatorNotLike:
		literal := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value))
		return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, literal)
	case v3.FilterOperatorNotRegex, v3.FilterOperatorNotExists:
		// NOT EXISTS must not require the key text to be present: that would
		// contradict the `not simpleJSONHas` predicate it is AND-ed with.
		return fmt.Sprintf("labels not like '%%%s%%'", key)
	case v3.FilterOperatorIn, v3.FilterOperatorNotIn:
		return buildIndexFilterForInOperator(key, op, value)
	default:
		// Every remaining operator only needs the key to be present.
		return fmt.Sprintf("labels like '%%%s%%'", key)
	}
}
// buildResourceFiltersFromFilterItems converts the resource-attribute items
// of fs into ClickHouse predicates on the resource table.
//
// Items whose key is not a resource attribute are skipped. For each remaining
// item up to two predicates are appended, in order: the precise filter from
// buildResourceFilter and the index-assisting filter from
// buildResourceIndexFilter.
//
// It returns (nil, nil) for a nil or empty filter set, and an error when an
// item has a non-string data type (except for IN / NOT IN), when its value
// cannot be validated and cast, or when its operator is not in logOperators.
// The cast error is wrapped, so callers can inspect it with errors.Is / As.
func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) {
	if fs == nil || len(fs.Items) == 0 {
		return nil, nil
	}

	var conditions []string
	for _, item := range fs.Items {
		// skip anything other than resource attribute
		if item.Key.Type != v3.AttributeKeyTypeResource {
			continue
		}

		// logOperators is keyed by lower-case operator names
		op := v3.FilterOperator(strings.ToLower(string(item.Operator)))
		keyName := item.Key.Key

		// resource filter values are always strings; the data type check is
		// relaxed only for IN / NOT IN
		isInOp := op == v3.FilterOperatorIn || op == v3.FilterOperatorNotIn
		if item.Key.DataType != v3.AttributeKeyDataTypeString && !isInOp {
			return nil, fmt.Errorf("invalid data type for resource attribute: %s", keyName)
		}

		// exists / not exists carry no value, so value stays nil for them
		var value interface{}
		if op != v3.FilterOperatorExists && op != v3.FilterOperatorNotExists {
			// make sure to cast the value regardless of the actual type
			casted, err := utils.ValidateAndCastValue(item.Value, item.Key.DataType)
			if err != nil {
				return nil, fmt.Errorf("failed to validate and cast value for %s: %w", keyName, err)
			}
			value = casted
		}

		logsOp, ok := logOperators[op]
		if !ok {
			return nil, fmt.Errorf("unsupported operator: %s", op)
		}

		// the filter
		if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" {
			conditions = append(conditions, resourceFilter)
		}
		// the additional filter for better usage of the index
		if resourceIndexFilter := buildResourceIndexFilter(keyName, op, value); resourceIndexFilter != "" {
			conditions = append(conditions, resourceIndexFilter)
		}
	}
	return conditions, nil
}
// buildResourceFiltersFromGroupBy returns one existence predicate per
// resource attribute found in groupBy, preserving order. Attributes of any
// other type are ignored; the result is nil when none qualifies.
func buildResourceFiltersFromGroupBy(groupBy []v3.AttributeKey) []string {
	var filters []string
	for i := range groupBy {
		if groupBy[i].Type != v3.AttributeKeyTypeResource {
			continue
		}
		name := groupBy[i].Key
		// precise JSON check plus a plain substring check on labels
		filters = append(filters, "(simpleJSONHas(labels, '"+name+"') AND labels like '%"+name+"%')")
	}
	return filters
}
// buildResourceFiltersFromAggregateAttribute returns an existence predicate
// for aggregateAttribute when it is a named resource attribute, and the empty
// string otherwise.
func buildResourceFiltersFromAggregateAttribute(aggregateAttribute v3.AttributeKey) string {
	if aggregateAttribute.Key == "" || aggregateAttribute.Type != v3.AttributeKeyTypeResource {
		return ""
	}
	name := aggregateAttribute.Key
	return "(simpleJSONHas(labels, '" + name + "') AND labels like '%" + name + "%')"
}
// buildResourceSubQuery builds the parenthesised sub-query that selects the
// fingerprints of resources matching the resource-level parts of a query.
//
// The WHERE clause always restricts seen_at_ts_bucket_start to the inclusive
// range [bucketStart, bucketEnd] and then ANDs together, in this order: the
// resource filters from fs, the existence check for aggregateAttribute, and
// the grouped existence checks for groupBy.
//
// It returns ("", nil) when there is no resource condition at all, and
// propagates any error from buildResourceFiltersFromFilterItems.
func buildResourceSubQuery(bucketStart, bucketEnd int64, fs *v3.FilterSet, groupBy []v3.AttributeKey, aggregateAttribute v3.AttributeKey) (string, error) {
	// only the resource attributes of the filter set end up here
	filterConds, err := buildResourceFiltersFromFilterItems(fs)
	if err != nil {
		return "", err
	}
	where := append([]string(nil), filterConds...)

	// for aggregate attribute add exists check in resources
	if aggCond := buildResourceFiltersFromAggregateAttribute(aggregateAttribute); aggCond != "" {
		where = append(where, aggCond)
	}

	if groupConds := buildResourceFiltersFromGroupBy(groupBy); len(groupConds) != 0 {
		// TODO: change AND to OR once we know how to solve for group by ( i.e show values if one is not present)
		where = append(where, "( "+strings.Join(groupConds, " AND ")+" )")
	}

	if len(where) == 0 {
		return "", nil
	}

	selectPrefix := fmt.Sprintf("SELECT fingerprint FROM signoz_logs.%s WHERE (seen_at_ts_bucket_start >= %d) AND (seen_at_ts_bucket_start <= %d) AND ", DISTRIBUTED_LOGS_V2_RESOURCE, bucketStart, bucketEnd)
	return "(" + selectPrefix + strings.Join(where, " AND ") + ")", nil
}

View File

@ -0,0 +1,482 @@
package v4
import (
"reflect"
"testing"
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
)
// Test_buildResourceFilter checks the precise per-label predicate for the
// exists, regex, contains and equality operators, including quote escaping.
func Test_buildResourceFilter(t *testing.T) {
	cases := []struct {
		name   string
		logsOp string
		key    string
		op     v3.FilterOperator
		value  interface{}
		want   string
	}{
		{
			name: "test exists",
			key:  "service.name",
			op:   v3.FilterOperatorExists,
			want: `simpleJSONHas(labels, 'service.name')`,
		},
		{
			name: "test nexists",
			key:  "service.name",
			op:   v3.FilterOperatorNotExists,
			want: `not simpleJSONHas(labels, 'service.name')`,
		},
		{
			name:   "test regex",
			logsOp: "match(%s, %s)",
			key:    "service.name",
			op:     v3.FilterOperatorRegex,
			value:  ".*",
			want:   `match(simpleJSONExtractString(labels, 'service.name'), '.*')`,
		},
		{
			name:   "test contains",
			logsOp: "LIKE",
			key:    "service.name",
			op:     v3.FilterOperatorContains,
			value:  "Application%_",
			want:   `simpleJSONExtractString(labels, 'service.name') LIKE '%Application\%\_%'`,
		},
		{
			name:   "test eq",
			logsOp: "=",
			key:    "service.name",
			op:     v3.FilterOperatorEqual,
			value:  "Application",
			want:   `simpleJSONExtractString(labels, 'service.name') = 'Application'`,
		},
		{
			name:   "test value with quotes",
			logsOp: "=",
			key:    "service.name",
			op:     v3.FilterOperatorEqual,
			value:  "Application's",
			want:   `simpleJSONExtractString(labels, 'service.name') = 'Application\'s'`,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := buildResourceFilter(tc.logsOp, tc.key, tc.op, tc.value)
			if got != tc.want {
				t.Errorf("buildResourceFilter() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildIndexFilterForInOperator checks the labels LIKE patterns built
// for IN / NOT IN, for both slice and single-string values.
func Test_buildIndexFilterForInOperator(t *testing.T) {
	cases := []struct {
		name  string
		key   string
		op    v3.FilterOperator
		value interface{}
		want  string
	}{
		{
			name:  "test in array",
			key:   "service.name",
			op:    v3.FilterOperatorIn,
			value: []interface{}{"Application", "Test"},
			want:  `(labels like '%"service.name":"Application"%' OR labels like '%"service.name":"Test"%')`,
		},
		{
			name:  "test nin array",
			key:   "service.name",
			op:    v3.FilterOperatorNotIn,
			value: []interface{}{"Application", "Test"},
			want:  `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`,
		},
		{
			name:  "test in string",
			key:   "service.name",
			op:    v3.FilterOperatorIn,
			value: "application",
			want:  `(labels like '%"service.name":"application"%')`,
		},
		{
			name:  "test nin string",
			key:   "service.name",
			op:    v3.FilterOperatorNotIn,
			value: "application'\"_s",
			want:  `(labels not like '%"service.name":"application\'"\_s"%')`,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := buildIndexFilterForInOperator(tc.key, tc.op, tc.value)
			if got != tc.want {
				t.Errorf("buildIndexFilterForInOperator() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildResourceIndexFilter checks the index-assisting labels predicate
// for each operator family. Case names state the operator actually exercised,
// and IN is covered alongside NOT IN.
func Test_buildResourceIndexFilter(t *testing.T) {
	cases := []struct {
		name  string
		key   string
		op    v3.FilterOperator
		value interface{}
		want  string
	}{
		{
			name:  "test contains",
			key:   "service.name",
			op:    v3.FilterOperatorContains,
			value: "application",
			want:  `labels like '%service.name%application%'`,
		},
		{
			name:  "test not contains",
			key:   "service.name",
			op:    v3.FilterOperatorNotContains,
			value: "application",
			want:  `labels not like '%service.name%application%'`,
		},
		{
			// literal % and _ in the value must be escaped
			name:  "test not contains with % and _",
			key:   "service.name",
			op:    v3.FilterOperatorNotContains,
			value: "application%_test",
			want:  `labels not like '%service.name%application\%\_test%'`,
		},
		{
			name:  "test not regex",
			key:   "service.name",
			op:    v3.FilterOperatorNotRegex,
			value: ".*",
			want:  `labels not like '%service.name%'`,
		},
		{
			name:  "test in",
			key:   "service.name",
			op:    v3.FilterOperatorIn,
			value: []interface{}{"Application", "Test"},
			want:  `(labels like '%"service.name":"Application"%' OR labels like '%"service.name":"Test"%')`,
		},
		{
			name:  "test nin",
			key:   "service.name",
			op:    v3.FilterOperatorNotIn,
			value: []interface{}{"Application", "Test"},
			want:  `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`,
		},
		{
			name:  "test eq",
			key:   "service.name",
			op:    v3.FilterOperatorEqual,
			value: "Application",
			want:  `labels like '%service.name%Application%'`,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := buildResourceIndexFilter(tc.key, tc.op, tc.value)
			if got != tc.want {
				t.Errorf("buildResourceIndexFilter() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildResourceFiltersFromFilterItems checks that non-resource items are
// ignored and that each resource item yields its precise filter followed by
// its index filter.
func Test_buildResourceFiltersFromFilterItems(t *testing.T) {
	// resourceKey builds a string-typed resource attribute key.
	resourceKey := func(name string) v3.AttributeKey {
		return v3.AttributeKey{
			Key:      name,
			DataType: v3.AttributeKeyDataTypeString,
			Type:     v3.AttributeKeyTypeResource,
		}
	}

	cases := []struct {
		name    string
		fs      *v3.FilterSet
		want    []string
		wantErr bool
	}{
		{
			name: "ignore attribute",
			fs: &v3.FilterSet{
				Items: []v3.FilterItem{
					{
						Key: v3.AttributeKey{
							Key:      "service.name",
							DataType: v3.AttributeKeyDataTypeString,
							Type:     v3.AttributeKeyTypeTag,
						},
						Operator: v3.FilterOperatorEqual,
						Value:    "test",
					},
				},
			},
			want:    nil,
			wantErr: false,
		},
		{
			name: "build filter",
			fs: &v3.FilterSet{
				Items: []v3.FilterItem{
					{
						Key:      resourceKey("service.name"),
						Operator: v3.FilterOperatorEqual,
						Value:    "test",
					},
				},
			},
			want: []string{
				"simpleJSONExtractString(labels, 'service.name') = 'test'",
				"labels like '%service.name%test%'",
			},
			wantErr: false,
		},
		{
			name: "build filter with multiple items",
			fs: &v3.FilterSet{
				Items: []v3.FilterItem{
					{
						Key:      resourceKey("service.name"),
						Operator: v3.FilterOperatorEqual,
						Value:    "test",
					},
					{
						Key:      resourceKey("namespace"),
						Operator: v3.FilterOperatorContains,
						Value:    "test1",
					},
				},
			},
			want: []string{
				"simpleJSONExtractString(labels, 'service.name') = 'test'",
				"labels like '%service.name%test%'",
				"simpleJSONExtractString(labels, 'namespace') LIKE '%test1%'",
				"labels like '%namespace%test1%'",
			},
			wantErr: false,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := buildResourceFiltersFromFilterItems(tc.fs)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("buildResourceFiltersFromFilterItems() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("buildResourceFiltersFromFilterItems() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildResourceFiltersFromGroupBy checks that every resource attribute
// in the group-by list yields one existence predicate, in order.
func Test_buildResourceFiltersFromGroupBy(t *testing.T) {
	// resourceKey builds a string-typed resource attribute key.
	resourceKey := func(name string) v3.AttributeKey {
		return v3.AttributeKey{
			Key:      name,
			DataType: v3.AttributeKeyDataTypeString,
			Type:     v3.AttributeKeyTypeResource,
		}
	}

	cases := []struct {
		name    string
		groupBy []v3.AttributeKey
		want    []string
	}{
		{
			name:    "build filter",
			groupBy: []v3.AttributeKey{resourceKey("service.name")},
			want: []string{
				"(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')",
			},
		},
		{
			name: "build filter multiple group by",
			groupBy: []v3.AttributeKey{
				resourceKey("service.name"),
				resourceKey("namespace"),
			},
			want: []string{
				"(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')",
				"(simpleJSONHas(labels, 'namespace') AND labels like '%namespace%')",
			},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := buildResourceFiltersFromGroupBy(tc.groupBy)
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("buildResourceFiltersFromGroupBy() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildResourceFiltersFromAggregateAttribute checks the existence
// predicate produced for a resource aggregate attribute.
func Test_buildResourceFiltersFromAggregateAttribute(t *testing.T) {
	cases := []struct {
		name               string
		aggregateAttribute v3.AttributeKey
		want               string
	}{
		{
			name: "build filter",
			aggregateAttribute: v3.AttributeKey{
				Key:      "service.name",
				DataType: v3.AttributeKeyDataTypeString,
				Type:     v3.AttributeKeyTypeResource,
			},
			want: "(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := buildResourceFiltersFromAggregateAttribute(tc.aggregateAttribute)
			if got != tc.want {
				t.Errorf("buildResourceFiltersFromAggregateAttribute() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildResourceSubQuery checks the complete fingerprint sub-query built
// from filter items, a group-by key and an aggregate attribute together.
func Test_buildResourceSubQuery(t *testing.T) {
	// resourceKey builds a string-typed resource attribute key.
	resourceKey := func(name string) v3.AttributeKey {
		return v3.AttributeKey{
			Key:      name,
			DataType: v3.AttributeKeyDataTypeString,
			Type:     v3.AttributeKeyTypeResource,
		}
	}

	cases := []struct {
		name               string
		bucketStart        int64
		bucketEnd          int64
		fs                 *v3.FilterSet
		groupBy            []v3.AttributeKey
		aggregateAttribute v3.AttributeKey
		want               string
		wantErr            bool
	}{
		{
			name:        "build sub query",
			bucketStart: 1680064560,
			bucketEnd:   1680066458,
			fs: &v3.FilterSet{
				Items: []v3.FilterItem{
					{
						Key:      resourceKey("service.name"),
						Operator: v3.FilterOperatorEqual,
						Value:    "test",
					},
					{
						Key:      resourceKey("namespace"),
						Operator: v3.FilterOperatorContains,
						Value:    "test1",
					},
				},
			},
			groupBy:            []v3.AttributeKey{resourceKey("host.name")},
			aggregateAttribute: resourceKey("cluster.name"),
			want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " +
				"(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " +
				"simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " +
				"AND simpleJSONExtractString(labels, 'namespace') LIKE '%test1%' AND labels like '%namespace%test1%' " +
				"AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " +
				"( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))",
			wantErr: false,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := buildResourceSubQuery(tc.bucketStart, tc.bucketEnd, tc.fs, tc.groupBy, tc.aggregateAttribute)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("buildResourceSubQuery() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if got != tc.want {
				t.Errorf("buildResourceSubQuery() = %v, want %v", got, tc.want)
			}
		})
	}
}

View File

@ -154,6 +154,14 @@ func QuoteEscapedString(str string) string {
return str
}
// QuoteEscapedStringForContains prepares str for embedding in a quoted LIKE
// pattern as literal text: it first applies QuoteEscapedString and then
// prefixes every % and _ with a backslash.
//
// See https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions#like
func QuoteEscapedStringForContains(str string) string {
	// One pass suffices: neither replacement introduces the other's character.
	likeEscaper := strings.NewReplacer(`%`, `\%`, `_`, `\_`)
	return likeEscaper.Replace(QuoteEscapedString(str))
}
// ClickHouseFormattedValue formats the value to be used in clickhouse query
func ClickHouseFormattedValue(v interface{}) string {
// if it's pointer convert it to a value