From 21c6d3ba991e0a9a8ec4f8157063db631af1f42a Mon Sep 17 00:00:00 2001 From: Nityananda Gohain Date: Mon, 24 Apr 2023 18:01:13 +0530 Subject: [PATCH] =?UTF-8?q?fix:=20remove=20log=20lines=20which=20doesn't?= =?UTF-8?q?=20contain=20the=20key=20while=20performing=20=E2=80=A6=20(#260?= =?UTF-8?q?9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove log lines that don't contain the key while performing groupBy * fix: print removed --- .../app/logs/v3/query_builder.go | 18 ++++++++-- .../app/logs/v3/query_builder_test.go | 34 +++++++++++++++++-- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/pkg/query-service/app/logs/v3/query_builder.go b/pkg/query-service/app/logs/v3/query_builder.go index d2b1a145e6..19c4efc5a1 100644 --- a/pkg/query-service/app/logs/v3/query_builder.go +++ b/pkg/query-service/app/logs/v3/query_builder.go @@ -123,7 +123,7 @@ func getSelectLabels(aggregatorOperator v3.AggregateOperator, groupBy []v3.Attri return selectLabels, nil } -func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, fields map[string]v3.AttributeKey) (string, error) { +func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey, fields map[string]v3.AttributeKey) (string, error) { var conditions []string if fs != nil && len(fs.Items) != 0 { @@ -162,6 +162,20 @@ func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, fields map[string]v3.Attri } } } + + // add group by conditions to filter out log lines which doesn't have the key + for _, attr := range groupBy { + enrichedAttr, err := encrichFieldWithMetadata(attr, fields) + if err != nil { + return "", err + } + if !enrichedAttr.IsColumn { + columnType := getClickhouseLogsColumnType(enrichedAttr.Type) + columnDataType := getClickhouseLogsColumnDataType(enrichedAttr.DataType) + conditions = append(conditions, fmt.Sprintf("indexOf(%s_%s_key, '%s') > 0", columnType, columnDataType, enrichedAttr.Key)) + } + } + queryString := 
strings.Join(conditions, " AND ") if len(queryString) > 0 { @@ -186,7 +200,7 @@ func getZerosForEpochNano(epoch int64) int64 { func buildLogsQuery(start, end, step int64, mq *v3.BuilderQuery, fields map[string]v3.AttributeKey) (string, error) { - filterSubQuery, err := buildLogsTimeSeriesFilterQuery(mq.Filters, fields) + filterSubQuery, err := buildLogsTimeSeriesFilterQuery(mq.Filters, mq.GroupBy, fields) if err != nil { return "", err } diff --git a/pkg/query-service/app/logs/v3/query_builder_test.go b/pkg/query-service/app/logs/v3/query_builder_test.go index b67d5b4f8e..caff3867f3 100644 --- a/pkg/query-service/app/logs/v3/query_builder_test.go +++ b/pkg/query-service/app/logs/v3/query_builder_test.go @@ -98,6 +98,7 @@ func TestGetSelectLabels(t *testing.T) { var timeSeriesFilterQueryData = []struct { Name string FilterSet *v3.FilterSet + GroupBy []v3.AttributeKey ExpectedFilter string }{ { @@ -172,12 +173,28 @@ var timeSeriesFilterQueryData = []struct { }}, ExpectedFilter: " AND attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%'", }, + { + Name: "Test groupBy", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"}, + }}, + GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + ExpectedFilter: " AND attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%' AND indexOf(attributes_string_key, 'host') > 0", + }, + { + Name: "Test groupBy isColumn", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"}, + }}, + GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: 
true}}, + ExpectedFilter: " AND attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%'", + }, } func TestBuildLogsTimeSeriesFilterQuery(t *testing.T) { for _, tt := range timeSeriesFilterQueryData { Convey("TestBuildLogsTimeSeriesFilterQuery", t, func() { - query, err := buildLogsTimeSeriesFilterQuery(tt.FilterSet, map[string]v3.AttributeKey{}) + query, err := buildLogsTimeSeriesFilterQuery(tt.FilterSet, tt.GroupBy, map[string]v3.AttributeKey{}) So(err, ShouldBeNil) So(query, ShouldEqual, tt.ExpectedFilter) }) @@ -292,6 +309,7 @@ var testBuildLogsQueryData = []struct { "toFloat64(count(distinct(name))) as value from signoz_logs.distributed_logs " + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + "AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND resources_string_value[indexOf(resources_string_key, 'x')] != 'abc' " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", }, @@ -320,6 +338,8 @@ var testBuildLogsQueryData = []struct { "toFloat64(count(distinct(name))) as value from signoz_logs.distributed_logs " + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + "AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND resources_string_value[indexOf(resources_string_key, 'x')] != 'abc' " + + "AND indexOf(attributes_string_key, 'method') > 0 " + + "AND indexOf(resources_string_key, 'x') > 0 " + "group by method,x,ts " + "order by method ASC,x ASC,ts", }, @@ -347,6 +367,7 @@ var testBuildLogsQueryData = []struct { "from signoz_logs.distributed_logs " + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + "AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", }, @@ -374,6 +395,7 @@ var testBuildLogsQueryData 
= []struct { "from signoz_logs.distributed_logs " + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + "AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", }, @@ -401,6 +423,7 @@ var testBuildLogsQueryData = []struct { "from signoz_logs.distributed_logs " + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + "AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", }, @@ -428,6 +451,7 @@ var testBuildLogsQueryData = []struct { "from signoz_logs.distributed_logs " + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + "AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", }, @@ -451,6 +475,7 @@ var testBuildLogsQueryData = []struct { "quantile(0.05)(bytes) as value " + "from signoz_logs.distributed_logs " + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", }, @@ -471,8 +496,9 @@ var testBuildLogsQueryData = []struct { TableName: "logs", ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as method" + ", sum(bytes)/60 as value from signoz_logs.distributed_logs " + - "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000)" + - " group by method,ts order by method ASC,ts", + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + + "AND indexOf(attributes_string_key, 'method') > 0 " 
+ + "group by method,ts order by method ASC,ts", }, { Name: "Test aggregate rate", @@ -492,6 +518,7 @@ var testBuildLogsQueryData = []struct { ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as method" + ", count(attributes_float64_value[indexOf(attributes_float64_key, 'bytes')])/60 as value " + "from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", }, @@ -514,6 +541,7 @@ var testBuildLogsQueryData = []struct { "attributes_string_value[indexOf(attributes_string_key, 'method')] as method, " + "sum(attributes_float64_value[indexOf(attributes_float64_key, 'bytes')])/60 as value " + "from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + + "AND indexOf(attributes_string_key, 'method') > 0 " + "group by method,ts " + "order by method ASC,ts", },