fix: remove log lines that don't contain the key while performing … (#2609)

* fix: remove log lines that don't contain the key while performing groupBy

* fix: print removed
This commit is contained in:
Nityananda Gohain 2023-04-24 18:01:13 +05:30 committed by GitHub
parent 6949c659af
commit 21c6d3ba99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 5 deletions

View File

@ -123,7 +123,7 @@ func getSelectLabels(aggregatorOperator v3.AggregateOperator, groupBy []v3.Attri
return selectLabels, nil
}
func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, fields map[string]v3.AttributeKey) (string, error) {
func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey, fields map[string]v3.AttributeKey) (string, error) {
var conditions []string
if fs != nil && len(fs.Items) != 0 {
@ -162,6 +162,20 @@ func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, fields map[string]v3.Attri
}
}
}
// add a key-presence condition for each group by attribute that is not a column, to filter out log lines that don't have the key
for _, attr := range groupBy {
enrichedAttr, err := encrichFieldWithMetadata(attr, fields)
if err != nil {
return "", err
}
if !enrichedAttr.IsColumn {
columnType := getClickhouseLogsColumnType(enrichedAttr.Type)
columnDataType := getClickhouseLogsColumnDataType(enrichedAttr.DataType)
conditions = append(conditions, fmt.Sprintf("indexOf(%s_%s_key, '%s') > 0", columnType, columnDataType, enrichedAttr.Key))
}
}
queryString := strings.Join(conditions, " AND ")
if len(queryString) > 0 {
@ -186,7 +200,7 @@ func getZerosForEpochNano(epoch int64) int64 {
func buildLogsQuery(start, end, step int64, mq *v3.BuilderQuery, fields map[string]v3.AttributeKey) (string, error) {
filterSubQuery, err := buildLogsTimeSeriesFilterQuery(mq.Filters, fields)
filterSubQuery, err := buildLogsTimeSeriesFilterQuery(mq.Filters, mq.GroupBy, fields)
if err != nil {
return "", err
}

View File

@ -98,6 +98,7 @@ func TestGetSelectLabels(t *testing.T) {
var timeSeriesFilterQueryData = []struct {
Name string
FilterSet *v3.FilterSet
GroupBy []v3.AttributeKey
ExpectedFilter string
}{
{
@ -172,12 +173,28 @@ var timeSeriesFilterQueryData = []struct {
}},
ExpectedFilter: " AND attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%'",
},
{
Name: "Test groupBy",
FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{
{Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"},
}},
GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}},
ExpectedFilter: " AND attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%' AND indexOf(attributes_string_key, 'host') > 0",
},
{
Name: "Test groupBy isColumn",
FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{
{Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"},
}},
GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}},
ExpectedFilter: " AND attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%'",
},
}
func TestBuildLogsTimeSeriesFilterQuery(t *testing.T) {
for _, tt := range timeSeriesFilterQueryData {
Convey("TestBuildLogsTimeSeriesFilterQuery", t, func() {
query, err := buildLogsTimeSeriesFilterQuery(tt.FilterSet, map[string]v3.AttributeKey{})
query, err := buildLogsTimeSeriesFilterQuery(tt.FilterSet, tt.GroupBy, map[string]v3.AttributeKey{})
So(err, ShouldBeNil)
So(query, ShouldEqual, tt.ExpectedFilter)
})
@ -292,6 +309,7 @@ var testBuildLogsQueryData = []struct {
"toFloat64(count(distinct(name))) as value from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND resources_string_value[indexOf(resources_string_key, 'x')] != 'abc' " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},
@ -320,6 +338,8 @@ var testBuildLogsQueryData = []struct {
"toFloat64(count(distinct(name))) as value from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND resources_string_value[indexOf(resources_string_key, 'x')] != 'abc' " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"AND indexOf(resources_string_key, 'x') > 0 " +
"group by method,x,ts " +
"order by method ASC,x ASC,ts",
},
@ -347,6 +367,7 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},
@ -374,6 +395,7 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},
@ -401,6 +423,7 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},
@ -428,6 +451,7 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},
@ -451,6 +475,7 @@ var testBuildLogsQueryData = []struct {
"quantile(0.05)(bytes) as value " +
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},
@ -471,8 +496,9 @@ var testBuildLogsQueryData = []struct {
TableName: "logs",
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as method" +
", sum(bytes)/60 as value from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000)" +
" group by method,ts order by method ASC,ts",
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts order by method ASC,ts",
},
{
Name: "Test aggregate rate",
@ -492,6 +518,7 @@ var testBuildLogsQueryData = []struct {
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as method" +
", count(attributes_float64_value[indexOf(attributes_float64_key, 'bytes')])/60 as value " +
"from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},
@ -514,6 +541,7 @@ var testBuildLogsQueryData = []struct {
"attributes_string_value[indexOf(attributes_string_key, 'method')] as method, " +
"sum(attributes_float64_value[indexOf(attributes_float64_key, 'bytes')])/60 as value " +
"from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND indexOf(attributes_string_key, 'method') > 0 " +
"group by method,ts " +
"order by method ASC,ts",
},