From c5d5c84a0ef532b0e098c2454e2388c2e2861c4d Mon Sep 17 00:00:00 2001 From: Srikanth Chekuri Date: Fri, 16 May 2025 20:09:57 +0530 Subject: [PATCH] chore: add fieldmapper implementation (#7955) --- pkg/errors/errors.go | 4 + pkg/parser/grammar/where_clause_visitor.go | 15 +- .../grammar/where_clause_visitor_test.go | 644 ------------------ pkg/telemetrylogs/condition_builder.go | 215 +----- pkg/telemetrylogs/condition_builder_test.go | 280 +------- pkg/telemetrylogs/field_mapper.go | 194 ++++++ pkg/telemetrylogs/field_mapper_test.go | 265 +++++++ pkg/telemetrylogs/json.go | 89 +++ pkg/telemetrymetadata/condition_builder.go | 50 +- .../condition_builder_test.go | 210 +----- pkg/telemetrymetadata/field_mapper.go | 116 ++++ pkg/telemetrymetadata/field_mapper_test.go | 216 ++++++ pkg/telemetrymetadata/metadata.go | 59 +- pkg/telemetrymetadata/stmt_parse.go | 4 +- pkg/telemetrytraces/condition_builder.go | 204 +----- pkg/telemetrytraces/condition_builder_test.go | 91 +-- pkg/telemetrytraces/field_mapper.go | 271 ++++++++ pkg/telemetrytraces/field_mapper_test.go | 95 +++ .../querybuildertypesv5/qb.go | 48 +- .../querybuildertypesv5/query.go | 2 +- pkg/types/telemetrytypes/maybe_typo.go | 111 +++ 21 files changed, 1489 insertions(+), 1694 deletions(-) create mode 100644 pkg/telemetrylogs/field_mapper.go create mode 100644 pkg/telemetrylogs/field_mapper_test.go create mode 100644 pkg/telemetrylogs/json.go create mode 100644 pkg/telemetrymetadata/field_mapper.go create mode 100644 pkg/telemetrymetadata/field_mapper_test.go create mode 100644 pkg/telemetrytraces/field_mapper.go create mode 100644 pkg/telemetrytraces/field_mapper_test.go create mode 100644 pkg/types/telemetrytypes/maybe_typo.go diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index 49d892d6a3..764b1632c3 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -142,6 +142,10 @@ func As(err error, target any) bool { return errors.As(err, target) } +func Is(err error, target error) bool { + return errors.Is(err, target) +} + func WrapNotFoundf(cause error, code Code, format string, args ...interface{}) *base { return Wrapf(cause, TypeNotFound, code, format, args...) } diff --git a/pkg/parser/grammar/where_clause_visitor.go b/pkg/parser/grammar/where_clause_visitor.go index 3f1523cf24..4bb45a84c3 100644 --- a/pkg/parser/grammar/where_clause_visitor.go +++ b/pkg/parser/grammar/where_clause_visitor.go @@ -18,6 +18,7 @@ import ( // WhereClauseVisitor implements the FilterQueryVisitor interface // to convert the parsed filter expressions into ClickHouse WHERE clause type WhereClauseVisitor struct { + fieldMapper qbtypes.FieldMapper conditionBuilder qbtypes.ConditionBuilder warnings []error fieldKeys map[string][]*telemetrytypes.TelemetryFieldKey @@ -250,7 +251,7 @@ func (v *WhereClauseVisitor) VisitPrimary(ctx *PrimaryContext) any { if keyCtx, ok := child.(*KeyContext); ok { // create a full text search condition on the body field keyText := keyCtx.GetText() - cond, err := v.conditionBuilder.GetCondition(context.Background(), v.fullTextColumn, qbtypes.FilterOperatorRegexp, keyText, v.builder) + cond, err := v.conditionBuilder.ConditionFor(context.Background(), v.fullTextColumn, qbtypes.FilterOperatorRegexp, keyText, v.builder) if err != nil { return "" } @@ -273,7 +274,7 @@ func (v *WhereClauseVisitor) VisitComparison(ctx *ComparisonContext) any { } var conds []string for _, key := range keys { - condition, err := v.conditionBuilder.GetCondition(context.Background(), key, op, nil, v.builder) + condition, err := v.conditionBuilder.ConditionFor(context.Background(), key, op, nil, v.builder) if err != nil { return "" } @@ -291,7 +292,7 @@ func (v *WhereClauseVisitor) VisitComparison(ctx *ComparisonContext) any { } var conds []string for _, key := range keys { - condition, err := v.conditionBuilder.GetCondition(context.Background(), key, op, values, v.builder) + condition, err := v.conditionBuilder.ConditionFor(context.Background(), key, op, values, v.builder) if err != nil { return "" } @@ -317,7 +318,7 @@ func (v *WhereClauseVisitor) VisitComparison(ctx *ComparisonContext) any { var conds []string for _, key := range keys { - condition, err := v.conditionBuilder.GetCondition(context.Background(), key, op, []any{value1, value2}, v.builder) + condition, err := v.conditionBuilder.ConditionFor(context.Background(), key, op, []any{value1, value2}, v.builder) if err != nil { return "" } @@ -366,7 +367,7 @@ func (v *WhereClauseVisitor) VisitComparison(ctx *ComparisonContext) any { var conds []string for _, key := range keys { - condition, err := v.conditionBuilder.GetCondition(context.Background(), key, op, value, v.builder) + condition, err := v.conditionBuilder.ConditionFor(context.Background(), key, op, value, v.builder) if err != nil { return "" } @@ -404,7 +405,7 @@ func (v *WhereClauseVisitor) VisitValueList(ctx *ValueListContext) any { func (v *WhereClauseVisitor) VisitFullText(ctx *FullTextContext) any { // remove quotes from the quotedText quotedText := strings.Trim(ctx.QUOTED_TEXT().GetText(), "\"'") - cond, err := v.conditionBuilder.GetCondition(context.Background(), v.fullTextColumn, qbtypes.FilterOperatorRegexp, quotedText, v.builder) + cond, err := v.conditionBuilder.ConditionFor(context.Background(), v.fullTextColumn, qbtypes.FilterOperatorRegexp, quotedText, v.builder) if err != nil { return "" } @@ -461,7 +462,7 @@ func (v *WhereClauseVisitor) VisitFunctionCall(ctx *FunctionCallContext) any { if strings.HasPrefix(key.Name, telemetrylogs.BodyJSONStringSearchPrefix) { fieldName, _ = telemetrylogs.GetBodyJSONKey(context.Background(), key, qbtypes.FilterOperatorUnknown, value) } else { - fieldName, _ = v.conditionBuilder.GetTableFieldName(context.Background(), key) + fieldName, _ = v.fieldMapper.FieldFor(context.Background(), key) } var cond string diff --git a/pkg/parser/grammar/where_clause_visitor_test.go b/pkg/parser/grammar/where_clause_visitor_test.go index 60b0f0d6b6..0bfe2c257e 100644 --- a/pkg/parser/grammar/where_clause_visitor_test.go +++ b/pkg/parser/grammar/where_clause_visitor_test.go @@ -1,645 +1 @@ package parser - -import ( - "reflect" - "strings" - "testing" - - "github.com/SigNoz/signoz/pkg/telemetrylogs" - "github.com/SigNoz/signoz/pkg/telemetrytraces" - "github.com/SigNoz/signoz/pkg/types/telemetrytypes" - sqlbuilder "github.com/huandu/go-sqlbuilder" -) - -func TestConvertToClickHouseLogsQuery(t *testing.T) { - cases := []struct { - name string - fieldKeys map[string][]*telemetrytypes.TelemetryFieldKey - query string - expectedSearchString string - expectedSearchArgs []any - }{ - { - name: "test-simple-service-name-filter", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "service.name": { - { - Name: "service.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "service.name=redis", - expectedSearchString: "WHERE (resources_string['service.name'] = ?)", - expectedSearchArgs: []any{"redis"}, - }, - { - name: "test-simple-service-name-filter-with-materialised-column", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "service.name": { - { - Name: "service.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - Materialized: true, - }, - }, - }, - query: "service.name=redis", - expectedSearchString: "WHERE (resource_string_service$$name = ?)", - expectedSearchArgs: []any{"redis"}, - }, - { - name: "http-status-code-multiple-data-types", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "http.status_code": { - { - Name: "http.status_code", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - { - Name: "http.status_code", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "http.status_code=200", - expectedSearchString: "WHERE (attributes_number['http.status_code'] = ? OR toFloat64OrNull(attributes_string['http.status_code']) = ?)", - expectedSearchArgs: []any{float64(200), float64(200)}, - }, - { - name: "http-status-code-multiple-data-types-between-operator", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "http.status_code": { - { - Name: "http.status_code", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - { - Name: "http.status_code", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "http.status_code between 200 and 300", - expectedSearchString: "WHERE (attributes_number['http.status_code'] BETWEEN ? AND ? OR toFloat64OrNull(attributes_string['http.status_code']) BETWEEN ? AND ?)", - expectedSearchArgs: []any{float64(200), float64(300), float64(200), float64(300)}, - }, - { - name: "response-body-multiple-data-types-string-contains", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "response.body": { - { - Name: "response.body", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - { - Name: "response.body", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "response.body contains error", - expectedSearchString: "WHERE (LOWER(toString(attributes_number['response.body'])) LIKE LOWER(?) OR LOWER(attributes_string['response.body']) LIKE LOWER(?))", - expectedSearchArgs: []any{"%error%", "%error%"}, - }, - { - name: "search-on-top-level-key", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "severity_text": { - { - Name: "severity_text", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextLog, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "severity_text=error", - expectedSearchString: "WHERE (severity_text = ?)", - expectedSearchArgs: []any{"error"}, - }, - { - name: "search-on-top-level-key-conflict-with-attribute", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "severity_text": { - { - Name: "severity_text", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextLog, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - { - Name: "severity_text", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "severity_text=error", - expectedSearchString: "WHERE (severity_text = ? OR attributes_string['severity_text'] = ?)", - expectedSearchArgs: []any{"error", "error"}, - }, - { - name: "collision-with-attribute-field-and-resource-attribute", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "k8s.namespace.name": { - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "k8s.namespace.name=test", - expectedSearchString: "WHERE (resources_string['k8s.namespace.name'] = ? OR attributes_string['k8s.namespace.name'] = ?)", - expectedSearchArgs: []any{"test", "test"}, - }, - { - name: "collision-with-attribute-field-and-resource-attribute-materialised-column", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "k8s.namespace.name": { - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - Materialized: true, - }, - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "k8s.namespace.name=test", - expectedSearchString: "WHERE (resource_string_k8s$$namespace$$name = ? OR attributes_string['k8s.namespace.name'] = ?)", - expectedSearchArgs: []any{"test", "test"}, - }, - { - name: "boolean-collision-with-attribute-field-and-data-type-boolean", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "did_user_login": { - { - Name: "did_user_login", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeBool, - }, - { - Name: "did_user_login", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "did_user_login=true", - expectedSearchString: "WHERE (attributes_bool['did_user_login'] = ? OR attributes_string['did_user_login'] = ?)", - expectedSearchArgs: []any{true, "true"}, - }, - { - name: "regexp-search", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "k8s.namespace.name": { - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - "service.name": { - { - Name: "service.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "k8s.namespace.name REGEXP 'test' OR service.name='redis'", - expectedSearchString: "WHERE (((match(attributes_string['k8s.namespace.name'], ?))) OR (resources_string['service.name'] = ?))", - expectedSearchArgs: []any{"test", "redis"}, - }, - { - name: "full-text-search-multiple-words", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "waiting for response", - expectedSearchString: "WHERE ((match(body, ?)) AND (match(body, ?)) AND (match(body, ?)))", - expectedSearchArgs: []any{"waiting", "for", "response"}, - }, - { - name: "full-text-search-with-phrase-search", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: `"waiting for response"`, - expectedSearchString: "WHERE (match(body, ?))", - expectedSearchArgs: []any{"waiting for response"}, - }, - { - name: "full-text-search-with-word-and-not-word", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "error NOT buggy_app", - expectedSearchString: "WHERE ((match(body, ?)) AND NOT ((match(body, ?))))", - expectedSearchArgs: []any{"error", "buggy_app"}, - }, - { - name: "full-text-search-with-word-and-not-word-and-not-word", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "error NOT buggy_app NOT redis", - expectedSearchString: "WHERE ((match(body, ?)) AND NOT ((match(body, ?))) AND NOT ((match(body, ?))))", - expectedSearchArgs: []any{"error", "buggy_app", "redis"}, - }, - { - name: "full-text-search-with-word-and-not-word-and-not-word-tricky", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "error NOT buggy_app OR redis", - expectedSearchString: "WHERE (((match(body, ?)) AND NOT ((match(body, ?)))) OR (match(body, ?)))", - expectedSearchArgs: []any{"error", "buggy_app", "redis"}, - }, - { - name: "has-function", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "service.name": { - { - Name: "service.name", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextResource, - }, - }, - "payload.user_ids": { - { - Name: "payload.user_ids", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - }, - }, - }, - query: "has(service.name, 'redis')", - expectedSearchString: "WHERE (has(resources_string['service.name'], ?))", - expectedSearchArgs: []any{"redis"}, - }, - { - name: "has-from-list-of-values", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "has(body.payload.user_ids[*], 'u1292')", - expectedSearchString: "WHERE (has(JSONExtract(JSON_QUERY(body, '$.payload.user_ids[*]'), 'Array(String)'), ?))", - expectedSearchArgs: []any{"u1292"}, - }, - { - name: "body-json-search-that-also-has-attribute-with-same-name", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "http.status_code": { - { - Name: "http.status_code", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - Materialized: true, - }, - }, - }, - query: "body.http.status_code=200", - expectedSearchString: "WHERE (attribute_number_http$$status_code = ? OR JSONExtract(JSON_VALUE(body, '$.http.status_code'), 'Float64') = ?)", - expectedSearchArgs: []any{float64(200), float64(200)}, - }, - } - - for _, c := range cases { - t.Logf("running test %s", c.name) - whereClause, _, err := PrepareWhereClause(c.query, c.fieldKeys, telemetrylogs.NewConditionBuilder(), &telemetrytypes.TelemetryFieldKey{ - Name: "body", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextLog, - FieldDataType: telemetrytypes.FieldDataTypeString, - }) - - if err != nil { - t.Errorf("Error converting query to ClickHouse: %v", err) - } - chQuery, chQueryArgs := whereClause.BuildWithFlavor(sqlbuilder.ClickHouse) - - if chQuery != c.expectedSearchString { - t.Errorf("Expected %s, got %s", c.expectedSearchString, chQuery) - } - if !reflect.DeepEqual(chQueryArgs, c.expectedSearchArgs) { - for i, arg := range chQueryArgs { - t.Logf("Expected %v with type %T, got %v with type %T\n", c.expectedSearchArgs[i], c.expectedSearchArgs[i], arg, arg) - } - t.Errorf("Expected %v, got %v", c.expectedSearchArgs, chQueryArgs) - } - } -} - -func TestConvertToClickHouseSpansQuery(t *testing.T) { - cases := []struct { - name string - fieldKeys map[string][]*telemetrytypes.TelemetryFieldKey - query string - expectedSearchString string - expectedSearchArgs []any - }{ - { - name: "test-simple-service-name-filter", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "service.name": { - { - Name: "service.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "service.name=redis", - expectedSearchString: "WHERE (resources_string['service.name'] = ?)", - expectedSearchArgs: []any{"redis"}, - }, - { - name: "test-simple-service-name-filter-with-materialised-column", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "service.name": { - { - Name: "service.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - Materialized: true, - }, - }, - }, - query: "service.name=redis", - expectedSearchString: "WHERE (resource_string_service$$name = ?)", - expectedSearchArgs: []any{"redis"}, - }, - { - name: "http-status-code-multiple-data-types", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "http.status_code": { - { - Name: "http.status_code", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - { - Name: "http.status_code", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "http.status_code=200", - expectedSearchString: "WHERE (attributes_number['http.status_code'] = ? OR toFloat64OrNull(attributes_string['http.status_code']) = ?)", - expectedSearchArgs: []any{float64(200), float64(200)}, - }, - { - name: "http-status-code-multiple-data-types-between-operator", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "http.status_code": { - { - Name: "http.status_code", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - { - Name: "http.status_code", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "http.status_code between 200 and 300", - expectedSearchString: "WHERE (attributes_number['http.status_code'] BETWEEN ? AND ? OR toFloat64OrNull(attributes_string['http.status_code']) BETWEEN ? AND ?)", - expectedSearchArgs: []any{float64(200), float64(300), float64(200), float64(300)}, - }, - { - name: "response-body-multiple-data-types-string-contains", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "response.body": { - { - Name: "response.body", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - { - Name: "response.body", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "response.body contains error", - expectedSearchString: "WHERE (LOWER(toString(attributes_number['response.body'])) LIKE LOWER(?) OR LOWER(attributes_string['response.body']) LIKE LOWER(?))", - expectedSearchArgs: []any{"%error%", "%error%"}, - }, - { - name: "collision-with-attribute-field-and-resource-attribute", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "k8s.namespace.name": { - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "k8s.namespace.name=test", - expectedSearchString: "WHERE (resources_string['k8s.namespace.name'] = ? OR attributes_string['k8s.namespace.name'] = ?)", - expectedSearchArgs: []any{"test", "test"}, - }, - { - name: "collision-with-attribute-field-and-resource-attribute-materialised-column", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "k8s.namespace.name": { - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - Materialized: true, - }, - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "k8s.namespace.name=test", - expectedSearchString: "WHERE (resource_string_k8s$$namespace$$name = ? OR attributes_string['k8s.namespace.name'] = ?)", - expectedSearchArgs: []any{"test", "test"}, - }, - { - name: "boolean-collision-with-attribute-field-and-data-type-boolean", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "did_user_login": { - { - Name: "did_user_login", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeBool, - }, - { - Name: "did_user_login", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "did_user_login=true", - expectedSearchString: "WHERE (attributes_bool['did_user_login'] = ? OR attributes_string['did_user_login'] = ?)", - expectedSearchArgs: []any{true, "true"}, - }, - { - name: "regexp-search", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{ - "k8s.namespace.name": { - { - Name: "k8s.namespace.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - "service.name": { - { - Name: "service.name", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextResource, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - }, - }, - query: "k8s.namespace.name REGEXP 'test' OR service.name='redis'", - expectedSearchString: "WHERE (((match(attributes_string['k8s.namespace.name'], ?))) OR (resources_string['service.name'] = ?))", - expectedSearchArgs: []any{"test", "redis"}, - }, - } - - for _, c := range cases { - whereClause, _, err := PrepareWhereClause(c.query, c.fieldKeys, telemetrytraces.NewConditionBuilder(), &telemetrytypes.TelemetryFieldKey{ - Name: "dummy", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextSpan, - FieldDataType: telemetrytypes.FieldDataTypeString, - }) - if err != nil { - t.Errorf("Error converting query to ClickHouse: %v", err) - } - - chQuery, chQueryArgs := whereClause.BuildWithFlavor(sqlbuilder.ClickHouse) - - if chQuery != c.expectedSearchString { - t.Errorf("Expected %s, got %s", c.expectedSearchString, chQuery) - } - if !reflect.DeepEqual(chQueryArgs, c.expectedSearchArgs) { - for i, arg := range chQueryArgs { - t.Logf("Expected %v with type %T, got %v with type %T\n", c.expectedSearchArgs[i], c.expectedSearchArgs[i], arg, arg) - } - t.Errorf("Expected %v, got %v", c.expectedSearchArgs, chQueryArgs) - } - } -} - -func TestConvertToClickHouseSpansQueryWithErrors(t *testing.T) { - cases := []struct { - name string - fieldKeys map[string][]*telemetrytypes.TelemetryFieldKey - query string - expectedSearchString string - expectedSearchArgs []any - expectedErrorSubString string - expectedWarnings []error - }{ - { - name: "has-function-with-multiple-values", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "key.that.does.not.exist = 'redis'", - expectedSearchString: "", - expectedSearchArgs: []any{}, - expectedErrorSubString: "key `key.that.does.not.exist` not found", - expectedWarnings: []error{}, - }, - { - name: "unknown-function", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "unknown.function()", - expectedSearchString: "", - expectedSearchArgs: []any{}, - expectedErrorSubString: "expecting {'(', NOT, HAS, HASANY, HASALL, QUOTED_TEXT, KEY, FREETEXT}", - expectedWarnings: []error{}, - }, - { - name: "has-function-not-enough-params", - fieldKeys: map[string][]*telemetrytypes.TelemetryFieldKey{}, - query: "has(key.that.does.not.exist)", - expectedSearchString: "", - expectedSearchArgs: []any{}, - expectedErrorSubString: "function `has` expects key and value parameters", - expectedWarnings: []error{}, - }, - } - - for _, c := range cases { - _, warnings, err := PrepareWhereClause(c.query, c.fieldKeys, telemetrytraces.NewConditionBuilder(), &telemetrytypes.TelemetryFieldKey{ - Name: "dummy", - Signal: telemetrytypes.SignalTraces, - FieldContext: telemetrytypes.FieldContextSpan, - FieldDataType: telemetrytypes.FieldDataTypeString, - }) - if err != nil { - if !strings.Contains(err.Error(), c.expectedErrorSubString) { - t.Errorf("Expected error %v, got %v", c.expectedErrorSubString, err) - } - } - - if len(warnings) != len(c.expectedWarnings) { - t.Errorf("Expected %d warnings, got %d", len(c.expectedWarnings), len(warnings)) - } - for i, warning := range warnings { - if warning.Error() != c.expectedWarnings[i].Error() { - t.Errorf("Expected warning %d to be %v, got %v", i, c.expectedWarnings[i], warning) - } - } - } -} diff --git a/pkg/telemetrylogs/condition_builder.go b/pkg/telemetrylogs/condition_builder.go index 63cb5b64d8..4e432a5ec2 100644 --- a/pkg/telemetrylogs/condition_builder.go +++ b/pkg/telemetrylogs/condition_builder.go @@ -3,237 +3,37 @@ package telemetrylogs import ( "context" "fmt" - "strconv" "strings" schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" "github.com/SigNoz/signoz/pkg/types/telemetrytypes" - "github.com/SigNoz/signoz/pkg/valuer" "github.com/huandu/go-sqlbuilder" ) -var ( - logsV2Columns = map[string]*schema.Column{ - "ts_bucket_start": {Name: "ts_bucket_start", Type: schema.ColumnTypeUInt64}, - "resource_fingerprint": {Name: "resource_fingerprint", Type: schema.ColumnTypeString}, - - "timestamp": {Name: "timestamp", Type: schema.ColumnTypeUInt64}, - "observed_timestamp": {Name: "observed_timestamp", Type: schema.ColumnTypeUInt64}, - "id": {Name: "id", Type: schema.ColumnTypeString}, - "trace_id": {Name: "trace_id", Type: schema.ColumnTypeString}, - "span_id": {Name: "span_id", Type: schema.ColumnTypeString}, - "trace_flags": {Name: "trace_flags", Type: schema.ColumnTypeUInt32}, - "severity_text": {Name: "severity_text", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "severity_number": {Name: "severity_number", Type: schema.ColumnTypeUInt8}, - "body": {Name: "body", Type: schema.ColumnTypeString}, - "attributes_string": {Name: "attributes_string", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }}, - "attributes_number": {Name: "attributes_number", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeFloat64, - }}, - "attributes_bool": {Name: "attributes_bool", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeBool, - }}, - "resources_string": {Name: "resources_string", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }}, - "scope_name": {Name: "scope_name", Type: schema.ColumnTypeString}, - "scope_version": {Name: "scope_version", Type: schema.ColumnTypeString}, - "scope_string": {Name: "scope_string", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }}, - } -) - -var _ qbtypes.ConditionBuilder = &conditionBuilder{} - type conditionBuilder struct { + fm qbtypes.FieldMapper } -func NewConditionBuilder() qbtypes.ConditionBuilder { - return &conditionBuilder{} +func NewConditionBuilder(fm qbtypes.FieldMapper) *conditionBuilder { + return &conditionBuilder{fm: fm} } -func (c *conditionBuilder) GetColumn(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) { - - switch key.FieldContext { - case telemetrytypes.FieldContextResource: - return logsV2Columns["resources_string"], nil - case telemetrytypes.FieldContextScope: - switch key.Name { - case "name", "scope.name", "scope_name": - return logsV2Columns["scope_name"], nil - case "version", "scope.version", "scope_version": - return logsV2Columns["scope_version"], nil - } - return logsV2Columns["scope_string"], nil - case telemetrytypes.FieldContextAttribute: - switch key.FieldDataType { - case telemetrytypes.FieldDataTypeString: - return logsV2Columns["attributes_string"], nil - case telemetrytypes.FieldDataTypeInt64, telemetrytypes.FieldDataTypeFloat64, telemetrytypes.FieldDataTypeNumber: - return logsV2Columns["attributes_number"], nil - case telemetrytypes.FieldDataTypeBool: - return logsV2Columns["attributes_bool"], nil - } - case telemetrytypes.FieldContextLog, telemetrytypes.FieldContextUnspecified: - col, ok := logsV2Columns[key.Name] - if !ok { - // check if the key has body JSON search - if strings.HasPrefix(key.Name, BodyJSONStringSearchPrefix) { - return logsV2Columns["body"], nil - } - return nil, qbtypes.ErrColumnNotFound - } - return col, nil - } - - return nil, qbtypes.ErrColumnNotFound -} - -func (c *conditionBuilder) GetTableFieldName(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (string, error) { - column, err := c.GetColumn(ctx, key) - if err != nil { - return "", err - } - - switch column.Type { - case schema.ColumnTypeString, - schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - schema.ColumnTypeUInt64, - schema.ColumnTypeUInt32, - schema.ColumnTypeUInt8: - return column.Name, nil - case schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }: - // a key could have been materialized, if so return the materialized column name - if key.Materialized { - return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil - } - return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil - case schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeFloat64, - }: - // a key could have been materialized, if so return the materialized column name - if key.Materialized { - return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil - } - return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil - case schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeBool, - }: - // a key could have been materialized, if so return the materialized column name - if key.Materialized { - return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil - } - return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil - } - // should not reach here - return column.Name, nil -} - -func parseStrValue(valueStr string, operator qbtypes.FilterOperator) (telemetrytypes.FieldDataType, any) { - - valueType := telemetrytypes.FieldDataTypeString - - // return the value as is for the following operators - // as they are always string - if operator == qbtypes.FilterOperatorContains || operator == qbtypes.FilterOperatorNotContains || - operator == qbtypes.FilterOperatorRegexp || operator == qbtypes.FilterOperatorNotRegexp || - operator == qbtypes.FilterOperatorLike || operator == qbtypes.FilterOperatorNotLike || - operator == qbtypes.FilterOperatorILike || operator == qbtypes.FilterOperatorNotILike { - return valueType, valueStr - } - - var err error - var parsedValue any - if parsedValue, err = strconv.ParseBool(valueStr); err == nil { - valueType = telemetrytypes.FieldDataTypeBool - } else if parsedValue, err = strconv.ParseInt(valueStr, 10, 64); err == nil { - valueType = telemetrytypes.FieldDataTypeInt64 - } else if parsedValue, err = strconv.ParseFloat(valueStr, 64); err == nil { - valueType = telemetrytypes.FieldDataTypeFloat64 - } else { - parsedValue = valueStr - valueType = telemetrytypes.FieldDataTypeString - } - - return valueType, parsedValue -} - -func inferDataType(value any, operator qbtypes.FilterOperator, key *telemetrytypes.TelemetryFieldKey) (telemetrytypes.FieldDataType, any) { - // check if the value is a int, float, string, bool - valueType := telemetrytypes.FieldDataTypeUnspecified - switch v := value.(type) { - case []any: - // take the first element and infer the type - if len(v) > 0 { - valueType, _ = inferDataType(v[0], operator, key) - } - return valueType, v - case uint8, uint16, uint32, uint64, int, int8, int16, int32, int64: - valueType = telemetrytypes.FieldDataTypeInt64 - case float32, float64: - valueType = telemetrytypes.FieldDataTypeFloat64 - case string: - valueType, value = parseStrValue(v, operator) - case bool: - valueType = telemetrytypes.FieldDataTypeBool - } - - // check if it is array - if strings.HasSuffix(key.Name, "[*]") { - valueType = telemetrytypes.FieldDataType{String: valuer.NewString(fmt.Sprintf("[]%s", valueType.StringValue()))} - } - - return valueType, value -} - -func GetBodyJSONKey(_ context.Context, key *telemetrytypes.TelemetryFieldKey, operator qbtypes.FilterOperator, value any) (string, any) { - - dataType, value := inferDataType(value, operator, key) - - // all body json keys are of the form body. - path := strings.Join(strings.Split(key.Name, ".")[1:], ".") - - // for array types, we need to extract the value from the JSON_QUERY - if dataType == telemetrytypes.FieldDataTypeArrayInt64 || - dataType == telemetrytypes.FieldDataTypeArrayFloat64 || - dataType == telemetrytypes.FieldDataTypeArrayString || - dataType == telemetrytypes.FieldDataTypeArrayBool || - dataType == telemetrytypes.FieldDataTypeArrayNumber { - return fmt.Sprintf("JSONExtract(JSON_QUERY(body, '$.%s'), '%s')", path, dataType.CHDataType()), value - } - - // for all other types, we need to extract the value from the JSON_VALUE - return fmt.Sprintf("JSONExtract(JSON_VALUE(body, '$.%s'), '%s')", path, dataType.CHDataType()), value -} - -func (c *conditionBuilder) GetCondition( +func (c *conditionBuilder) ConditionFor( ctx context.Context, key *telemetrytypes.TelemetryFieldKey, operator qbtypes.FilterOperator, value any, sb *sqlbuilder.SelectBuilder, ) (string, error) { - column, err := c.GetColumn(ctx, key) + + column, err := c.fm.ColumnFor(ctx, key) if err != nil { return "", err } - tblFieldName, err := c.GetTableFieldName(ctx, key) + tblFieldName, err := c.fm.FieldFor(ctx, key) if err != nil { return "", err } @@ -316,7 +116,6 @@ func (c *conditionBuilder) GetCondition( return sb.NotIn(tblFieldName, values...), nil // exists and not exists - // but how could you live and have no story to tell // in the UI based query builder, `exists` and `not exists` are used for // key membership checks, so depending on the column type, the condition changes case qbtypes.FilterOperatorExists, qbtypes.FilterOperatorNotExists: diff --git a/pkg/telemetrylogs/condition_builder_test.go b/pkg/telemetrylogs/condition_builder_test.go index 45049a6c06..3adb4f64de 100644 --- a/pkg/telemetrylogs/condition_builder_test.go +++ b/pkg/telemetrylogs/condition_builder_test.go @@ -4,7 +4,6 @@ import ( "context" "testing" - schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" "github.com/SigNoz/signoz/pkg/types/telemetrytypes" "github.com/huandu/go-sqlbuilder" @@ -12,260 +11,8 @@ import ( "github.com/stretchr/testify/require" ) -func TestGetColumn(t *testing.T) { +func TestConditionFor(t *testing.T) { ctx := context.Background() - conditionBuilder := NewConditionBuilder() - - testCases := []struct { - name string - key telemetrytypes.TelemetryFieldKey - expectedCol *schema.Column - expectedError error - }{ - { - name: "Resource field", - key: telemetrytypes.TelemetryFieldKey{ - Name: "service.name", - FieldContext: telemetrytypes.FieldContextResource, - }, - expectedCol: logsV2Columns["resources_string"], - expectedError: nil, - }, - { - name: "Scope field - scope name", - key: telemetrytypes.TelemetryFieldKey{ - Name: "name", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: logsV2Columns["scope_name"], - expectedError: nil, - }, - { - name: "Scope field - scope.name", - key: telemetrytypes.TelemetryFieldKey{ - Name: "scope.name", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: logsV2Columns["scope_name"], - expectedError: nil, - }, - { - name: "Scope field - scope_name", - key: telemetrytypes.TelemetryFieldKey{ - Name: "scope_name", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: logsV2Columns["scope_name"], - expectedError: nil, - }, - { - name: "Scope field - version", - key: telemetrytypes.TelemetryFieldKey{ - Name: "version", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: logsV2Columns["scope_version"], - expectedError: nil, - }, - { - name: "Scope field - other scope field", - key: telemetrytypes.TelemetryFieldKey{ - Name: "custom.scope.field", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: logsV2Columns["scope_string"], - expectedError: nil, - }, - { - name: "Attribute field - string type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "user.id", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - expectedCol: logsV2Columns["attributes_string"], - expectedError: nil, - }, - { - name: "Attribute field - number type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.size", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeNumber, - }, - expectedCol: logsV2Columns["attributes_number"], - expectedError: nil, - }, - { - name: "Attribute field - int64 type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.duration", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeInt64, - }, - expectedCol: logsV2Columns["attributes_number"], - expectedError: nil, - }, - { - name: "Attribute field - float64 type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "cpu.utilization", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - expectedCol: logsV2Columns["attributes_number"], - expectedError: nil, - }, - { - name: "Attribute field - bool type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.success", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeBool, - }, - expectedCol: logsV2Columns["attributes_bool"], - expectedError: nil, - }, - { - name: "Log field - timestamp", - key: telemetrytypes.TelemetryFieldKey{ - Name: "timestamp", - FieldContext: telemetrytypes.FieldContextLog, - }, - expectedCol: logsV2Columns["timestamp"], - expectedError: nil, - }, - { - name: "Log field - body", - key: telemetrytypes.TelemetryFieldKey{ - Name: "body", - FieldContext: telemetrytypes.FieldContextLog, - }, - expectedCol: logsV2Columns["body"], - expectedError: nil, - }, - { - name: "Log field - nonexistent", - key: telemetrytypes.TelemetryFieldKey{ - Name: "nonexistent_field", - FieldContext: telemetrytypes.FieldContextLog, - }, - expectedCol: nil, - expectedError: qbtypes.ErrColumnNotFound, - }, - { - name: "did_user_login", - key: telemetrytypes.TelemetryFieldKey{ - Name: "did_user_login", - Signal: telemetrytypes.SignalLogs, - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeBool, - }, - expectedCol: logsV2Columns["attributes_bool"], - expectedError: nil, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - col, err := conditionBuilder.GetColumn(ctx, &tc.key) - - if tc.expectedError != nil { - assert.Equal(t, tc.expectedError, err) - } else { - require.NoError(t, err) - assert.Equal(t, tc.expectedCol, col) - } - }) - } -} - -func TestGetFieldKeyName(t *testing.T) { - ctx := context.Background() - conditionBuilder := &conditionBuilder{} - - testCases := []struct { - name string - key telemetrytypes.TelemetryFieldKey - expectedResult string - expectedError error - }{ - { - name: "Simple column type - timestamp", - key: telemetrytypes.TelemetryFieldKey{ - Name: "timestamp", - FieldContext: telemetrytypes.FieldContextLog, - }, - expectedResult: "timestamp", - expectedError: nil, - }, - { - name: "Map column type - string attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "user.id", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - expectedResult: "attributes_string['user.id']", - expectedError: nil, - }, - { - name: "Map column type - number attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.size", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeNumber, - }, - expectedResult: "attributes_number['request.size']", - expectedError: nil, - }, - { - name: "Map column type - bool attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.success", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeBool, - }, - expectedResult: "attributes_bool['request.success']", - expectedError: nil, - }, - { - name: "Map column type - resource attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "service.name", - FieldContext: telemetrytypes.FieldContextResource, - }, - expectedResult: "resources_string['service.name']", - expectedError: nil, - }, - { - name: "Non-existent column", - key: telemetrytypes.TelemetryFieldKey{ - Name: "nonexistent_field", - FieldContext: telemetrytypes.FieldContextLog, - }, - expectedResult: "", - expectedError: qbtypes.ErrColumnNotFound, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := conditionBuilder.GetTableFieldName(ctx, &tc.key) - - if tc.expectedError != nil { - assert.Equal(t, tc.expectedError, err) - } else { - require.NoError(t, err) - assert.Equal(t, tc.expectedResult, result) - } - }) - } -} - -func TestGetCondition(t *testing.T) { - ctx := context.Background() - conditionBuilder := NewConditionBuilder() testCases := []struct { name string @@ -548,10 +295,13 @@ func TestGetCondition(t *testing.T) { }, } + fm := NewFieldMapper() + conditionBuilder := NewConditionBuilder(fm) + for _, tc := range testCases { sb := sqlbuilder.NewSelectBuilder() t.Run(tc.name, func(t *testing.T) { - cond, err := conditionBuilder.GetCondition(ctx, &tc.key, tc.operator, tc.value, sb) + cond, err := conditionBuilder.ConditionFor(ctx, &tc.key, tc.operator, tc.value, sb) sb.Where(cond) if tc.expectedError != nil { @@ -565,13 +315,12 @@ func TestGetCondition(t *testing.T) { } } -func TestGetConditionMultiple(t *testing.T) { +func TestConditionForMultipleKeys(t *testing.T) { ctx := context.Background() - conditionBuilder := NewConditionBuilder() testCases := []struct { name string - keys []*telemetrytypes.TelemetryFieldKey + keys []telemetrytypes.TelemetryFieldKey operator qbtypes.FilterOperator value any expectedSQL string @@ -579,7 +328,7 @@ func TestGetConditionMultiple(t *testing.T) { }{ { name: "Equal operator - string", - keys: []*telemetrytypes.TelemetryFieldKey{ + keys: []telemetrytypes.TelemetryFieldKey{ { Name: "body", FieldContext: telemetrytypes.FieldContextLog, @@ -596,12 +345,15 @@ func TestGetConditionMultiple(t *testing.T) { }, } + fm := NewFieldMapper() + conditionBuilder := NewConditionBuilder(fm) + for _, tc := range testCases { sb := sqlbuilder.NewSelectBuilder() t.Run(tc.name, func(t *testing.T) { var err error for _, key := range tc.keys { - cond, err := conditionBuilder.GetCondition(ctx, key, tc.operator, tc.value, sb) + cond, err := conditionBuilder.ConditionFor(ctx, &key, tc.operator, tc.value, sb) sb.Where(cond) if err != nil { t.Fatalf("Error getting condition for key %s: %v", key.Name, err) @@ -619,9 +371,8 @@ func TestGetConditionMultiple(t *testing.T) { } } -func TestGetConditionJSONBodySearch(t *testing.T) { +func TestConditionForJSONBodySearch(t *testing.T) { ctx := context.Background() - conditionBuilder := NewConditionBuilder() testCases := []struct { name string @@ -793,10 +544,13 @@ func TestGetConditionJSONBodySearch(t *testing.T) { }, } + fm := NewFieldMapper() + conditionBuilder := NewConditionBuilder(fm) + for _, tc := range testCases { sb := sqlbuilder.NewSelectBuilder() t.Run(tc.name, func(t *testing.T) { - cond, err := conditionBuilder.GetCondition(ctx, &tc.key, tc.operator, tc.value, sb) + cond, err := conditionBuilder.ConditionFor(ctx, &tc.key, tc.operator, tc.value, sb) sb.Where(cond) if tc.expectedError != nil { diff --git a/pkg/telemetrylogs/field_mapper.go b/pkg/telemetrylogs/field_mapper.go new file mode 100644 index 0000000000..a52e68a45d --- /dev/null +++ b/pkg/telemetrylogs/field_mapper.go @@ -0,0 +1,194 @@ +package telemetrylogs + +import ( + "context" + "fmt" + "strings" + + schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" + "github.com/SigNoz/signoz/pkg/errors" + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" + "github.com/SigNoz/signoz/pkg/types/telemetrytypes" + + "golang.org/x/exp/maps" +) + +var ( + logsV2Columns = map[string]*schema.Column{ + "ts_bucket_start": {Name: "ts_bucket_start", Type: schema.ColumnTypeUInt64}, + "resource_fingerprint": {Name: "resource_fingerprint", Type: schema.ColumnTypeString}, + + "timestamp": {Name: "timestamp", Type: schema.ColumnTypeUInt64}, + "observed_timestamp": {Name: "observed_timestamp", Type: schema.ColumnTypeUInt64}, + "id": {Name: "id", Type: schema.ColumnTypeString}, + "trace_id": {Name: "trace_id", Type: schema.ColumnTypeString}, + "span_id": {Name: "span_id", Type: schema.ColumnTypeString}, + "trace_flags": {Name: "trace_flags", Type: schema.ColumnTypeUInt32}, + "severity_text": {Name: "severity_text", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "severity_number": {Name: "severity_number", Type: schema.ColumnTypeUInt8}, + "body": {Name: "body", Type: schema.ColumnTypeString}, + "attributes_string": {Name: "attributes_string", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }}, + "attributes_number": {Name: "attributes_number", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeFloat64, + }}, + "attributes_bool": {Name: "attributes_bool", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeBool, + }}, + "resources_string": {Name: "resources_string", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }}, + "scope_name": {Name: "scope_name", Type: schema.ColumnTypeString}, + "scope_version": {Name: "scope_version", Type: schema.ColumnTypeString}, + "scope_string": {Name: "scope_string", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }}, + } +) + +type fieldMapper struct{} + +func NewFieldMapper() qbtypes.FieldMapper { + return &fieldMapper{} +} + +func (m *fieldMapper) getColumn(_ context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) { + + switch key.FieldContext { + case telemetrytypes.FieldContextResource: + return logsV2Columns["resources_string"], nil + case telemetrytypes.FieldContextScope: + switch key.Name { + case "name", "scope.name", "scope_name": + return logsV2Columns["scope_name"], nil + case "version", "scope.version", "scope_version": + return logsV2Columns["scope_version"], nil + } + return logsV2Columns["scope_string"], nil + case telemetrytypes.FieldContextAttribute: + switch key.FieldDataType { + case telemetrytypes.FieldDataTypeString: + return logsV2Columns["attributes_string"], nil + case telemetrytypes.FieldDataTypeInt64, telemetrytypes.FieldDataTypeFloat64, telemetrytypes.FieldDataTypeNumber: + return logsV2Columns["attributes_number"], nil + case telemetrytypes.FieldDataTypeBool: + return logsV2Columns["attributes_bool"], nil + } + case telemetrytypes.FieldContextLog, telemetrytypes.FieldContextUnspecified: + col, ok := logsV2Columns[key.Name] + if !ok { + // check if the key has body JSON search + if strings.HasPrefix(key.Name, BodyJSONStringSearchPrefix) { + return logsV2Columns["body"], nil + } + return nil, qbtypes.ErrColumnNotFound + } + return col, nil + } + + return nil, qbtypes.ErrColumnNotFound +} + +func (m *fieldMapper) FieldFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (string, error) { + column, err := m.getColumn(ctx, key) + if err != nil { + return "", err + } + + switch column.Type { + case schema.ColumnTypeString, + schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + schema.ColumnTypeUInt64, + schema.ColumnTypeUInt32, + schema.ColumnTypeUInt8: + return column.Name, nil + case schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }: + // a key could have been materialized, if so return the materialized column name + if key.Materialized { + return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil + } + return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil + case schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeFloat64, + }: + // a key could have been materialized, if so return the materialized column name + if key.Materialized { + return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil + } + return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil + case schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeBool, + }: + // a key could have been materialized, if so return the materialized column name + if key.Materialized { + return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil + } + return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil + } + // should not reach here + return column.Name, nil +} + +func (m *fieldMapper) ColumnFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) { + return m.getColumn(ctx, key) +} + +func (m *fieldMapper) ColumnExpressionFor( + ctx context.Context, + field *telemetrytypes.TelemetryFieldKey, + keys map[string][]*telemetrytypes.TelemetryFieldKey, +) (string, error) { + + colName, err := m.FieldFor(ctx, field) + if errors.Is(err, qbtypes.ErrColumnNotFound) { + // the key didn't have the right context to be added to the query + // we try to use the context we know of + keysForField := keys[field.Name] + if len(keysForField) == 0 { + // is it a static field? + if _, ok := logsV2Columns[field.Name]; ok { + // if it is, attach the column name directly + field.FieldContext = telemetrytypes.FieldContextSpan + colName, _ = m.FieldFor(ctx, field) + } else { + // - the context is not provided + // - there are not keys for the field + // - it is not a static field + // - the next best thing to do is see if there is a typo + // and suggest a correction + correction, found := telemetrytypes.SuggestCorrection(field.Name, maps.Keys(keys)) + if found { + // we found a close match, in the error message send the suggestion + return "", errors.Wrapf(err, errors.TypeInvalidInput, errors.CodeInvalidInput, correction) + } else { + // not even a close match, return an error + return "", err + } + } + } else if len(keysForField) == 1 { + // we have a single key for the field, use it + colName, _ = m.FieldFor(ctx, keysForField[0]) + } else { + // select any non-empty value from the keys + args := []string{} + for _, key := range keysForField { + colName, _ = m.FieldFor(ctx, key) + args = append(args, fmt.Sprintf("toString(%s) != '', toString(%s)", colName, colName)) + } + colName = fmt.Sprintf("multiIf(%s)", strings.Join(args, ", ")) + } + } + + return fmt.Sprintf("%s AS `%s`", colName, field.Name), nil +} diff --git a/pkg/telemetrylogs/field_mapper_test.go b/pkg/telemetrylogs/field_mapper_test.go new file mode 100644 index 0000000000..f60a50a9eb --- /dev/null +++ b/pkg/telemetrylogs/field_mapper_test.go @@ -0,0 +1,265 @@ +package telemetrylogs + +import ( + "context" + "testing" + + schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" + "github.com/SigNoz/signoz/pkg/types/telemetrytypes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetColumn(t *testing.T) { + ctx := context.Background() + + testCases := []struct { + name string + key telemetrytypes.TelemetryFieldKey + expectedCol *schema.Column + expectedError error + }{ + { + name: "Resource field", + key: telemetrytypes.TelemetryFieldKey{ + Name: "service.name", + FieldContext: telemetrytypes.FieldContextResource, + }, + expectedCol: logsV2Columns["resources_string"], + expectedError: nil, + }, + { + name: "Scope field - scope name", + key: telemetrytypes.TelemetryFieldKey{ + Name: "name", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: logsV2Columns["scope_name"], + expectedError: nil, + }, + { + name: "Scope field - scope.name", + key: telemetrytypes.TelemetryFieldKey{ + Name: "scope.name", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: logsV2Columns["scope_name"], + expectedError: nil, + }, + { + name: "Scope field - scope_name", + key: telemetrytypes.TelemetryFieldKey{ + Name: "scope_name", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: logsV2Columns["scope_name"], + expectedError: nil, + }, + { + name: "Scope field - version", + key: telemetrytypes.TelemetryFieldKey{ + Name: "version", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: logsV2Columns["scope_version"], + expectedError: nil, + }, + { + name: "Scope field - other scope field", + key: telemetrytypes.TelemetryFieldKey{ + Name: "custom.scope.field", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: logsV2Columns["scope_string"], + expectedError: nil, + }, + { + name: "Attribute field - string type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "user.id", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeString, + }, + expectedCol: logsV2Columns["attributes_string"], + expectedError: nil, + }, + { + name: "Attribute field - number type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.size", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeNumber, + }, + expectedCol: logsV2Columns["attributes_number"], + expectedError: nil, + }, + { + name: "Attribute field - int64 type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.duration", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeInt64, + }, + expectedCol: logsV2Columns["attributes_number"], + expectedError: nil, + }, + { + name: "Attribute field - float64 type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "cpu.utilization", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeFloat64, + }, + expectedCol: logsV2Columns["attributes_number"], + expectedError: nil, + }, + { + name: "Attribute field - bool type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.success", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeBool, + }, + expectedCol: logsV2Columns["attributes_bool"], + expectedError: nil, + }, + { + name: "Log field - timestamp", + key: telemetrytypes.TelemetryFieldKey{ + Name: "timestamp", + FieldContext: telemetrytypes.FieldContextLog, + }, + expectedCol: logsV2Columns["timestamp"], + expectedError: nil, + }, + { + name: "Log field - body", + key: telemetrytypes.TelemetryFieldKey{ + Name: "body", + FieldContext: telemetrytypes.FieldContextLog, + }, + expectedCol: logsV2Columns["body"], + expectedError: nil, + }, + { + name: "Log field - nonexistent", + key: telemetrytypes.TelemetryFieldKey{ + Name: "nonexistent_field", + FieldContext: telemetrytypes.FieldContextLog, + }, + expectedCol: nil, + expectedError: qbtypes.ErrColumnNotFound, + }, + { + name: "did_user_login", + key: telemetrytypes.TelemetryFieldKey{ + Name: "did_user_login", + Signal: telemetrytypes.SignalLogs, + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeBool, + }, + expectedCol: logsV2Columns["attributes_bool"], + expectedError: nil, + }, + } + + fm := NewFieldMapper() + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + col, err := fm.ColumnFor(ctx, &tc.key) + + if tc.expectedError != nil { + assert.Equal(t, tc.expectedError, err) + } else { + require.NoError(t, err) + assert.Equal(t, tc.expectedCol, col) + } + }) + } +} + +func TestGetFieldKeyName(t *testing.T) { + ctx := context.Background() + + testCases := []struct { + name string + key telemetrytypes.TelemetryFieldKey + expectedResult string + expectedError error + }{ + { + name: "Simple column type - timestamp", + key: telemetrytypes.TelemetryFieldKey{ + Name: "timestamp", + FieldContext: telemetrytypes.FieldContextLog, + }, + expectedResult: "timestamp", + expectedError: nil, + }, + { + name: "Map column type - string attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "user.id", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeString, + }, + expectedResult: "attributes_string['user.id']", + expectedError: nil, + }, + { + name: "Map column type - number attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.size", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeNumber, + }, + expectedResult: "attributes_number['request.size']", + expectedError: nil, + }, + { + name: "Map column type - bool attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.success", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeBool, + }, + expectedResult: "attributes_bool['request.success']", + expectedError: nil, + }, + { + name: "Map column type - resource attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "service.name", + FieldContext: telemetrytypes.FieldContextResource, + }, + expectedResult: "resources_string['service.name']", + expectedError: nil, + }, + { + name: "Non-existent column", + key: telemetrytypes.TelemetryFieldKey{ + Name: "nonexistent_field", + FieldContext: telemetrytypes.FieldContextLog, + }, + expectedResult: "", + expectedError: qbtypes.ErrColumnNotFound, + }, + } + + fm := NewFieldMapper() + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := fm.FieldFor(ctx, &tc.key) + + if tc.expectedError != nil { + assert.Equal(t, tc.expectedError, err) + } else { + require.NoError(t, err) + assert.Equal(t, tc.expectedResult, result) + } + }) + } +} diff --git a/pkg/telemetrylogs/json.go b/pkg/telemetrylogs/json.go new file mode 100644 index 0000000000..987018a2c7 --- /dev/null +++ b/pkg/telemetrylogs/json.go @@ -0,0 +1,89 @@ +package telemetrylogs + +import ( + "context" + "fmt" + "strconv" + "strings" + + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" + "github.com/SigNoz/signoz/pkg/types/telemetrytypes" + "github.com/SigNoz/signoz/pkg/valuer" +) + +func parseStrValue(valueStr string, operator qbtypes.FilterOperator) (telemetrytypes.FieldDataType, any) { + + valueType := telemetrytypes.FieldDataTypeString + + // return the value as is for the following operators + // as they are always string + if operator == qbtypes.FilterOperatorContains || operator == qbtypes.FilterOperatorNotContains || + operator == qbtypes.FilterOperatorRegexp || operator == qbtypes.FilterOperatorNotRegexp || + operator == qbtypes.FilterOperatorLike || operator == qbtypes.FilterOperatorNotLike || + operator == qbtypes.FilterOperatorILike || operator == qbtypes.FilterOperatorNotILike { + return valueType, valueStr + } + + var err error + var parsedValue any + if parsedValue, err = strconv.ParseBool(valueStr); err == nil { + valueType = telemetrytypes.FieldDataTypeBool + } else if parsedValue, err = strconv.ParseInt(valueStr, 10, 64); err == nil { + valueType = telemetrytypes.FieldDataTypeInt64 + } else if parsedValue, err = strconv.ParseFloat(valueStr, 64); err == nil { + valueType = telemetrytypes.FieldDataTypeFloat64 + } else { + parsedValue = valueStr + valueType = telemetrytypes.FieldDataTypeString + } + + return valueType, parsedValue +} + +func inferDataType(value any, operator qbtypes.FilterOperator, key *telemetrytypes.TelemetryFieldKey) (telemetrytypes.FieldDataType, any) { + // check if the value is a int, float, string, bool + valueType := telemetrytypes.FieldDataTypeUnspecified + switch v := value.(type) { + case []any: + // take the first element and infer the type + if len(v) > 0 { + valueType, _ = inferDataType(v[0], operator, key) + } + return valueType, v + case uint8, uint16, uint32, uint64, int, int8, int16, int32, int64: + valueType = telemetrytypes.FieldDataTypeInt64 + case float32, float64: + valueType = telemetrytypes.FieldDataTypeFloat64 + case string: + valueType, value = parseStrValue(v, operator) + case bool: + valueType = telemetrytypes.FieldDataTypeBool + } + + // check if it is array + if strings.HasSuffix(key.Name, "[*]") { + valueType = telemetrytypes.FieldDataType{String: valuer.NewString(fmt.Sprintf("[]%s", valueType.StringValue()))} + } + + return valueType, value +} + +func GetBodyJSONKey(_ context.Context, key *telemetrytypes.TelemetryFieldKey, operator qbtypes.FilterOperator, value any) (string, any) { + + dataType, value := inferDataType(value, operator, key) + + // all body json keys are of the form body. + path := strings.Join(strings.Split(key.Name, ".")[1:], ".") + + // for array types, we need to extract the value from the JSON_QUERY + if dataType == telemetrytypes.FieldDataTypeArrayInt64 || + dataType == telemetrytypes.FieldDataTypeArrayFloat64 || + dataType == telemetrytypes.FieldDataTypeArrayString || + dataType == telemetrytypes.FieldDataTypeArrayBool || + dataType == telemetrytypes.FieldDataTypeArrayNumber { + return fmt.Sprintf("JSONExtract(JSON_QUERY(body, '$.%s'), '%s')", path, dataType.CHDataType()), value + } + + // for all other types, we need to extract the value from the JSON_VALUE + return fmt.Sprintf("JSONExtract(JSON_VALUE(body, '$.%s'), '%s')", path, dataType.CHDataType()), value +} diff --git a/pkg/telemetrymetadata/condition_builder.go b/pkg/telemetrymetadata/condition_builder.go index 33fb22a1b2..79044f53c2 100644 --- a/pkg/telemetrymetadata/condition_builder.go +++ b/pkg/telemetrymetadata/condition_builder.go @@ -10,66 +10,28 @@ import ( "github.com/huandu/go-sqlbuilder" ) -var ( - attributeMetadataColumns = map[string]*schema.Column{ - "resource_attributes": {Name: "resource_attributes", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }}, - "attributes": {Name: "attributes", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }}, - } -) - type conditionBuilder struct { + fm qbtypes.FieldMapper } -func NewConditionBuilder() qbtypes.ConditionBuilder { - return &conditionBuilder{} +func NewConditionBuilder(fm qbtypes.FieldMapper) *conditionBuilder { + return &conditionBuilder{fm: fm} } -func (c *conditionBuilder) GetColumn(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) { - switch key.FieldContext { - case telemetrytypes.FieldContextResource: - return attributeMetadataColumns["resource_attributes"], nil - case telemetrytypes.FieldContextAttribute: - return attributeMetadataColumns["attributes"], nil - } - return nil, qbtypes.ErrColumnNotFound -} - -func (c *conditionBuilder) GetTableFieldName(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (string, error) { - column, err := c.GetColumn(ctx, key) - if err != nil { - return "", err - } - - switch column.Type { - case schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }: - return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil - } - return column.Name, nil -} - -func (c *conditionBuilder) GetCondition( +func (c *conditionBuilder) ConditionFor( ctx context.Context, key *telemetrytypes.TelemetryFieldKey, operator qbtypes.FilterOperator, value any, sb *sqlbuilder.SelectBuilder, ) (string, error) { - column, err := c.GetColumn(ctx, key) + column, err := c.fm.ColumnFor(ctx, key) if err != nil { // if we don't have a column, we can't build a condition for related values return "", nil } - tblFieldName, err := c.GetTableFieldName(ctx, key) + tblFieldName, err := c.fm.FieldFor(ctx, key) if err != nil { // if we don't have a table field name, we can't build a condition for related values return "", nil diff --git a/pkg/telemetrymetadata/condition_builder_test.go b/pkg/telemetrymetadata/condition_builder_test.go index 1a292ab98c..001577f294 100644 --- a/pkg/telemetrymetadata/condition_builder_test.go +++ b/pkg/telemetrymetadata/condition_builder_test.go @@ -4,7 +4,6 @@ import ( "context" "testing" - schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" "github.com/SigNoz/signoz/pkg/types/telemetrytypes" "github.com/huandu/go-sqlbuilder" @@ -12,212 +11,9 @@ import ( "github.com/stretchr/testify/require" ) -func TestGetColumn(t *testing.T) { +func TestConditionFor(t *testing.T) { ctx := context.Background() - conditionBuilder := NewConditionBuilder() - - testCases := []struct { - name string - key telemetrytypes.TelemetryFieldKey - expectedCol *schema.Column - expectedError error - }{ - { - name: "Resource field", - key: telemetrytypes.TelemetryFieldKey{ - Name: "service.name", - FieldContext: telemetrytypes.FieldContextResource, - }, - expectedCol: attributeMetadataColumns["resource_attributes"], - expectedError: nil, - }, - { - name: "Scope field - scope name", - key: telemetrytypes.TelemetryFieldKey{ - Name: "name", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: nil, - expectedError: qbtypes.ErrColumnNotFound, - }, - { - name: "Scope field - scope.name", - key: telemetrytypes.TelemetryFieldKey{ - Name: "scope.name", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: nil, - expectedError: qbtypes.ErrColumnNotFound, - }, - { - name: "Scope field - scope_name", - key: telemetrytypes.TelemetryFieldKey{ - Name: "scope_name", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: nil, - expectedError: qbtypes.ErrColumnNotFound, - }, - { - name: "Scope field - version", - key: telemetrytypes.TelemetryFieldKey{ - Name: "version", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: nil, - expectedError: qbtypes.ErrColumnNotFound, - }, - { - name: "Scope field - other scope field", - key: telemetrytypes.TelemetryFieldKey{ - Name: "custom.scope.field", - FieldContext: telemetrytypes.FieldContextScope, - }, - expectedCol: nil, - expectedError: qbtypes.ErrColumnNotFound, - }, - { - name: "Attribute field - string type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "user.id", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - expectedCol: attributeMetadataColumns["attributes"], - expectedError: nil, - }, - { - name: "Attribute field - number type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.size", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeNumber, - }, - expectedCol: attributeMetadataColumns["attributes"], - expectedError: nil, - }, - { - name: "Attribute field - int64 type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.duration", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeInt64, - }, - expectedCol: attributeMetadataColumns["attributes"], - expectedError: nil, - }, - { - name: "Attribute field - float64 type", - key: telemetrytypes.TelemetryFieldKey{ - Name: "cpu.utilization", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeFloat64, - }, - expectedCol: attributeMetadataColumns["attributes"], - expectedError: nil, - }, - { - name: "Log field - nonexistent", - key: telemetrytypes.TelemetryFieldKey{ - Name: "nonexistent_field", - FieldContext: telemetrytypes.FieldContextLog, - }, - expectedCol: nil, - expectedError: qbtypes.ErrColumnNotFound, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - col, err := conditionBuilder.GetColumn(ctx, &tc.key) - - if tc.expectedError != nil { - assert.Equal(t, tc.expectedError, err) - } else { - require.NoError(t, err) - assert.Equal(t, tc.expectedCol, col) - } - }) - } -} - -func TestGetFieldKeyName(t *testing.T) { - ctx := context.Background() - conditionBuilder := &conditionBuilder{} - - testCases := []struct { - name string - key telemetrytypes.TelemetryFieldKey - expectedResult string - expectedError error - }{ - { - name: "Map column type - string attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "user.id", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - expectedResult: "attributes['user.id']", - expectedError: nil, - }, - { - name: "Map column type - number attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.size", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeNumber, - }, - expectedResult: "attributes['request.size']", - expectedError: nil, - }, - { - name: "Map column type - bool attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.success", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeBool, - }, - expectedResult: "attributes['request.success']", - expectedError: nil, - }, - { - name: "Map column type - resource attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "service.name", - FieldContext: telemetrytypes.FieldContextResource, - }, - expectedResult: "resource_attributes['service.name']", - expectedError: nil, - }, - { - name: "Non-existent column", - key: telemetrytypes.TelemetryFieldKey{ - Name: "nonexistent_field", - FieldContext: telemetrytypes.FieldContextLog, - }, - expectedResult: "", - expectedError: qbtypes.ErrColumnNotFound, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := conditionBuilder.GetTableFieldName(ctx, &tc.key) - - if tc.expectedError != nil { - assert.Equal(t, tc.expectedError, err) - } else { - require.NoError(t, err) - assert.Equal(t, tc.expectedResult, result) - } - }) - } -} - -func TestGetCondition(t *testing.T) { - ctx := context.Background() - conditionBuilder := NewConditionBuilder() + conditionBuilder := NewConditionBuilder(NewFieldMapper()) testCases := []struct { name string @@ -257,7 +53,7 @@ func TestGetCondition(t *testing.T) { for _, tc := range testCases { sb := sqlbuilder.NewSelectBuilder() t.Run(tc.name, func(t *testing.T) { - cond, err := conditionBuilder.GetCondition(ctx, &tc.key, tc.operator, tc.value, sb) + cond, err := conditionBuilder.ConditionFor(ctx, &tc.key, tc.operator, tc.value, sb) sb.Where(cond) if tc.expectedError != nil { diff --git a/pkg/telemetrymetadata/field_mapper.go b/pkg/telemetrymetadata/field_mapper.go new file mode 100644 index 0000000000..5374c9d16c --- /dev/null +++ b/pkg/telemetrymetadata/field_mapper.go @@ -0,0 +1,116 @@ +package telemetrymetadata + +import ( + "context" + "fmt" + "strings" + + schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" + "github.com/SigNoz/signoz/pkg/errors" + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" + "github.com/SigNoz/signoz/pkg/types/telemetrytypes" + "golang.org/x/exp/maps" +) + +var ( + attributeMetadataColumns = map[string]*schema.Column{ + "resource_attributes": {Name: "resource_attributes", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }}, + "attributes": {Name: "attributes", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }}, + } +) + +type fieldMapper struct { +} + +func NewFieldMapper() qbtypes.FieldMapper { + return &fieldMapper{} +} + +func (m *fieldMapper) getColumn(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) { + switch key.FieldContext { + case telemetrytypes.FieldContextResource: + return attributeMetadataColumns["resource_attributes"], nil + case telemetrytypes.FieldContextAttribute: + return attributeMetadataColumns["attributes"], nil + } + return nil, qbtypes.ErrColumnNotFound +} + +func (m *fieldMapper) ColumnFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) { + column, err := m.getColumn(ctx, key) + if err != nil { + return nil, err + } + return column, nil +} + +func (m *fieldMapper) FieldFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (string, error) { + column, err := m.getColumn(ctx, key) + if err != nil { + return "", err + } + + switch column.Type { + case schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }: + return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil + } + return column.Name, nil +} + +func (m *fieldMapper) ColumnExpressionFor( + ctx context.Context, + field *telemetrytypes.TelemetryFieldKey, + keys map[string][]*telemetrytypes.TelemetryFieldKey, +) (string, error) { + + colName, err := m.FieldFor(ctx, field) + if errors.Is(err, qbtypes.ErrColumnNotFound) { + // the key didn't have the right context to be added to the query + // we try to use the context we know of + keysForField := keys[field.Name] + if len(keysForField) == 0 { + // is it a static field? + if _, ok := attributeMetadataColumns[field.Name]; ok { + // if it is, attach the column name directly + field.FieldContext = telemetrytypes.FieldContextSpan + colName, _ = m.FieldFor(ctx, field) + } else { + // - the context is not provided + // - there are not keys for the field + // - it is not a static field + // - the next best thing to do is see if there is a typo + // and suggest a correction + correction, found := telemetrytypes.SuggestCorrection(field.Name, maps.Keys(keys)) + if found { + // we found a close match, in the error message send the suggestion + return "", errors.Wrapf(err, errors.TypeInvalidInput, errors.CodeInvalidInput, correction) + } else { + // not even a close match, return an error + return "", err + } + } + } else if len(keysForField) == 1 { + // we have a single key for the field, use it + colName, _ = m.FieldFor(ctx, keysForField[0]) + } else { + // select any non-empty value from the keys + args := []string{} + for _, key := range keysForField { + colName, _ = m.FieldFor(ctx, key) + args = append(args, fmt.Sprintf("toString(%s) != '', toString(%s)", colName, colName)) + } + colName = fmt.Sprintf("multiIf(%s)", strings.Join(args, ", ")) + } + } + + return fmt.Sprintf("%s AS `%s`", colName, field.Name), nil +} diff --git a/pkg/telemetrymetadata/field_mapper_test.go b/pkg/telemetrymetadata/field_mapper_test.go new file mode 100644 index 0000000000..c18de63b54 --- /dev/null +++ b/pkg/telemetrymetadata/field_mapper_test.go @@ -0,0 +1,216 @@ +package telemetrymetadata + +import ( + "context" + "testing" + + schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" + "github.com/SigNoz/signoz/pkg/types/telemetrytypes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetColumn(t *testing.T) { + + testCases := []struct { + name string + key telemetrytypes.TelemetryFieldKey + expectedCol *schema.Column + expectedError error + }{ + { + name: "Resource field", + key: telemetrytypes.TelemetryFieldKey{ + Name: "service.name", + FieldContext: telemetrytypes.FieldContextResource, + }, + expectedCol: attributeMetadataColumns["resource_attributes"], + expectedError: nil, + }, + { + name: "Scope field - scope name", + key: telemetrytypes.TelemetryFieldKey{ + Name: "name", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: nil, + expectedError: qbtypes.ErrColumnNotFound, + }, + { + name: "Scope field - scope.name", + key: telemetrytypes.TelemetryFieldKey{ + Name: "scope.name", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: nil, + expectedError: qbtypes.ErrColumnNotFound, + }, + { + name: "Scope field - scope_name", + key: telemetrytypes.TelemetryFieldKey{ + Name: "scope_name", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: nil, + expectedError: qbtypes.ErrColumnNotFound, + }, + { + name: "Scope field - version", + key: telemetrytypes.TelemetryFieldKey{ + Name: "version", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: nil, + expectedError: qbtypes.ErrColumnNotFound, + }, + { + name: "Scope field - other scope field", + key: telemetrytypes.TelemetryFieldKey{ + Name: "custom.scope.field", + FieldContext: telemetrytypes.FieldContextScope, + }, + expectedCol: nil, + expectedError: qbtypes.ErrColumnNotFound, + }, + { + name: "Attribute field - string type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "user.id", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeString, + }, + expectedCol: attributeMetadataColumns["attributes"], + expectedError: nil, + }, + { + name: "Attribute field - number type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.size", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeNumber, + }, + expectedCol: attributeMetadataColumns["attributes"], + expectedError: nil, + }, + { + name: "Attribute field - int64 type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.duration", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeInt64, + }, + expectedCol: attributeMetadataColumns["attributes"], + expectedError: nil, + }, + { + name: "Attribute field - float64 type", + key: telemetrytypes.TelemetryFieldKey{ + Name: "cpu.utilization", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeFloat64, + }, + expectedCol: attributeMetadataColumns["attributes"], + expectedError: nil, + }, + { + name: "Log field - nonexistent", + key: telemetrytypes.TelemetryFieldKey{ + Name: "nonexistent_field", + FieldContext: telemetrytypes.FieldContextLog, + }, + expectedCol: nil, + expectedError: qbtypes.ErrColumnNotFound, + }, + } + + fm := NewFieldMapper() + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + col, err := fm.ColumnFor(context.Background(), &tc.key) + + if tc.expectedError != nil { + assert.Equal(t, tc.expectedError, err) + } else { + require.NoError(t, err) + assert.Equal(t, tc.expectedCol, col) + } + }) + } +} + +func TestGetFieldKeyName(t *testing.T) { + ctx := context.Background() + + testCases := []struct { + name string + key telemetrytypes.TelemetryFieldKey + expectedResult string + expectedError error + }{ + { + name: "Map column type - string attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "user.id", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeString, + }, + expectedResult: "attributes['user.id']", + expectedError: nil, + }, + { + name: "Map column type - number attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.size", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeNumber, + }, + expectedResult: "attributes['request.size']", + expectedError: nil, + }, + { + name: "Map column type - bool attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.success", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeBool, + }, + expectedResult: "attributes['request.success']", + expectedError: nil, + }, + { + name: "Map column type - resource attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "service.name", + FieldContext: telemetrytypes.FieldContextResource, + }, + expectedResult: "resource_attributes['service.name']", + expectedError: nil, + }, + { + name: "Non-existent column", + key: telemetrytypes.TelemetryFieldKey{ + Name: "nonexistent_field", + FieldContext: telemetrytypes.FieldContextLog, + }, + expectedResult: "", + expectedError: qbtypes.ErrColumnNotFound, + }, + } + + fm := NewFieldMapper() + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := fm.FieldFor(ctx, &tc.key) + + if tc.expectedError != nil { + assert.Equal(t, tc.expectedError, err) + } else { + require.NoError(t, err) + assert.Equal(t, tc.expectedResult, result) + } + }) + } +} diff --git a/pkg/telemetrymetadata/metadata.go b/pkg/telemetrymetadata/metadata.go index 89334c2747..821d6c823b 100644 --- a/pkg/telemetrymetadata/metadata.go +++ b/pkg/telemetrymetadata/metadata.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/SigNoz/signoz/pkg/errors" - parser "github.com/SigNoz/signoz/pkg/parser/grammar" "github.com/SigNoz/signoz/pkg/telemetrystore" qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" "github.com/SigNoz/signoz/pkg/types/telemetrytypes" @@ -34,7 +33,9 @@ type telemetryMetaStore struct { relatedMetadataDBName string relatedMetadataTblName string + fm qbtypes.FieldMapper conditionBuilder qbtypes.ConditionBuilder + compiler qbtypes.FilterCompiler } func NewTelemetryMetaStore( @@ -50,8 +51,8 @@ func NewTelemetryMetaStore( relatedMetadataDBName string, relatedMetadataTblName string, ) telemetrytypes.MetadataStore { - return &telemetryMetaStore{ + t := &telemetryMetaStore{ telemetrystore: telemetrystore, tracesDBName: tracesDBName, tracesFieldsTblName: tracesFieldsTblName, @@ -63,9 +64,15 @@ func NewTelemetryMetaStore( logsFieldsTblName: logsFieldsTblName, relatedMetadataDBName: relatedMetadataDBName, relatedMetadataTblName: relatedMetadataTblName, - - conditionBuilder: NewConditionBuilder(), } + + fm := NewFieldMapper() + conditionBuilder := NewConditionBuilder(fm) + + t.fm = fm + t.conditionBuilder = conditionBuilder + + return t } // tracesTblStatementToFieldKeys returns materialised attribute/resource/scope keys from the traces table @@ -416,30 +423,36 @@ func (t *telemetryMetaStore) getMetricsKeys(ctx context.Context, fieldKeySelecto func (t *telemetryMetaStore) GetKeys(ctx context.Context, fieldKeySelector *telemetrytypes.FieldKeySelector) (map[string][]*telemetrytypes.TelemetryFieldKey, error) { var keys []*telemetrytypes.TelemetryFieldKey var err error + selectors := []*telemetrytypes.FieldKeySelector{} + + if fieldKeySelector != nil { + selectors = []*telemetrytypes.FieldKeySelector{fieldKeySelector} + } + switch fieldKeySelector.Signal { case telemetrytypes.SignalTraces: - keys, err = t.getTracesKeys(ctx, []*telemetrytypes.FieldKeySelector{fieldKeySelector}) + keys, err = t.getTracesKeys(ctx, selectors) case telemetrytypes.SignalLogs: - keys, err = t.getLogsKeys(ctx, []*telemetrytypes.FieldKeySelector{fieldKeySelector}) + keys, err = t.getLogsKeys(ctx, selectors) case telemetrytypes.SignalMetrics: - keys, err = t.getMetricsKeys(ctx, []*telemetrytypes.FieldKeySelector{fieldKeySelector}) + keys, err = t.getMetricsKeys(ctx, selectors) case telemetrytypes.SignalUnspecified: // get traces keys - tracesKeys, err := t.getTracesKeys(ctx, []*telemetrytypes.FieldKeySelector{fieldKeySelector}) + tracesKeys, err := t.getTracesKeys(ctx, selectors) if err != nil { return nil, err } keys = append(keys, tracesKeys...) // get logs keys - logsKeys, err := t.getLogsKeys(ctx, []*telemetrytypes.FieldKeySelector{fieldKeySelector}) + logsKeys, err := t.getLogsKeys(ctx, selectors) if err != nil { return nil, err } keys = append(keys, logsKeys...) // get metrics keys - metricsKeys, err := t.getMetricsKeys(ctx, []*telemetrytypes.FieldKeySelector{fieldKeySelector}) + metricsKeys, err := t.getMetricsKeys(ctx, selectors) if err != nil { return nil, err } @@ -520,25 +533,25 @@ func (t *telemetryMetaStore) getRelatedValues(ctx context.Context, fieldValueSel return nil, nil } - key := telemetrytypes.TelemetryFieldKey{ + key := &telemetrytypes.TelemetryFieldKey{ Name: fieldValueSelector.Name, Signal: fieldValueSelector.Signal, FieldContext: fieldValueSelector.FieldContext, FieldDataType: fieldValueSelector.FieldDataType, } - selectColumn, err := t.conditionBuilder.GetTableFieldName(ctx, &key) + selectColumn, err := t.fm.FieldFor(ctx, key) if err != nil { // we don't have a explicit column to select from the related metadata table // so we will select either from resource_attributes or attributes table // in that order - resourceColumn, _ := t.conditionBuilder.GetTableFieldName(ctx, &telemetrytypes.TelemetryFieldKey{ + resourceColumn, _ := t.fm.FieldFor(ctx, &telemetrytypes.TelemetryFieldKey{ Name: key.Name, FieldContext: telemetrytypes.FieldContextResource, FieldDataType: telemetrytypes.FieldDataTypeString, }) - attributeColumn, _ := t.conditionBuilder.GetTableFieldName(ctx, &telemetrytypes.TelemetryFieldKey{ + attributeColumn, _ := t.fm.FieldFor(ctx, &telemetrytypes.TelemetryFieldKey{ Name: key.Name, FieldContext: telemetrytypes.FieldContextAttribute, FieldDataType: telemetrytypes.FieldDataTypeString, @@ -549,21 +562,11 @@ func (t *telemetryMetaStore) getRelatedValues(ctx context.Context, fieldValueSel sb := sqlbuilder.Select("DISTINCT " + selectColumn).From(t.relatedMetadataDBName + "." + t.relatedMetadataTblName) if len(fieldValueSelector.ExistingQuery) != 0 { - keysSelectors, err := parser.QueryStringToKeysSelectors(fieldValueSelector.ExistingQuery) - + whereClause, _, err := t.compiler.Compile(ctx, fieldValueSelector.ExistingQuery) if err == nil { - for idx := range keysSelectors { - keysSelectors[idx].Signal = fieldValueSelector.Signal - } - keys, err := t.GetKeysMulti(ctx, keysSelectors) - if err == nil { - whereClause, _, err := parser.PrepareWhereClause(fieldValueSelector.ExistingQuery, keys, t.conditionBuilder, &telemetrytypes.TelemetryFieldKey{}) - if err == nil { - sb.AddWhereClause(whereClause) - } else { - zap.L().Warn("error parsing existing query for related values", zap.Error(err)) - } - } + sb.AddWhereClause(whereClause) + } else { + zap.L().Warn("error parsing existing query for related values", zap.Error(err)) } } diff --git a/pkg/telemetrymetadata/stmt_parse.go b/pkg/telemetrymetadata/stmt_parse.go index f453665fed..726d45a862 100644 --- a/pkg/telemetrymetadata/stmt_parse.go +++ b/pkg/telemetrymetadata/stmt_parse.go @@ -88,14 +88,14 @@ func (v *TelemetryFieldVisitor) VisitColumnDef(expr *parser.ColumnDef) error { fieldName := defaultExprStr[startIdx+2 : endIdx] // Create and store the TelemetryFieldKey - field := telemetrytypes.TelemetryFieldKey{ + field := &telemetrytypes.TelemetryFieldKey{ Name: fieldName, FieldContext: fieldContext, FieldDataType: fieldDataType, Materialized: true, } - v.Fields = append(v.Fields, &field) + v.Fields = append(v.Fields, field) return nil } diff --git a/pkg/telemetrytraces/condition_builder.go b/pkg/telemetrytraces/condition_builder.go index c161802990..0f18a3b56e 100644 --- a/pkg/telemetrytraces/condition_builder.go +++ b/pkg/telemetrytraces/condition_builder.go @@ -10,215 +10,31 @@ import ( "github.com/huandu/go-sqlbuilder" ) -var ( - indexV3Columns = map[string]*schema.Column{ - "ts_bucket_start": {Name: "ts_bucket_start", Type: schema.ColumnTypeUInt64}, - "resource_fingerprint": {Name: "resource_fingerprint", Type: schema.ColumnTypeString}, - - // intrinsic columns - "timestamp": {Name: "timestamp", Type: schema.DateTime64ColumnType{Precision: 9, Timezone: "UTC"}}, - "trace_id": {Name: "trace_id", Type: schema.FixedStringColumnType{Length: 32}}, - "span_id": {Name: "span_id", Type: schema.ColumnTypeString}, - "trace_state": {Name: "trace_state", Type: schema.ColumnTypeString}, - "parent_span_id": {Name: "parent_span_id", Type: schema.ColumnTypeString}, - "flags": {Name: "flags", Type: schema.ColumnTypeUInt32}, - "name": {Name: "name", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "kind": {Name: "kind", Type: schema.ColumnTypeInt8}, - "kind_string": {Name: "kind_string", Type: schema.ColumnTypeString}, - "duration_nano": {Name: "duration_nano", Type: schema.ColumnTypeUInt64}, - "status_code": {Name: "status_code", Type: schema.ColumnTypeInt16}, - "status_message": {Name: "status_message", Type: schema.ColumnTypeString}, - "status_code_string": {Name: "status_code_string", Type: schema.ColumnTypeString}, - - // attributes columns - "attributes_string": {Name: "attributes_string", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }}, - "attributes_number": {Name: "attributes_number", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeFloat64, - }}, - "attributes_bool": {Name: "attributes_bool", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeBool, - }}, - "resources_string": {Name: "resources_string", Type: schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }}, - - "events": {Name: "events", Type: schema.ArrayColumnType{ - ElementType: schema.ColumnTypeString, - }}, - "links": {Name: "links", Type: schema.ColumnTypeString}, - // derived columns - "response_status_code": {Name: "response_status_code", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "external_http_url": {Name: "external_http_url", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "http_url": {Name: "http_url", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "external_http_method": {Name: "external_http_method", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "http_method": {Name: "http_method", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "http_host": {Name: "http_host", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "db_name": {Name: "db_name", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "db_operation": {Name: "db_operation", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "has_error": {Name: "has_error", Type: schema.ColumnTypeBool}, - "is_remote": {Name: "is_remote", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - // materialized columns - "resource_string_service$$name": {Name: "resource_string_service$$name", Type: schema.ColumnTypeString}, - "attribute_string_http$$route": {Name: "attribute_string_http$$route", Type: schema.ColumnTypeString}, - "attribute_string_messaging$$system": {Name: "attribute_string_messaging$$system", Type: schema.ColumnTypeString}, - "attribute_string_messaging$$operation": {Name: "attribute_string_messaging$$operation", Type: schema.ColumnTypeString}, - "attribute_string_db$$system": {Name: "attribute_string_db$$system", Type: schema.ColumnTypeString}, - "attribute_string_rpc$$system": {Name: "attribute_string_rpc$$system", Type: schema.ColumnTypeString}, - "attribute_string_rpc$$service": {Name: "attribute_string_rpc$$service", Type: schema.ColumnTypeString}, - "attribute_string_rpc$$method": {Name: "attribute_string_rpc$$method", Type: schema.ColumnTypeString}, - "attribute_string_peer$$service": {Name: "attribute_string_peer$$service", Type: schema.ColumnTypeString}, - - // deprecated intrinsic columns - "traceID": {Name: "traceID", Type: schema.FixedStringColumnType{Length: 32}}, - "spanID": {Name: "spanID", Type: schema.ColumnTypeString}, - "parentSpanID": {Name: "parentSpanID", Type: schema.ColumnTypeString}, - "spanKind": {Name: "spanKind", Type: schema.ColumnTypeString}, - "durationNano": {Name: "durationNano", Type: schema.ColumnTypeUInt64}, - "statusCode": {Name: "statusCode", Type: schema.ColumnTypeInt16}, - "statusMessage": {Name: "statusMessage", Type: schema.ColumnTypeString}, - "statusCodeString": {Name: "statusCodeString", Type: schema.ColumnTypeString}, - - // deprecated derived columns - "references": {Name: "references", Type: schema.ColumnTypeString}, - "responseStatusCode": {Name: "responseStatusCode", Type: schema.ColumnTypeString}, - "externalHttpUrl": {Name: "externalHttpUrl", Type: schema.ColumnTypeString}, - "httpUrl": {Name: "httpUrl", Type: schema.ColumnTypeString}, - "externalHttpMethod": {Name: "externalHttpMethod", Type: schema.ColumnTypeString}, - "httpMethod": {Name: "httpMethod", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "httpHost": {Name: "httpHost", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "dbName": {Name: "dbName", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "dbOperation": {Name: "dbOperation", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "hasError": {Name: "hasError", Type: schema.ColumnTypeBool}, - "isRemote": {Name: "isRemote", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "serviceName": {Name: "serviceName", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "httpRoute": {Name: "httpRoute", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "msgSystem": {Name: "msgSystem", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "msgOperation": {Name: "msgOperation", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "dbSystem": {Name: "dbSystem", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "rpcSystem": {Name: "rpcSystem", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "rpcService": {Name: "rpcService", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "rpcMethod": {Name: "rpcMethod", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - "peerService": {Name: "peerService", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, - - // materialized exists columns - "resource_string_service$$name_exists": {Name: "resource_string_service$$name_exists", Type: schema.ColumnTypeBool}, - "attribute_string_http$$route_exists": {Name: "attribute_string_http$$route_exists", Type: schema.ColumnTypeBool}, - "attribute_string_messaging$$system_exists": {Name: "attribute_string_messaging$$system_exists", Type: schema.ColumnTypeBool}, - "attribute_string_messaging$$operation_exists": {Name: "attribute_string_messaging$$operation_exists", Type: schema.ColumnTypeBool}, - "attribute_string_db$$system_exists": {Name: "attribute_string_db$$system_exists", Type: schema.ColumnTypeBool}, - "attribute_string_rpc$$system_exists": {Name: "attribute_string_rpc$$system_exists", Type: schema.ColumnTypeBool}, - "attribute_string_rpc$$service_exists": {Name: "attribute_string_rpc$$service_exists", Type: schema.ColumnTypeBool}, - "attribute_string_rpc$$method_exists": {Name: "attribute_string_rpc$$method_exists", Type: schema.ColumnTypeBool}, - "attribute_string_peer$$service_exists": {Name: "attribute_string_peer$$service_exists", Type: schema.ColumnTypeBool}, - } -) - -// interface check -var _ qbtypes.ConditionBuilder = &conditionBuilder{} - type conditionBuilder struct { + fm qbtypes.FieldMapper } -func NewConditionBuilder() qbtypes.ConditionBuilder { - return &conditionBuilder{} +var _ qbtypes.ConditionBuilder = (*conditionBuilder)(nil) + +func NewConditionBuilder(fm qbtypes.FieldMapper) *conditionBuilder { + return &conditionBuilder{fm: fm} } -func (c *conditionBuilder) GetColumn(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) { - - switch key.FieldContext { - case telemetrytypes.FieldContextResource: - return indexV3Columns["resources_string"], nil - case telemetrytypes.FieldContextScope: - // we don't have scope data stored in the spans yet - return nil, qbtypes.ErrColumnNotFound - case telemetrytypes.FieldContextAttribute: - switch key.FieldDataType { - case telemetrytypes.FieldDataTypeString: - return indexV3Columns["attributes_string"], nil - case telemetrytypes.FieldDataTypeInt64, telemetrytypes.FieldDataTypeFloat64, telemetrytypes.FieldDataTypeNumber: - return indexV3Columns["attributes_number"], nil - case telemetrytypes.FieldDataTypeBool: - return indexV3Columns["attributes_bool"], nil - } - case telemetrytypes.FieldContextSpan: - col, ok := indexV3Columns[key.Name] - if !ok { - return nil, qbtypes.ErrColumnNotFound - } - return col, nil - } - - return nil, qbtypes.ErrColumnNotFound -} - -func (c *conditionBuilder) GetTableFieldName(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (string, error) { - column, err := c.GetColumn(ctx, key) - if err != nil { - return "", err - } - - switch column.Type { - case schema.ColumnTypeString, - schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - schema.ColumnTypeUInt64, - schema.ColumnTypeUInt32, - schema.ColumnTypeInt8, - schema.ColumnTypeInt16, - schema.ColumnTypeBool, - schema.DateTime64ColumnType{Precision: 9, Timezone: "UTC"}, - schema.FixedStringColumnType{Length: 32}: - return column.Name, nil - case schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeString, - }: - // a key could have been materialized, if so return the materialized column name - if key.Materialized { - return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil - } - return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil - case schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeFloat64, - }: - // a key could have been materialized, if so return the materialized column name - if key.Materialized { - return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil - } - return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil - case schema.MapColumnType{ - KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, - ValueType: schema.ColumnTypeBool, - }: - // a key could have been materialized, if so return the materialized column name - if key.Materialized { - return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil - } - return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil - } - // should not reach here - return column.Name, nil -} - -func (c *conditionBuilder) GetCondition( +func (c *conditionBuilder) ConditionFor( ctx context.Context, key *telemetrytypes.TelemetryFieldKey, operator qbtypes.FilterOperator, value any, sb *sqlbuilder.SelectBuilder, ) (string, error) { - column, err := c.GetColumn(ctx, key) + // first, locate the raw column type (so we can choose the right EXISTS logic) + column, err := c.fm.ColumnFor(ctx, key) if err != nil { return "", err } - tblFieldName, err := c.GetTableFieldName(ctx, key) + // then ask the mapper for the actual SQL reference + tblFieldName, err := c.fm.FieldFor(ctx, key) if err != nil { return "", err } diff --git a/pkg/telemetrytraces/condition_builder_test.go b/pkg/telemetrytraces/condition_builder_test.go index e7bd8e94d0..1e7f8df264 100644 --- a/pkg/telemetrytraces/condition_builder_test.go +++ b/pkg/telemetrytraces/condition_builder_test.go @@ -11,92 +11,8 @@ import ( "github.com/stretchr/testify/require" ) -func TestGetFieldKeyName(t *testing.T) { +func TestConditionFor(t *testing.T) { ctx := context.Background() - conditionBuilder := &conditionBuilder{} - - testCases := []struct { - name string - key telemetrytypes.TelemetryFieldKey - expectedResult string - expectedError error - }{ - { - name: "Simple column type - timestamp", - key: telemetrytypes.TelemetryFieldKey{ - Name: "timestamp", - FieldContext: telemetrytypes.FieldContextSpan, - }, - expectedResult: "timestamp", - expectedError: nil, - }, - { - name: "Map column type - string attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "user.id", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeString, - }, - expectedResult: "attributes_string['user.id']", - expectedError: nil, - }, - { - name: "Map column type - number attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.size", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeNumber, - }, - expectedResult: "attributes_number['request.size']", - expectedError: nil, - }, - { - name: "Map column type - bool attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "request.success", - FieldContext: telemetrytypes.FieldContextAttribute, - FieldDataType: telemetrytypes.FieldDataTypeBool, - }, - expectedResult: "attributes_bool['request.success']", - expectedError: nil, - }, - { - name: "Map column type - resource attribute", - key: telemetrytypes.TelemetryFieldKey{ - Name: "service.name", - FieldContext: telemetrytypes.FieldContextResource, - }, - expectedResult: "resources_string['service.name']", - expectedError: nil, - }, - { - name: "Non-existent column", - key: telemetrytypes.TelemetryFieldKey{ - Name: "nonexistent_field", - FieldContext: telemetrytypes.FieldContextSpan, - }, - expectedResult: "", - expectedError: qbtypes.ErrColumnNotFound, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - result, err := conditionBuilder.GetTableFieldName(ctx, &tc.key) - - if tc.expectedError != nil { - assert.Equal(t, tc.expectedError, err) - } else { - require.NoError(t, err) - assert.Equal(t, tc.expectedResult, result) - } - }) - } -} - -func TestGetCondition(t *testing.T) { - ctx := context.Background() - conditionBuilder := NewConditionBuilder() testCases := []struct { name string @@ -280,10 +196,13 @@ func TestGetCondition(t *testing.T) { }, } + fm := NewFieldMapper() + conditionBuilder := NewConditionBuilder(fm) + for _, tc := range testCases { sb := sqlbuilder.NewSelectBuilder() t.Run(tc.name, func(t *testing.T) { - cond, err := conditionBuilder.GetCondition(ctx, &tc.key, tc.operator, tc.value, sb) + cond, err := conditionBuilder.ConditionFor(ctx, &tc.key, tc.operator, tc.value, sb) sb.Where(cond) if tc.expectedError != nil { diff --git a/pkg/telemetrytraces/field_mapper.go b/pkg/telemetrytraces/field_mapper.go new file mode 100644 index 0000000000..e7630ce9c2 --- /dev/null +++ b/pkg/telemetrytraces/field_mapper.go @@ -0,0 +1,271 @@ +package telemetrytraces + +import ( + "context" + "fmt" + "strings" + + schema "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator" + "github.com/SigNoz/signoz/pkg/errors" + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" + "github.com/SigNoz/signoz/pkg/types/telemetrytypes" + "golang.org/x/exp/maps" +) + +var ( + indexV3Columns = map[string]*schema.Column{ + "ts_bucket_start": {Name: "ts_bucket_start", Type: schema.ColumnTypeUInt64}, + "resource_fingerprint": {Name: "resource_fingerprint", Type: schema.ColumnTypeString}, + + // intrinsic columns + "timestamp": {Name: "timestamp", Type: schema.DateTime64ColumnType{Precision: 9, Timezone: "UTC"}}, + "trace_id": {Name: "trace_id", Type: schema.FixedStringColumnType{Length: 32}}, + "span_id": {Name: "span_id", Type: schema.ColumnTypeString}, + "trace_state": {Name: "trace_state", Type: schema.ColumnTypeString}, + "parent_span_id": {Name: "parent_span_id", Type: schema.ColumnTypeString}, + "flags": {Name: "flags", Type: schema.ColumnTypeUInt32}, + "name": {Name: "name", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "kind": {Name: "kind", Type: schema.ColumnTypeInt8}, + "kind_string": {Name: "kind_string", Type: schema.ColumnTypeString}, + "duration_nano": {Name: "duration_nano", Type: schema.ColumnTypeUInt64}, + "status_code": {Name: "status_code", Type: schema.ColumnTypeInt16}, + "status_message": {Name: "status_message", Type: schema.ColumnTypeString}, + "status_code_string": {Name: "status_code_string", Type: schema.ColumnTypeString}, + + // attributes columns + "attributes_string": {Name: "attributes_string", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }}, + "attributes_number": {Name: "attributes_number", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeFloat64, + }}, + "attributes_bool": {Name: "attributes_bool", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeBool, + }}, + "resources_string": {Name: "resources_string", Type: schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }}, + + "events": {Name: "events", Type: schema.ArrayColumnType{ + ElementType: schema.ColumnTypeString, + }}, + "links": {Name: "links", Type: schema.ColumnTypeString}, + // derived columns + "response_status_code": {Name: "response_status_code", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "external_http_url": {Name: "external_http_url", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "http_url": {Name: "http_url", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "external_http_method": {Name: "external_http_method", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "http_method": {Name: "http_method", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "http_host": {Name: "http_host", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "db_name": {Name: "db_name", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "db_operation": {Name: "db_operation", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "has_error": {Name: "has_error", Type: schema.ColumnTypeBool}, + "is_remote": {Name: "is_remote", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + // materialized columns + "resource_string_service$$name": {Name: "resource_string_service$$name", Type: schema.ColumnTypeString}, + "attribute_string_http$$route": {Name: "attribute_string_http$$route", Type: schema.ColumnTypeString}, + "attribute_string_messaging$$system": {Name: "attribute_string_messaging$$system", Type: schema.ColumnTypeString}, + "attribute_string_messaging$$operation": {Name: "attribute_string_messaging$$operation", Type: schema.ColumnTypeString}, + "attribute_string_db$$system": {Name: "attribute_string_db$$system", Type: schema.ColumnTypeString}, + "attribute_string_rpc$$system": {Name: "attribute_string_rpc$$system", Type: schema.ColumnTypeString}, + "attribute_string_rpc$$service": {Name: "attribute_string_rpc$$service", Type: schema.ColumnTypeString}, + "attribute_string_rpc$$method": {Name: "attribute_string_rpc$$method", Type: schema.ColumnTypeString}, + "attribute_string_peer$$service": {Name: "attribute_string_peer$$service", Type: schema.ColumnTypeString}, + + // deprecated intrinsic columns + "traceID": {Name: "traceID", Type: schema.FixedStringColumnType{Length: 32}}, + "spanID": {Name: "spanID", Type: schema.ColumnTypeString}, + "parentSpanID": {Name: "parentSpanID", Type: schema.ColumnTypeString}, + "spanKind": {Name: "spanKind", Type: schema.ColumnTypeString}, + "durationNano": {Name: "durationNano", Type: schema.ColumnTypeUInt64}, + "statusCode": {Name: "statusCode", Type: schema.ColumnTypeInt16}, + "statusMessage": {Name: "statusMessage", Type: schema.ColumnTypeString}, + "statusCodeString": {Name: "statusCodeString", Type: schema.ColumnTypeString}, + + // deprecated derived columns + "references": {Name: "references", Type: schema.ColumnTypeString}, + "responseStatusCode": {Name: "responseStatusCode", Type: schema.ColumnTypeString}, + "externalHttpUrl": {Name: "externalHttpUrl", Type: schema.ColumnTypeString}, + "httpUrl": {Name: "httpUrl", Type: schema.ColumnTypeString}, + "externalHttpMethod": {Name: "externalHttpMethod", Type: schema.ColumnTypeString}, + "httpMethod": {Name: "httpMethod", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "httpHost": {Name: "httpHost", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "dbName": {Name: "dbName", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "dbOperation": {Name: "dbOperation", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "hasError": {Name: "hasError", Type: schema.ColumnTypeBool}, + "isRemote": {Name: "isRemote", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "serviceName": {Name: "serviceName", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "httpRoute": {Name: "httpRoute", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "msgSystem": {Name: "msgSystem", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "msgOperation": {Name: "msgOperation", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "dbSystem": {Name: "dbSystem", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "rpcSystem": {Name: "rpcSystem", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "rpcService": {Name: "rpcService", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "rpcMethod": {Name: "rpcMethod", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + "peerService": {Name: "peerService", Type: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}}, + + // materialized exists columns + "resource_string_service$$name_exists": {Name: "resource_string_service$$name_exists", Type: schema.ColumnTypeBool}, + "attribute_string_http$$route_exists": {Name: "attribute_string_http$$route_exists", Type: schema.ColumnTypeBool}, + "attribute_string_messaging$$system_exists": {Name: "attribute_string_messaging$$system_exists", Type: schema.ColumnTypeBool}, + "attribute_string_messaging$$operation_exists": {Name: "attribute_string_messaging$$operation_exists", Type: schema.ColumnTypeBool}, + "attribute_string_db$$system_exists": {Name: "attribute_string_db$$system_exists", Type: schema.ColumnTypeBool}, + "attribute_string_rpc$$system_exists": {Name: "attribute_string_rpc$$system_exists", Type: schema.ColumnTypeBool}, + "attribute_string_rpc$$service_exists": {Name: "attribute_string_rpc$$service_exists", Type: schema.ColumnTypeBool}, + "attribute_string_rpc$$method_exists": {Name: "attribute_string_rpc$$method_exists", Type: schema.ColumnTypeBool}, + "attribute_string_peer$$service_exists": {Name: "attribute_string_peer$$service_exists", Type: schema.ColumnTypeBool}, + } +) + +type defaultFieldMapper struct{} + +var _ qbtypes.FieldMapper = (*defaultFieldMapper)(nil) + +func NewFieldMapper() *defaultFieldMapper { + return &defaultFieldMapper{} +} + +func (m *defaultFieldMapper) getColumn( + _ context.Context, + key *telemetrytypes.TelemetryFieldKey, +) (*schema.Column, error) { + switch key.FieldContext { + case telemetrytypes.FieldContextResource: + return indexV3Columns["resources_string"], nil + case telemetrytypes.FieldContextScope: + return nil, qbtypes.ErrColumnNotFound + case telemetrytypes.FieldContextAttribute: + switch key.FieldDataType { + case telemetrytypes.FieldDataTypeString: + return indexV3Columns["attributes_string"], nil + case telemetrytypes.FieldDataTypeInt64, + telemetrytypes.FieldDataTypeFloat64, + telemetrytypes.FieldDataTypeNumber: + return indexV3Columns["attributes_number"], nil + case telemetrytypes.FieldDataTypeBool: + return indexV3Columns["attributes_bool"], nil + } + case telemetrytypes.FieldContextSpan: + if col, ok := indexV3Columns[key.Name]; ok { + return col, nil + } + return nil, qbtypes.ErrColumnNotFound + } + return nil, qbtypes.ErrColumnNotFound +} + +func (m *defaultFieldMapper) ColumnFor( + ctx context.Context, + key *telemetrytypes.TelemetryFieldKey, +) (*schema.Column, error) { + return m.getColumn(ctx, key) +} + +// FieldFor returns the table field name for the given key if it exists +// otherwise it returns qbtypes.ErrColumnNotFound +func (m *defaultFieldMapper) FieldFor( + ctx context.Context, + key *telemetrytypes.TelemetryFieldKey, +) (string, error) { + column, err := m.getColumn(ctx, key) + if err != nil { + return "", err + } + + switch column.Type { + case schema.ColumnTypeString, + schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + schema.ColumnTypeUInt64, + schema.ColumnTypeUInt32, + schema.ColumnTypeInt8, + schema.ColumnTypeInt16, + schema.ColumnTypeBool, + schema.DateTime64ColumnType{Precision: 9, Timezone: "UTC"}, + schema.FixedStringColumnType{Length: 32}: + return column.Name, nil + case schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeString, + }: + // a key could have been materialized, if so return the materialized column name + if key.Materialized { + return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil + } + return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil + case schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeFloat64, + }: + // a key could have been materialized, if so return the materialized column name + if key.Materialized { + return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil + } + return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil + case schema.MapColumnType{ + KeyType: schema.LowCardinalityColumnType{ElementType: schema.ColumnTypeString}, + ValueType: schema.ColumnTypeBool, + }: + // a key could have been materialized, if so return the materialized column name + if key.Materialized { + return telemetrytypes.FieldKeyToMaterializedColumnName(key), nil + } + return fmt.Sprintf("%s['%s']", column.Name, key.Name), nil + } + // should not reach here + return column.Name, nil +} + +// ColumnExpressionFor returns the column expression for the given field +// if it exists otherwise it returns qbtypes.ErrColumnNotFound +func (m *defaultFieldMapper) ColumnExpressionFor( + ctx context.Context, + field *telemetrytypes.TelemetryFieldKey, + keys map[string][]*telemetrytypes.TelemetryFieldKey, +) (string, error) { + + colName, err := m.FieldFor(ctx, field) + if errors.Is(err, qbtypes.ErrColumnNotFound) { + // the key didn't have the right context to be added to the query + // we try to use the context we know of + keysForField := keys[field.Name] + if len(keysForField) == 0 { + // is it a static field? + if _, ok := indexV3Columns[field.Name]; ok { + // if it is, attach the column name directly + field.FieldContext = telemetrytypes.FieldContextSpan + colName, _ = m.FieldFor(ctx, field) + } else { + // - the context is not provided + // - there are not keys for the field + // - it is not a static field + // - the next best thing to do is see if there is a typo + // and suggest a correction + correction, found := telemetrytypes.SuggestCorrection(field.Name, maps.Keys(keys)) + if found { + // we found a close match, in the error message send the suggestion + return "", errors.Wrapf(err, errors.TypeInvalidInput, errors.CodeInvalidInput, correction) + } else { + // not even a close match, return an error + return "", err + } + } + } else if len(keysForField) == 1 { + // we have a single key for the field, use it + colName, _ = m.FieldFor(ctx, keysForField[0]) + } else { + // select any non-empty value from the keys + args := []string{} + for _, key := range keysForField { + colName, _ = m.FieldFor(ctx, key) + args = append(args, fmt.Sprintf("toString(%s) != '', toString(%s)", colName, colName)) + } + colName = fmt.Sprintf("multiIf(%s)", strings.Join(args, ", ")) + } + } + + return fmt.Sprintf("%s AS `%s`", colName, field.Name), nil +} diff --git a/pkg/telemetrytraces/field_mapper_test.go b/pkg/telemetrytraces/field_mapper_test.go new file mode 100644 index 0000000000..472daadada --- /dev/null +++ b/pkg/telemetrytraces/field_mapper_test.go @@ -0,0 +1,95 @@ +package telemetrytraces + +import ( + "context" + "testing" + + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" + "github.com/SigNoz/signoz/pkg/types/telemetrytypes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGetFieldKeyName(t *testing.T) { + ctx := context.Background() + + testCases := []struct { + name string + key telemetrytypes.TelemetryFieldKey + expectedResult string + expectedError error + }{ + { + name: "Simple column type - timestamp", + key: telemetrytypes.TelemetryFieldKey{ + Name: "timestamp", + FieldContext: telemetrytypes.FieldContextSpan, + }, + expectedResult: "timestamp", + expectedError: nil, + }, + { + name: "Map column type - string attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "user.id", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeString, + }, + expectedResult: "attributes_string['user.id']", + expectedError: nil, + }, + { + name: "Map column type - number attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.size", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeNumber, + }, + expectedResult: "attributes_number['request.size']", + expectedError: nil, + }, + { + name: "Map column type - bool attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "request.success", + FieldContext: telemetrytypes.FieldContextAttribute, + FieldDataType: telemetrytypes.FieldDataTypeBool, + }, + expectedResult: "attributes_bool['request.success']", + expectedError: nil, + }, + { + name: "Map column type - resource attribute", + key: telemetrytypes.TelemetryFieldKey{ + Name: "service.name", + FieldContext: telemetrytypes.FieldContextResource, + }, + expectedResult: "resources_string['service.name']", + expectedError: nil, + }, + { + name: "Non-existent column", + key: telemetrytypes.TelemetryFieldKey{ + Name: "nonexistent_field", + FieldContext: telemetrytypes.FieldContextSpan, + }, + expectedResult: "", + expectedError: qbtypes.ErrColumnNotFound, + }, + } + + fm := NewFieldMapper() + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := fm.FieldFor(ctx, &tc.key) + + if tc.expectedError != nil { + assert.Equal(t, tc.expectedError, err) + } else { + require.NoError(t, err) + assert.Equal(t, tc.expectedResult, result) + } + }) + } +} diff --git a/pkg/types/querybuildertypes/querybuildertypesv5/qb.go b/pkg/types/querybuildertypes/querybuildertypesv5/qb.go index 875496e705..f396dfbdd5 100644 --- a/pkg/types/querybuildertypes/querybuildertypesv5/qb.go +++ b/pkg/types/querybuildertypes/querybuildertypesv5/qb.go @@ -10,19 +10,47 @@ import ( ) var ( - ErrColumnNotFound = errors.Newf(errors.TypeNotFound, errors.CodeNotFound, "column not found") + ErrColumnNotFound = errors.Newf(errors.TypeNotFound, errors.CodeNotFound, "field not found") ErrBetweenValues = errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "(not) between operator requires two values") ErrInValues = errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "(not) in operator requires a list of values") ) -// ConditionBuilder is the interface for building the condition part of the query. -type ConditionBuilder interface { - // GetColumn returns the column for the given key. - GetColumn(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) +type JsonKeyToFieldFunc func(context.Context, *telemetrytypes.TelemetryFieldKey, FilterOperator, any) (string, any) - // GetTableFieldName returns the table field name for the given key. - GetTableFieldName(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (string, error) - - // GetCondition returns the condition for the given key, operator and value. - GetCondition(ctx context.Context, key *telemetrytypes.TelemetryFieldKey, operator FilterOperator, value any, sb *sqlbuilder.SelectBuilder) (string, error) +// FieldMapper maps the telemetry field key to the table field name. +type FieldMapper interface { + // FieldFor returns the field name for the given key. + FieldFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (string, error) + // ColumnFor returns the column for the given key. + ColumnFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey) (*schema.Column, error) + // ColumnExpressionFor returns the column expression for the given key. + ColumnExpressionFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemetrytypes.TelemetryFieldKey) (string, error) +} + +// ConditionBuilder builds the condition for the filter. +type ConditionBuilder interface { + // ConditionFor returns the condition for the given key, operator and value. + ConditionFor(ctx context.Context, key *telemetrytypes.TelemetryFieldKey, operator FilterOperator, value any, sb *sqlbuilder.SelectBuilder) (string, error) +} + +type FilterCompiler interface { + // Compile compiles the filter into a sqlbuilder.WhereClause. + Compile(ctx context.Context, filter string) (*sqlbuilder.WhereClause, []string, error) +} + +type AggExprRewriter interface { + // Rewrite rewrites the aggregation expression to be used in the query. + Rewrite(ctx context.Context, expr string) (string, []any, error) +} + +type Statement struct { + Query string + Args []any + Warnings []string +} + +// StatementBuilder builds the query. +type StatementBuilder interface { + // Build builds the query. + Build(ctx context.Context, start, end uint64, requestType RequestType, query QueryBuilderQuery) (*Statement, error) } diff --git a/pkg/types/querybuildertypes/querybuildertypesv5/query.go b/pkg/types/querybuildertypes/querybuildertypesv5/query.go index dc2032890a..070fd5dfa2 100644 --- a/pkg/types/querybuildertypes/querybuildertypesv5/query.go +++ b/pkg/types/querybuildertypes/querybuildertypesv5/query.go @@ -23,7 +23,7 @@ type Result struct { type ExecStats struct { RowsScanned int64 `json:"rowsScanned"` BytesScanned int64 `json:"bytesScanned"` - DurationMs int64 `json:"durationMs"` + DurationMS int64 `json:"durationMs"` } type TimeRange struct{ From, To uint64 } // ms since epoch diff --git a/pkg/types/telemetrytypes/maybe_typo.go b/pkg/types/telemetrytypes/maybe_typo.go new file mode 100644 index 0000000000..f884d0e8a9 --- /dev/null +++ b/pkg/types/telemetrytypes/maybe_typo.go @@ -0,0 +1,111 @@ +package telemetrytypes + +import ( + "fmt" + "strings" +) + +const ( + typoSuggestionThreshold = 0.75 +) + +// levenshteinDistance calculates the edit distance between two strings +func levenshteinDistance(s1, s2 string) int { + s1 = strings.ToLower(s1) + s2 = strings.ToLower(s2) + + if len(s1) == 0 { + return len(s2) + } + if len(s2) == 0 { + return len(s1) + } + + // Create two work vectors of integer distances + v0 := make([]int, len(s2)+1) + v1 := make([]int, len(s2)+1) + + // Initialize v0 (the previous row of distances) + for i := 0; i <= len(s2); i++ { + v0[i] = i + } + + // Calculate each row in the matrix + for i := range len(s1) { + v1[0] = i + 1 + + for j := range len(s2) { + deletionCost := v0[j+1] + 1 + insertionCost := v1[j] + 1 + + var substitutionCost int + if s1[i] == s2[j] { + substitutionCost = v0[j] + } else { + substitutionCost = v0[j] + 1 + } + + v1[j+1] = min(deletionCost, insertionCost, substitutionCost) + } + + // Copy v1 to v0 for next iteration + for j := 0; j <= len(s2); j++ { + v0[j] = v1[j] + } + } + + return v1[len(s2)] +} + +// similarity returns a value between 0 and 1, where 1 means perfect match +func similarity(s1, s2 string) float64 { + maxLen := max(len(s1), len(s2)) + if maxLen == 0 { + return 1.0 // Both strings are empty + } + + distance := levenshteinDistance(s1, s2) + return 1.0 - float64(distance)/float64(maxLen) +} + +func min(a, b, c int) int { + if a < b { + if a < c { + return a + } + return c + } + if b < c { + return b + } + return c +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +// SuggestCorrection checks if there are any column names similar to the input +// and returns a suggestion if there's at least 75% similarity +func SuggestCorrection(input string, knownFieldKeys []string) (string, bool) { + + var bestMatch string + bestSimilarity := 0.0 + + for _, columnName := range knownFieldKeys { + sim := similarity(input, columnName) + if sim > bestSimilarity && sim >= typoSuggestionThreshold { + bestSimilarity = sim + bestMatch = columnName + } + } + + if bestSimilarity >= typoSuggestionThreshold { + return fmt.Sprintf("did you mean: %s?", bestMatch), true + } + + return "", false +}