From 96162d7949b47cbcbda6d89232fbfe72b368905a Mon Sep 17 00:00:00 2001 From: Vishal Sharma Date: Mon, 27 May 2024 17:20:45 +0530 Subject: [PATCH] fix: handle-large-traces (#4903) * fix: handle-large-traces * feat: add isSubTree key to identify subTrees Show user a loom video explaining how to navigate large spans * chore: update icon to warning * chore: fire telemetry events for all trace detail API calls, large traces * chore: update MAX_SPANS_IN_TRACE to 250k --- ee/query-service/app/api/traces.go | 12 +--- ee/query-service/app/db/trace.go | 6 ++ ee/query-service/constants/constants.go | 3 +- .../container/TraceDetail/Missingtrace.tsx | 37 ++++------- .../src/container/TraceDetail/SubTree.tsx | 22 +++++++ frontend/src/container/TraceDetail/index.tsx | 4 +- frontend/src/types/api/trace/getTraceItem.ts | 1 + .../app/clickhouseReader/reader.go | 53 +++++++++++++-- pkg/query-service/app/http_handler.go | 4 +- pkg/query-service/app/parser.go | 65 ++++++++++++------- pkg/query-service/interfaces/interface.go | 2 +- pkg/query-service/model/queryParams.go | 9 +++ pkg/query-service/model/response.go | 5 +- pkg/query-service/telemetry/telemetry.go | 6 ++ 14 files changed, 161 insertions(+), 68 deletions(-) create mode 100644 frontend/src/container/TraceDetail/SubTree.tsx diff --git a/ee/query-service/app/api/traces.go b/ee/query-service/app/api/traces.go index ee18b2f50b..3864fc672e 100644 --- a/ee/query-service/app/api/traces.go +++ b/ee/query-service/app/api/traces.go @@ -2,10 +2,8 @@ package api import ( "net/http" - "strconv" "go.signoz.io/signoz/ee/query-service/app/db" - "go.signoz.io/signoz/ee/query-service/constants" "go.signoz.io/signoz/ee/query-service/model" baseapp "go.signoz.io/signoz/pkg/query-service/app" basemodel "go.signoz.io/signoz/pkg/query-service/model" @@ -19,17 +17,13 @@ func (ah *APIHandler) searchTraces(w http.ResponseWriter, r *http.Request) { ah.APIHandler.SearchTraces(w, r) return } - traceId, spanId, levelUpInt, levelDownInt, err := baseapp.ParseSearchTracesParams(r) + searchTracesParams, err := baseapp.ParseSearchTracesParams(r) if err != nil { RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, "Error reading params") return } - spanLimit, err := strconv.Atoi(constants.SpanLimitStr) - if err != nil { - zap.L().Error("Error during strconv.Atoi() on SPAN_LIMIT env variable", zap.Error(err)) - return - } - result, err := ah.opts.DataConnector.SearchTraces(r.Context(), traceId, spanId, levelUpInt, levelDownInt, spanLimit, db.SmartTraceAlgorithm) + + result, err := ah.opts.DataConnector.SearchTraces(r.Context(), searchTracesParams, db.SmartTraceAlgorithm) if ah.HandleError(w, err, http.StatusBadRequest) { return } diff --git a/ee/query-service/app/db/trace.go b/ee/query-service/app/db/trace.go index c6fe9045cf..dec222a09c 100644 --- a/ee/query-service/app/db/trace.go +++ b/ee/query-service/app/db/trace.go @@ -13,6 +13,11 @@ import ( func SmartTraceAlgorithm(payload []basemodel.SearchSpanResponseItem, targetSpanId string, levelUp int, levelDown int, spanLimit int) ([]basemodel.SearchSpansResult, error) { var spans []*model.SpanForTraceDetails + // if targetSpanId is null or not present then randomly select a span as targetSpanId + if (targetSpanId == "" || targetSpanId == "null") && len(payload) > 0 { + targetSpanId = payload[0].SpanID + } + // Build a slice of spans from the payload for _, spanItem := range payload { var parentID string @@ -115,6 +120,7 @@ func SmartTraceAlgorithm(payload []basemodel.SearchSpanResponseItem, targetSpanI searchSpansResult := []basemodel.SearchSpansResult{{ Columns: []string{"__time", "SpanId", "TraceId", "ServiceName", "Name", "Kind", "DurationNano", "TagsKeys", "TagsValues", "References", "Events", "HasError"}, Events: make([][]interface{}, len(resultSpansSet)), + IsSubTree: true, }, } diff --git a/ee/query-service/constants/constants.go b/ee/query-service/constants/constants.go index aeeea03cf2..cc4bb07476 100644 --- a/ee/query-service/constants/constants.go +++ b/ee/query-service/constants/constants.go @@ -11,7 +11,8 @@ const ( var LicenseSignozIo = "https://license.signoz.io/api/v1" var LicenseAPIKey = GetOrDefaultEnv("SIGNOZ_LICENSE_API_KEY", "") var SaasSegmentKey = GetOrDefaultEnv("SIGNOZ_SAAS_SEGMENT_KEY", "") -var SpanLimitStr = GetOrDefaultEnv("SPAN_LIMIT", "5000") +var SpanRenderLimitStr = GetOrDefaultEnv("SPAN_RENDER_LIMIT", "2500") +var MaxSpansInTraceStr = GetOrDefaultEnv("MAX_SPANS_IN_TRACE", "250000") func GetOrDefaultEnv(key string, fallback string) string { v := os.Getenv(key) diff --git a/frontend/src/container/TraceDetail/Missingtrace.tsx b/frontend/src/container/TraceDetail/Missingtrace.tsx index 4375fd95d3..1871342e91 100644 --- a/frontend/src/container/TraceDetail/Missingtrace.tsx +++ b/frontend/src/container/TraceDetail/Missingtrace.tsx @@ -1,30 +1,21 @@ import { volcano } from '@ant-design/colors'; -import { InfoCircleOutlined } from '@ant-design/icons'; -import { Popover, Typography } from 'antd'; - -function PopOverContent(): JSX.Element { - return ( -
- More details on missing spans{' '} - - here - -
- ); -} +import { WarningOutlined } from '@ant-design/icons'; +import { Typography } from 'antd'; function MissingSpansMessage(): JSX.Element { return ( - - - - This trace has missing spans - - + + + This trace has missing spans, more details{' '} + + here + + ); } diff --git a/frontend/src/container/TraceDetail/SubTree.tsx b/frontend/src/container/TraceDetail/SubTree.tsx new file mode 100644 index 0000000000..af801f6de5 --- /dev/null +++ b/frontend/src/container/TraceDetail/SubTree.tsx @@ -0,0 +1,22 @@ +import { volcano } from '@ant-design/colors'; +import { WarningOutlined } from '@ant-design/icons'; +import { Typography } from 'antd'; + +function SubTreeMessage(): JSX.Element { + return ( + + + Only part of trace is shown, for more info{' '} + + watch this + + + ); +} + +export default SubTreeMessage; diff --git a/frontend/src/container/TraceDetail/index.tsx b/frontend/src/container/TraceDetail/index.tsx index bdf857bec7..67b55a1bca 100644 --- a/frontend/src/container/TraceDetail/index.tsx +++ b/frontend/src/container/TraceDetail/index.tsx @@ -33,6 +33,7 @@ import MissingSpansMessage from './Missingtrace'; import SelectedSpanDetails from './SelectedSpanDetails'; import * as styles from './styles'; import { FlameGraphMissingSpansContainer, GanttChartWrapper } from './styles'; +import SubTreeMessage from './SubTree'; import { formUrlParams, getSortedData, @@ -142,9 +143,10 @@ function TraceDetail({ response }: TraceDetailProps): JSX.Element { Trace Details - {traceMetaData.totalSpans} Span + {traceMetaData.totalSpans} Spans {hasMissingSpans && } + {response[0]?.isSubTree && } {map(tree.spanTree, (tree) => ( diff --git a/frontend/src/types/api/trace/getTraceItem.ts b/frontend/src/types/api/trace/getTraceItem.ts index 2e541df929..4a6def3896 100644 --- a/frontend/src/types/api/trace/getTraceItem.ts +++ b/frontend/src/types/api/trace/getTraceItem.ts @@ -14,6 +14,7 @@ export interface PayloadProps { events: Span[]; segmentID: string; columns: string[]; + isSubTree: boolean; }; } diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index 535aaee237..35dfb3879e 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -1953,7 +1953,39 @@ func (r *ClickHouseReader) GetUsage(ctx context.Context, queryParams *model.GetU return &usageItems, nil } -func (r *ClickHouseReader) SearchTraces(ctx context.Context, traceId string, spanId string, levelUp int, levelDown int, spanLimit int, smartTraceAlgorithm func(payload []model.SearchSpanResponseItem, targetSpanId string, levelUp int, levelDown int, spanLimit int) ([]model.SearchSpansResult, error)) (*[]model.SearchSpansResult, error) { +func (r *ClickHouseReader) SearchTraces(ctx context.Context, params *model.SearchTracesParams, + smartTraceAlgorithm func(payload []model.SearchSpanResponseItem, targetSpanId string, + levelUp int, levelDown int, spanLimit int) ([]model.SearchSpansResult, error)) (*[]model.SearchSpansResult, error) { + + var countSpans uint64 + countQuery := fmt.Sprintf("SELECT count() as count from %s.%s WHERE traceID=$1", r.TraceDB, r.SpansTable) + err := r.db.QueryRow(ctx, countQuery, params.TraceID).Scan(&countSpans) + if err != nil { + zap.L().Error("Error in processing sql query", zap.Error(err)) + return nil, fmt.Errorf("error in processing sql query") + } + + if countSpans > uint64(params.MaxSpansInTrace) { + zap.L().Error("Max spans allowed in a trace limit reached", zap.Int("MaxSpansInTrace", params.MaxSpansInTrace), + zap.Uint64("Count", countSpans)) + userEmail, err := auth.GetEmailFromJwt(ctx) + if err == nil { + data := map[string]interface{}{ + "traceSize": countSpans, + "maxSpansInTraceLimit": params.MaxSpansInTrace, + } + telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_MAX_SPANS_ALLOWED_LIMIT_REACHED, data, userEmail, true, false) + } + return nil, fmt.Errorf("Max spans allowed in trace limit reached, please contact support for more details") + } + + userEmail, err := auth.GetEmailFromJwt(ctx) + if err == nil { + data := map[string]interface{}{ + "traceSize": countSpans, + } + telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_TRACE_DETAIL_API, data, userEmail, true, false) + } var searchScanResponses []model.SearchSpanDBResponseItem @@ -1961,7 +1993,7 @@ func (r *ClickHouseReader) SearchTraces(ctx context.Context, traceId string, spa start := time.Now() - err := r.db.Select(ctx, &searchScanResponses, query, traceId) + err = r.db.Select(ctx, &searchScanResponses, query, params.TraceID) zap.L().Info(query) @@ -1972,8 +2004,9 @@ func (r *ClickHouseReader) SearchTraces(ctx context.Context, traceId string, spa end := time.Now() zap.L().Debug("getTraceSQLQuery took: ", zap.Duration("duration", end.Sub(start))) searchSpansResult := []model.SearchSpansResult{{ - Columns: []string{"__time", "SpanId", "TraceId", "ServiceName", "Name", "Kind", "DurationNano", "TagsKeys", "TagsValues", "References", "Events", "HasError"}, - Events: make([][]interface{}, len(searchScanResponses)), + Columns: []string{"__time", "SpanId", "TraceId", "ServiceName", "Name", "Kind", "DurationNano", "TagsKeys", "TagsValues", "References", "Events", "HasError"}, + Events: make([][]interface{}, len(searchScanResponses)), + IsSubTree: false, }, } @@ -1990,14 +2023,22 @@ func (r *ClickHouseReader) SearchTraces(ctx context.Context, traceId string, spa err = r.featureFlags.CheckFeature(model.SmartTraceDetail) smartAlgoEnabled := err == nil - if len(searchScanResponses) > spanLimit && spanId != "" && smartAlgoEnabled { + if len(searchScanResponses) > params.SpansRenderLimit && smartAlgoEnabled { start = time.Now() - searchSpansResult, err = smartTraceAlgorithm(searchSpanResponses, spanId, levelUp, levelDown, spanLimit) + searchSpansResult, err = smartTraceAlgorithm(searchSpanResponses, params.SpanID, params.LevelUp, params.LevelDown, params.SpansRenderLimit) if err != nil { return nil, err } end = time.Now() zap.L().Debug("smartTraceAlgo took: ", zap.Duration("duration", end.Sub(start))) + userEmail, err := auth.GetEmailFromJwt(ctx) + if err == nil { + data := map[string]interface{}{ + "traceSize": len(searchScanResponses), + "spansRenderLimit": params.SpansRenderLimit, + } + telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_LARGE_TRACE_OPENED, data, userEmail, true, false) + } } else { for i, item := range searchSpanResponses { spanEvents := item.GetValues() diff --git a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go index 4d63c887fd..5da041edc7 100644 --- a/pkg/query-service/app/http_handler.go +++ b/pkg/query-service/app/http_handler.go @@ -1321,13 +1321,13 @@ func (aH *APIHandler) getServicesList(w http.ResponseWriter, r *http.Request) { func (aH *APIHandler) SearchTraces(w http.ResponseWriter, r *http.Request) { - traceId, spanId, levelUpInt, levelDownInt, err := ParseSearchTracesParams(r) + params, err := ParseSearchTracesParams(r) if err != nil { RespondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, "Error reading params") return } - result, err := aH.reader.SearchTraces(r.Context(), traceId, spanId, levelUpInt, levelDownInt, 0, nil) + result, err := aH.reader.SearchTraces(r.Context(), params, nil) if aH.HandleError(w, err, http.StatusBadRequest) { return } diff --git a/pkg/query-service/app/parser.go b/pkg/query-service/app/parser.go index 0e5177bea8..679b7581aa 100644 --- a/pkg/query-service/app/parser.go +++ b/pkg/query-service/app/parser.go @@ -17,11 +17,12 @@ import ( promModel "github.com/prometheus/common/model" "go.uber.org/multierr" + "go.signoz.io/signoz/ee/query-service/constants" "go.signoz.io/signoz/pkg/query-service/app/metrics" "go.signoz.io/signoz/pkg/query-service/app/queryBuilder" "go.signoz.io/signoz/pkg/query-service/auth" + baseconstants "go.signoz.io/signoz/pkg/query-service/constants" "go.signoz.io/signoz/pkg/query-service/common" - "go.signoz.io/signoz/pkg/query-service/constants" "go.signoz.io/signoz/pkg/query-service/model" v3 "go.signoz.io/signoz/pkg/query-service/model/v3" "go.signoz.io/signoz/pkg/query-service/postprocess" @@ -250,28 +251,46 @@ func parseGetServicesRequest(r *http.Request) (*model.GetServicesParams, error) return postData, nil } -func ParseSearchTracesParams(r *http.Request) (string, string, int, int, error) { +func ParseSearchTracesParams(r *http.Request) (*model.SearchTracesParams, error) { vars := mux.Vars(r) - traceId := vars["traceId"] - spanId := r.URL.Query().Get("spanId") - levelUp := r.URL.Query().Get("levelUp") - levelDown := r.URL.Query().Get("levelDown") - if levelUp == "" || levelUp == "null" { - levelUp = "0" + params := &model.SearchTracesParams{} + params.TraceID = vars["traceId"] + params.SpanID = r.URL.Query().Get("spanId") + + levelUpStr := r.URL.Query().Get("levelUp") + levelDownStr := r.URL.Query().Get("levelDown") + SpanRenderLimitStr := r.URL.Query().Get("spanRenderLimit") + if levelUpStr == "" || levelUpStr == "null" { + levelUpStr = "0" } - if levelDown == "" || levelDown == "null" { - levelDown = "0" + if levelDownStr == "" || levelDownStr == "null" { + levelDownStr = "0" + } + if SpanRenderLimitStr == "" || SpanRenderLimitStr == "null" { + SpanRenderLimitStr = constants.SpanRenderLimitStr } - levelUpInt, err := strconv.Atoi(levelUp) + levelUpInt, err := strconv.Atoi(levelUpStr) if err != nil { - return "", "", 0, 0, err + return nil, err } - levelDownInt, err := strconv.Atoi(levelDown) + levelDownInt, err := strconv.Atoi(levelDownStr) if err != nil { - return "", "", 0, 0, err + return nil, err } - return traceId, spanId, levelUpInt, levelDownInt, nil + SpanRenderLimitInt, err := strconv.Atoi(SpanRenderLimitStr) + if err != nil { + return nil, err + } + MaxSpansInTraceInt, err := strconv.Atoi(constants.MaxSpansInTraceStr) + if err != nil { + return nil, err + } + params.LevelUp = levelUpInt + params.LevelDown = levelDownInt + params.SpansRenderLimit = SpanRenderLimitInt + params.MaxSpansInTrace = MaxSpansInTraceInt + return params, nil } func DoesExistInSlice(item string, list []string) bool { @@ -327,16 +346,16 @@ func parseFilteredSpansRequest(r *http.Request, aH *APIHandler) (*model.GetFilte } if len(postData.Order) != 0 { - if postData.Order != constants.Ascending && postData.Order != constants.Descending { + if postData.Order != baseconstants.Ascending && postData.Order != baseconstants.Descending { return nil, errors.New("order param is not in correct format") } - if postData.OrderParam != constants.Duration && postData.OrderParam != constants.Timestamp { + if postData.OrderParam != baseconstants.Duration && postData.OrderParam != baseconstants.Timestamp { return nil, errors.New("order param is not in correct format") } - if postData.OrderParam == constants.Duration && !aH.CheckFeature(constants.DurationSort) { - return nil, model.ErrFeatureUnavailable{Key: constants.DurationSort} - } else if postData.OrderParam == constants.Timestamp && !aH.CheckFeature(constants.TimestampSort) { - return nil, model.ErrFeatureUnavailable{Key: constants.TimestampSort} + if postData.OrderParam == baseconstants.Duration && !aH.CheckFeature(baseconstants.DurationSort) { + return nil, model.ErrFeatureUnavailable{Key: baseconstants.DurationSort} + } else if postData.OrderParam == baseconstants.Timestamp && !aH.CheckFeature(baseconstants.TimestampSort) { + return nil, model.ErrFeatureUnavailable{Key: baseconstants.TimestampSort} } } tags, err := extractTagKeys(postData.Tags) @@ -676,7 +695,7 @@ func parseTTLParams(r *http.Request) (*model.TTLParams, error) { } // Validate the type parameter - if typeTTL != constants.TraceTTL && typeTTL != constants.MetricsTTL && typeTTL != constants.LogsTTL { + if typeTTL != baseconstants.TraceTTL && typeTTL != baseconstants.MetricsTTL && typeTTL != baseconstants.LogsTTL { return nil, fmt.Errorf("type param should be metrics|traces|logs, got %v", typeTTL) } @@ -715,7 +734,7 @@ func parseGetTTL(r *http.Request) (*model.GetTTLParams, error) { return nil, fmt.Errorf("type param cannot be empty from the query") } else { // Validate the type parameter - if typeTTL != constants.TraceTTL && typeTTL != constants.MetricsTTL && typeTTL != constants.LogsTTL { + if typeTTL != baseconstants.TraceTTL && typeTTL != baseconstants.MetricsTTL && typeTTL != baseconstants.LogsTTL { return nil, fmt.Errorf("type param should be metrics|traces|logs, got %v", typeTTL) } } diff --git a/pkg/query-service/interfaces/interface.go b/pkg/query-service/interfaces/interface.go index 4c89f6f793..a0631eb70a 100644 --- a/pkg/query-service/interfaces/interface.go +++ b/pkg/query-service/interfaces/interface.go @@ -52,7 +52,7 @@ type Reader interface { GetNextPrevErrorIDs(ctx context.Context, params *model.GetErrorParams) (*model.NextPrevErrorIDs, *model.ApiError) // Search Interfaces - SearchTraces(ctx context.Context, traceID string, spanId string, levelUp int, levelDown int, spanLimit int, smartTraceAlgorithm func(payload []model.SearchSpanResponseItem, targetSpanId string, levelUp int, levelDown int, spanLimit int) ([]model.SearchSpansResult, error)) (*[]model.SearchSpansResult, error) + SearchTraces(ctx context.Context, params *model.SearchTracesParams, smartTraceAlgorithm func(payload []model.SearchSpanResponseItem, targetSpanId string, levelUp int, levelDown int, spanLimit int) ([]model.SearchSpansResult, error)) (*[]model.SearchSpansResult, error) // Setter Interfaces SetTTL(ctx context.Context, ttlParams *model.TTLParams) (*model.SetTTLResponseItem, *model.ApiError) diff --git a/pkg/query-service/model/queryParams.go b/pkg/query-service/model/queryParams.go index 5f9594e299..321f7417be 100644 --- a/pkg/query-service/model/queryParams.go +++ b/pkg/query-service/model/queryParams.go @@ -420,6 +420,15 @@ type GetFilteredSpanAggregatesParams struct { End *time.Time } +type SearchTracesParams struct { + TraceID string `json:"traceId"` + LevelUp int `json:"levelUp"` + LevelDown int `json:"levelDown"` + SpanID string `json:"spanId"` + SpansRenderLimit int `json:"spansRenderLimit"` + MaxSpansInTrace int `json:"maxSpansInTrace"` +} + type SpanFilterParams struct { TraceID []string `json:"traceID"` Status []string `json:"status"` diff --git a/pkg/query-service/model/response.go b/pkg/query-service/model/response.go index d34108cdf8..7a3d948ebb 100644 --- a/pkg/query-service/model/response.go +++ b/pkg/query-service/model/response.go @@ -212,8 +212,9 @@ type ServiceOverviewItem struct { } type SearchSpansResult struct { - Columns []string `json:"columns"` - Events [][]interface{} `json:"events"` + Columns []string `json:"columns"` + Events [][]interface{} `json:"events"` + IsSubTree bool `json:"isSubTree"` } type GetFilterSpansResponseItem struct { diff --git a/pkg/query-service/telemetry/telemetry.go b/pkg/query-service/telemetry/telemetry.go index e4eabe6f51..3625a3c9e2 100644 --- a/pkg/query-service/telemetry/telemetry.go +++ b/pkg/query-service/telemetry/telemetry.go @@ -37,6 +37,9 @@ const ( TELEMETRY_EVENT_LANGUAGE = "Language" TELEMETRY_EVENT_SERVICE = "ServiceName" TELEMETRY_EVENT_LOGS_FILTERS = "Logs Filters" + TELEMETRY_EVENT_LARGE_TRACE_OPENED = "Large Trace Opened" + TELEMETRY_EVENT_TRACE_DETAIL_API = "Trace Detail API" + TELEMETRY_EVENT_MAX_SPANS_ALLOWED_LIMIT_REACHED = "Max spans in a trace limit reached" TELEMETRY_EVENT_DISTRIBUTED = "Distributed" TELEMETRY_EVENT_QUERY_RANGE_API = "Query Range API" TELEMETRY_EVENT_DASHBOARDS_ALERTS = "Dashboards/Alerts Info" @@ -61,6 +64,9 @@ var SAAS_EVENTS_LIST = map[string]struct{}{ TELEMETRY_EVENT_SUCCESSFUL_DASHBOARD_PANEL_QUERY: {}, TELEMETRY_EVENT_SUCCESSFUL_ALERT_QUERY: {}, TELEMETRY_EVENT_QUERY_RANGE_API: {}, + TELEMETRY_EVENT_MAX_SPANS_ALLOWED_LIMIT_REACHED: {}, + TELEMETRY_EVENT_LARGE_TRACE_OPENED: {}, + TELEMETRY_EVENT_TRACE_DETAIL_API: {}, } const api_key = "4Gmoa4ixJAUHx2BpJxsjwA1bEfnwEeRz"