From 3968f11b3d5c098f2a08673fced91a0040cea7ae Mon Sep 17 00:00:00 2001 From: Srikanth Chekuri Date: Thu, 4 Aug 2022 12:38:53 +0530 Subject: [PATCH] feat: improve service map (#1467) * feat: improve service map --- .../TopNav/DateTimeSelection/config.ts | 3 +- .../src/modules/Servicemap/ServiceMap.tsx | 13 ++-- frontend/src/modules/Servicemap/utils.ts | 42 +++++++---- frontend/src/store/actions/serviceMap.ts | 33 ++------- frontend/src/store/actions/types.ts | 7 +- frontend/src/store/reducers/serviceMap.ts | 6 -- .../app/clickhouseReader/options.go | 3 + .../app/clickhouseReader/reader.go | 74 ++++++++++--------- pkg/query-service/app/http_handler.go | 6 +- pkg/query-service/interfaces/interface.go | 3 +- pkg/query-service/model/response.go | 20 ++--- 11 files changed, 101 insertions(+), 109 deletions(-) diff --git a/frontend/src/container/TopNav/DateTimeSelection/config.ts b/frontend/src/container/TopNav/DateTimeSelection/config.ts index 69bdde40c7..59715d1f86 100644 --- a/frontend/src/container/TopNav/DateTimeSelection/config.ts +++ b/frontend/src/container/TopNav/DateTimeSelection/config.ts @@ -42,8 +42,9 @@ export interface Option { } export const ServiceMapOptions: Option[] = [ - { value: '1min', label: 'Last 1 min' }, { value: '5min', label: 'Last 5 min' }, + { value: '15min', label: 'Last 15 min' }, + { value: '30min', label: 'Last 30 min' }, ]; export const getDefaultOption = (route: string): Time => { diff --git a/frontend/src/modules/Servicemap/ServiceMap.tsx b/frontend/src/modules/Servicemap/ServiceMap.tsx index 03256dde59..7bc44d0d5f 100644 --- a/frontend/src/modules/Servicemap/ServiceMap.tsx +++ b/frontend/src/modules/Servicemap/ServiceMap.tsx @@ -45,6 +45,9 @@ interface graphLink { source: string; target: string; value: number; + callRate: number; + errorRate: number; + p99: number; } export interface graphDataType { nodes: graphNode[]; @@ -96,16 +99,16 @@ function ServiceMap(props: ServiceMapProps): JSX.Element { const graphData = { nodes, links }; return ( - + /> */} d.target} linkDirectionalParticles="value" linkDirectionalParticleSpeed={(d) => d.value} @@ -124,7 +127,7 @@ function ServiceMap(props: ServiceMapProps): JSX.Element { ctx.fillStyle = isDarkMode ? '#ffffff' : '#000000'; ctx.fillText(label, node.x, node.y); }} - onNodeClick={(node) => { + onLinkHover={(node) => { const tooltip = document.querySelector('.graph-tooltip'); if (tooltip && node) { tooltip.innerHTML = getTooltip(node); diff --git a/frontend/src/modules/Servicemap/utils.ts b/frontend/src/modules/Servicemap/utils.ts index 6bec25f8a6..f1da9e3c3a 100644 --- a/frontend/src/modules/Servicemap/utils.ts +++ b/frontend/src/modules/Servicemap/utils.ts @@ -1,12 +1,13 @@ /*eslint-disable*/ //@ts-nocheck -import { cloneDeep, find, maxBy, uniq, uniqBy } from 'lodash-es'; +import { cloneDeep, find, maxBy, uniq, uniqBy, groupBy, sumBy } from 'lodash-es'; import { graphDataType } from './ServiceMap'; const MIN_WIDTH = 10; const MAX_WIDTH = 20; const DEFAULT_FONT_SIZE = 6; + export const getDimensions = (num, highest) => { const percentage = (num / highest) * 100; const width = (percentage * (MAX_WIDTH - MIN_WIDTH)) / 100 + MIN_WIDTH; @@ -18,19 +19,30 @@ export const getDimensions = (num, highest) => { }; export const getGraphData = (serviceMap, isDarkMode): graphDataType => { - const { items, services } = serviceMap; + const { items } = serviceMap; + const services = Object.values(groupBy(items, 'child')).map((e) => { + return { + serviceName: e[0].child, + errorRate: sumBy(e, 'errorRate'), + callRate: sumBy(e, 'callRate'), + } + }); const highestCallCount = maxBy(items, (e) => e?.callCount)?.callCount; const highestCallRate = maxBy(services, (e) => e?.callRate)?.callRate; + const divNum = Number( String(1).padEnd(highestCallCount.toString().length, '0'), ); const links = cloneDeep(items).map((node) => { - const { parent, child, callCount } = node; + const { parent, child, callCount, callRate, errorRate, p99 } = node; return { source: parent, target: child, value: (100 - callCount / divNum) * 0.03, + callRate, + errorRate, + p99, }; }); const uniqParent = uniqBy(cloneDeep(items), 'parent').map((e) => e.parent); @@ -47,15 +59,10 @@ export const getGraphData = (serviceMap, isDarkMode): graphDataType => { width: MIN_WIDTH, color, nodeVal: MIN_WIDTH, - callRate: 0, - errorRate: 0, - p99: 0, }; } if (service.errorRate > 0) { color = isDarkMode ? '#DB836E' : '#F98989'; - } else if (service.fourXXRate > 0) { - color = isDarkMode ? '#C79931' : '#F9DA7B'; } const { fontSize, width } = getDimensions(service.callRate, highestCallRate); return { @@ -65,9 +72,6 @@ export const getGraphData = (serviceMap, isDarkMode): graphDataType => { width, color, nodeVal: width, - callRate: service.callRate.toFixed(2), - errorRate: service.errorRate, - p99: service.p99, }; }); return { @@ -90,25 +94,31 @@ export const getZoomPx = (): number => { return 190; }; -export const getTooltip = (node: { +const getRound2DigitsAfterDecimal = (num: number) => { + if (num === 0) { + return 0; + } + return num.toFixed(20).match(/^-?\d*\.?0*\d{0,2}/)[0]; +} + +export const getTooltip = (link: { p99: number; errorRate: number; callRate: number; id: string; }) => { return `
-
${node.id}
P99 latency:
-
${node.p99 / 1000000}ms
+
${getRound2DigitsAfterDecimal(link.p99/ 1000000)}ms
Request:
-
${node.callRate}/sec
+
${getRound2DigitsAfterDecimal(link.callRate)}/sec
Error Rate:
-
${node.errorRate}%
+
${getRound2DigitsAfterDecimal(link.errorRate)}%
`; }; diff --git a/frontend/src/store/actions/serviceMap.ts b/frontend/src/store/actions/serviceMap.ts index 36d8e5ba97..e3f527fc57 100644 --- a/frontend/src/store/actions/serviceMap.ts +++ b/frontend/src/store/actions/serviceMap.ts @@ -6,26 +6,16 @@ import { ActionTypes } from './types'; export interface ServiceMapStore { items: ServicesMapItem[]; - services: ServicesItem[]; loading: boolean; } -export interface ServicesItem { - serviceName: string; - p99: number; - avgDuration: number; - numCalls: number; - callRate: number; - numErrors: number; - errorRate: number; - num4XX: number; - fourXXRate: number; -} - export interface ServicesMapItem { parent: string; child: string; callCount: number; + callRate: number; + errorRate: number; + p99: number; } export interface ServiceMapItemAction { @@ -33,11 +23,6 @@ export interface ServiceMapItemAction { payload: ServicesMapItem[]; } -export interface ServicesAction { - type: ActionTypes.getServices; - payload: ServicesItem[]; -} - export interface ServiceMapLoading { type: ActionTypes.serviceMapLoading; payload: { @@ -55,19 +40,13 @@ export const getDetailedServiceMapItems = (globalTime: GlobalTime) => { end, tags: [], }; - const [serviceMapDependenciesResponse, response] = await Promise.all([ - api.post(`/serviceMapDependencies`, serviceMapPayload), - api.post(`/services`, serviceMapPayload), + const [dependencyGraphResponse] = await Promise.all([ + api.post(`/dependency_graph`, serviceMapPayload), ]); - dispatch({ - type: ActionTypes.getServices, - payload: response.data, - }); - dispatch({ type: ActionTypes.getServiceMapItems, - payload: serviceMapDependenciesResponse.data, + payload: dependencyGraphResponse.data, }); dispatch({ diff --git a/frontend/src/store/actions/types.ts b/frontend/src/store/actions/types.ts index 702997d49b..96d3f63538 100644 --- a/frontend/src/store/actions/types.ts +++ b/frontend/src/store/actions/types.ts @@ -1,8 +1,4 @@ -import { - ServiceMapItemAction, - ServiceMapLoading, - ServicesAction, -} from './serviceMap'; +import { ServiceMapItemAction, ServiceMapLoading } from './serviceMap'; import { GetUsageDataAction } from './usage'; export enum ActionTypes { @@ -17,6 +13,5 @@ export enum ActionTypes { export type Action = | GetUsageDataAction - | ServicesAction | ServiceMapItemAction | ServiceMapLoading; diff --git a/frontend/src/store/reducers/serviceMap.ts b/frontend/src/store/reducers/serviceMap.ts index 18ec21a9ec..04b724615b 100644 --- a/frontend/src/store/reducers/serviceMap.ts +++ b/frontend/src/store/reducers/serviceMap.ts @@ -2,7 +2,6 @@ import { Action, ActionTypes, ServiceMapStore } from 'store/actions'; const initialState: ServiceMapStore = { items: [], - services: [], loading: true, }; @@ -16,11 +15,6 @@ export const ServiceMapReducer = ( ...state, items: action.payload, }; - case ActionTypes.getServices: - return { - ...state, - services: action.payload, - }; case ActionTypes.serviceMapLoading: { return { ...state, diff --git a/pkg/query-service/app/clickhouseReader/options.go b/pkg/query-service/app/clickhouseReader/options.go index 2e4471fec3..eeca6f89c7 100644 --- a/pkg/query-service/app/clickhouseReader/options.go +++ b/pkg/query-service/app/clickhouseReader/options.go @@ -25,6 +25,7 @@ const ( defaultErrorTable string = "signoz_error_index_v2" defaultDurationTable string = "durationSortMV" defaultSpansTable string = "signoz_spans" + defaultDependencyGraphTable string = "dependency_graph_minutes" defaultTopLevelOperationsTable string = "top_level_operations" defaultWriteBatchDelay time.Duration = 5 * time.Second defaultWriteBatchSize int = 10000 @@ -53,6 +54,7 @@ type namespaceConfig struct { DurationTable string SpansTable string ErrorTable string + DependencyGraphTable string TopLevelOperationsTable string WriteBatchDelay time.Duration WriteBatchSize int @@ -113,6 +115,7 @@ func NewOptions(datasource string, primaryNamespace string, otherNamespaces ...s ErrorTable: defaultErrorTable, DurationTable: defaultDurationTable, SpansTable: defaultSpansTable, + DependencyGraphTable: defaultDependencyGraphTable, TopLevelOperationsTable: defaultTopLevelOperationsTable, WriteBatchDelay: defaultWriteBatchDelay, WriteBatchSize: defaultWriteBatchSize, diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index da0d0fdb27..d5666b45ef 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -83,6 +83,7 @@ type ClickHouseReader struct { indexTable string errorTable string spansTable string + dependencyGraphTable string topLevelOperationsTable string queryEngine *promql.Engine remoteStorage *remote.Storage @@ -121,6 +122,7 @@ func NewReader(localDB *sqlx.DB, configFile string) *ClickHouseReader { errorTable: options.primary.ErrorTable, durationTable: options.primary.DurationTable, spansTable: options.primary.SpansTable, + dependencyGraphTable: options.primary.DependencyGraphTable, topLevelOperationsTable: options.primary.TopLevelOperationsTable, promConfigFile: configFile, } @@ -1698,48 +1700,50 @@ func interfaceArrayToStringArray(array []interface{}) []string { return strArray } -func (r *ClickHouseReader) GetServiceMapDependencies(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) { - serviceMapDependencyItems := []model.ServiceMapDependencyItem{} +func (r *ClickHouseReader) GetDependencyGraph(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) { - query := fmt.Sprintf(`SELECT spanID, parentSpanID, serviceName FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s'`, r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) + response := []model.ServiceMapDependencyResponseItem{} - err := r.db.Select(ctx, &serviceMapDependencyItems, query) + args := []interface{}{} + args = append(args, + clickhouse.Named("start", uint64(queryParams.Start.Unix())), + clickhouse.Named("end", uint64(queryParams.End.Unix())), + clickhouse.Named("duration", uint64(queryParams.End.Unix()-queryParams.Start.Unix())), + ) - zap.S().Info(query) + query := fmt.Sprintf(` + WITH + quantilesMergeState(0.5, 0.75, 0.9, 0.95, 0.99)(duration_quantiles_state) AS duration_quantiles_state, + finalizeAggregation(duration_quantiles_state) AS result + SELECT + src as parent, + dest as child, + result[1] AS p50, + result[2] AS p75, + result[3] AS p90, + result[4] AS p95, + result[5] AS p99, + sum(total_count) as callCount, + sum(total_count)/ @duration AS callRate, + sum(error_count)/sum(total_count) as errorRate + FROM %s.%s + WHERE toUInt64(toDateTime(timestamp)) >= @start AND toUInt64(toDateTime(timestamp)) <= @end + GROUP BY + src, + dest`, + r.traceDB, r.dependencyGraphTable, + ) + + zap.S().Debug(query, args) + + err := r.db.Select(ctx, &response, query, args...) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, fmt.Errorf("Error in processing sql query") } - serviceMap := make(map[string]*model.ServiceMapDependencyResponseItem) - - spanId2ServiceNameMap := make(map[string]string) - for i := range serviceMapDependencyItems { - spanId2ServiceNameMap[serviceMapDependencyItems[i].SpanId] = serviceMapDependencyItems[i].ServiceName - } - for i := range serviceMapDependencyItems { - parent2childServiceName := spanId2ServiceNameMap[serviceMapDependencyItems[i].ParentSpanId] + "-" + spanId2ServiceNameMap[serviceMapDependencyItems[i].SpanId] - if _, ok := serviceMap[parent2childServiceName]; !ok { - serviceMap[parent2childServiceName] = &model.ServiceMapDependencyResponseItem{ - Parent: spanId2ServiceNameMap[serviceMapDependencyItems[i].ParentSpanId], - Child: spanId2ServiceNameMap[serviceMapDependencyItems[i].SpanId], - CallCount: 1, - } - } else { - serviceMap[parent2childServiceName].CallCount++ - } - } - - retMe := make([]model.ServiceMapDependencyResponseItem, 0, len(serviceMap)) - for _, dependency := range serviceMap { - if dependency.Parent == "" { - continue - } - retMe = append(retMe, *dependency) - } - - return &retMe, nil + return &response, nil } func (r *ClickHouseReader) GetFilteredSpansAggregates(ctx context.Context, queryParams *model.GetFilteredSpanAggregatesParams) (*model.GetFilteredSpansAggregatesResponse, *model.ApiError) { @@ -1979,7 +1983,7 @@ func (r *ClickHouseReader) SetTTL(ctx context.Context, switch params.Type { case constants.TraceTTL: - tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable} + tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable, signozTraceDBName + "." + defaultDependencyGraphTable} for _, tableName = range tableNameArray { statusItem, err := r.checkTTLStatusItem(ctx, tableName) if err != nil { diff --git a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go index efb5316b87..343b5beec7 100644 --- a/pkg/query-service/app/http_handler.go +++ b/pkg/query-service/app/http_handler.go @@ -326,7 +326,7 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) { router.HandleFunc("/api/v1/service/top_level_operations", ViewAccess(aH.getServicesTopLevelOps)).Methods(http.MethodPost) router.HandleFunc("/api/v1/traces/{traceId}", ViewAccess(aH.searchTraces)).Methods(http.MethodGet) router.HandleFunc("/api/v1/usage", ViewAccess(aH.getUsage)).Methods(http.MethodGet) - router.HandleFunc("/api/v1/serviceMapDependencies", ViewAccess(aH.serviceMapDependencies)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/dependency_graph", ViewAccess(aH.dependencyGraph)).Methods(http.MethodPost) router.HandleFunc("/api/v1/settings/ttl", AdminAccess(aH.setTTL)).Methods(http.MethodPost) router.HandleFunc("/api/v1/settings/ttl", ViewAccess(aH.getTTL)).Methods(http.MethodGet) @@ -1185,14 +1185,14 @@ func (aH *APIHandler) getServices(w http.ResponseWriter, r *http.Request) { aH.writeJSON(w, r, result) } -func (aH *APIHandler) serviceMapDependencies(w http.ResponseWriter, r *http.Request) { +func (aH *APIHandler) dependencyGraph(w http.ResponseWriter, r *http.Request) { query, err := parseGetServicesRequest(r) if aH.handleError(w, err, http.StatusBadRequest) { return } - result, err := (*aH.reader).GetServiceMapDependencies(r.Context(), query) + result, err := (*aH.reader).GetDependencyGraph(r.Context(), query) if aH.handleError(w, err, http.StatusBadRequest) { return } diff --git a/pkg/query-service/interfaces/interface.go b/pkg/query-service/interfaces/interface.go index 76830c67d8..14bc4b5d63 100644 --- a/pkg/query-service/interfaces/interface.go +++ b/pkg/query-service/interfaces/interface.go @@ -25,7 +25,8 @@ type Reader interface { GetTopOperations(ctx context.Context, query *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) GetUsage(ctx context.Context, query *model.GetUsageParams) (*[]model.UsageItem, error) GetServicesList(ctx context.Context) (*[]string, error) - GetServiceMapDependencies(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) + GetDependencyGraph(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) + GetTTL(ctx context.Context, ttlParams *model.GetTTLParams) (*model.GetTTLResponseItem, *model.ApiError) // GetDisks returns a list of disks configured in the underlying DB. It is supported by diff --git a/pkg/query-service/model/response.go b/pkg/query-service/model/response.go index 9bb35d1d55..06ee6e6e2f 100644 --- a/pkg/query-service/model/response.go +++ b/pkg/query-service/model/response.go @@ -206,12 +206,6 @@ func (item *SearchSpanReponseItem) GetValues() []interface{} { return returnArray } -type ServiceMapDependencyItem struct { - SpanId string `json:"spanId,omitempty" ch:"spanID"` - ParentSpanId string `json:"parentSpanId,omitempty" ch:"parentSpanID"` - ServiceName string `json:"serviceName,omitempty" ch:"serviceName"` -} - type UsageItem struct { Time time.Time `json:"time,omitempty" ch:"time"` Timestamp uint64 `json:"timestamp" ch:"timestamp"` @@ -233,10 +227,18 @@ type TagFilters struct { type TagValues struct { TagValues string `json:"tagValues" ch:"tagValues"` } + type ServiceMapDependencyResponseItem struct { - Parent string `json:"parent,omitempty" ch:"parent"` - Child string `json:"child,omitempty" ch:"child"` - CallCount int `json:"callCount,omitempty" ch:"callCount"` + Parent string `json:"parent" ch:"parent"` + Child string `json:"child" ch:"child"` + CallCount uint64 `json:"callCount" ch:"callCount"` + CallRate float64 `json:"callRate" ch:"callRate"` + ErrorRate float64 `json:"errorRate" ch:"errorRate"` + P99 float64 `json:"p99" ch:"p99"` + P95 float64 `json:"p95" ch:"p95"` + P90 float64 `json:"p90" ch:"p90"` + P75 float64 `json:"p75" ch:"p75"` + P50 float64 `json:"p50" ch:"p50"` } type GetFilteredSpansAggregatesResponse struct {