From 5bfc2af51b09913896f2c62894f9f47874ba18d3 Mon Sep 17 00:00:00 2001 From: Srikanth Chekuri Date: Thu, 4 Aug 2022 11:57:05 +0530 Subject: [PATCH] feat: show messaging/cron/browser services in listing page (#1455) * feat: show messaging/cron/browser services in listing page * chore: issue maximum of ten queries to clickhouse Co-authored-by: Palash Gupta --- .../src/api/metrics/getTopLevelOperations.ts | 24 ++ ...getTopEndPoints.ts => getTopOperations.ts} | 8 +- .../MetricsApplication/Tabs/Overview.tsx | 32 +- ...pointsTable.tsx => TopOperationsTable.tsx} | 16 +- frontend/src/container/MetricsTable/index.tsx | 4 +- .../store/actions/metrics/getInitialData.ts | 20 +- frontend/src/store/reducers/metric.ts | 9 +- frontend/src/types/actions/metrics.ts | 5 +- .../api/metrics/getTopLevelOperations.ts | 7 + ...getTopEndPoints.ts => getTopOperations.ts} | 4 +- frontend/src/types/reducer/metrics.ts | 5 +- .../app/clickhouseReader/options.go | 75 ++--- .../app/clickhouseReader/reader.go | 303 +++++++++++------- pkg/query-service/app/http_handler.go | 20 +- pkg/query-service/app/parser.go | 8 +- pkg/query-service/config/prometheus.yml | 2 +- pkg/query-service/interfaces/interface.go | 3 +- pkg/query-service/model/queryParams.go | 2 +- pkg/query-service/model/response.go | 30 +- 19 files changed, 372 insertions(+), 205 deletions(-) create mode 100644 frontend/src/api/metrics/getTopLevelOperations.ts rename frontend/src/api/metrics/{getTopEndPoints.ts => getTopOperations.ts} (73%) rename frontend/src/container/MetricsApplication/{TopEndpointsTable.tsx => TopOperationsTable.tsx} (89%) create mode 100644 frontend/src/types/api/metrics/getTopLevelOperations.ts rename frontend/src/types/api/metrics/{getTopEndPoints.ts => getTopOperations.ts} (74%) diff --git a/frontend/src/api/metrics/getTopLevelOperations.ts b/frontend/src/api/metrics/getTopLevelOperations.ts new file mode 100644 index 0000000000..5ecfd2a67a --- /dev/null +++ b/frontend/src/api/metrics/getTopLevelOperations.ts @@ 
-0,0 +1,24 @@ +import axios from 'api'; +import { ErrorResponseHandler } from 'api/ErrorResponseHandler'; +import { AxiosError } from 'axios'; +import { ErrorResponse, SuccessResponse } from 'types/api'; +import { PayloadProps, Props } from 'types/api/metrics/getTopLevelOperations'; + +const getTopLevelOperations = async ( + props: Props, +): Promise | ErrorResponse> => { + try { + const response = await axios.post(`/service/top_level_operations`); + + return { + statusCode: 200, + error: null, + message: response.data.status, + payload: response.data[props.service], + }; + } catch (error) { + return ErrorResponseHandler(error as AxiosError); + } +}; + +export default getTopLevelOperations; diff --git a/frontend/src/api/metrics/getTopEndPoints.ts b/frontend/src/api/metrics/getTopOperations.ts similarity index 73% rename from frontend/src/api/metrics/getTopEndPoints.ts rename to frontend/src/api/metrics/getTopOperations.ts index db78aae9e3..cf07f0ee5d 100644 --- a/frontend/src/api/metrics/getTopEndPoints.ts +++ b/frontend/src/api/metrics/getTopOperations.ts @@ -2,13 +2,13 @@ import axios from 'api'; import { ErrorResponseHandler } from 'api/ErrorResponseHandler'; import { AxiosError } from 'axios'; import { ErrorResponse, SuccessResponse } from 'types/api'; -import { PayloadProps, Props } from 'types/api/metrics/getTopEndPoints'; +import { PayloadProps, Props } from 'types/api/metrics/getTopOperations'; -const getTopEndPoints = async ( +const getTopOperations = async ( props: Props, ): Promise | ErrorResponse> => { try { - const response = await axios.post(`/service/top_endpoints`, { + const response = await axios.post(`/service/top_operations`, { start: `${props.start}`, end: `${props.end}`, service: props.service, @@ -26,4 +26,4 @@ const getTopEndPoints = async ( } }; -export default getTopEndPoints; +export default getTopOperations; diff --git a/frontend/src/container/MetricsApplication/Tabs/Overview.tsx 
b/frontend/src/container/MetricsApplication/Tabs/Overview.tsx index 803ed91bcc..2dbf2d33fd 100644 --- a/frontend/src/container/MetricsApplication/Tabs/Overview.tsx +++ b/frontend/src/container/MetricsApplication/Tabs/Overview.tsx @@ -15,7 +15,7 @@ import { PromQLWidgets } from 'types/api/dashboard/getAll'; import MetricReducer from 'types/reducer/metrics'; import { Card, Col, GraphContainer, GraphTitle, Row } from '../styles'; -import TopEndpointsTable from '../TopEndpointsTable'; +import TopOperationsTable from '../TopOperationsTable'; import { Button } from './styles'; function Application({ getWidget }: DashboardProps): JSX.Element { @@ -23,11 +23,13 @@ function Application({ getWidget }: DashboardProps): JSX.Element { const selectedTimeStamp = useRef(0); const { - topEndPoints, + topOperations, serviceOverview, resourceAttributePromQLQuery, resourceAttributeQueries, + topLevelOperations, } = useSelector((state) => state.metrics); + const operationsRegex = topLevelOperations.join('|'); const selectedTraceTags: string = JSON.stringify( convertRawQueriesToTraceSelectedTags(resourceAttributeQueries, 'array') || [], @@ -107,7 +109,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { - Application latency + Latency { - onClickHandler(ChartEvent, activeElements, chart, data, 'Application'); + onClickHandler(ChartEvent, activeElements, chart, data, 'Service'); }} - name="application_latency" + name="service_latency" type="line" data={{ datasets: [ @@ -175,7 +177,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { - Requests + Rate (ops/s) { - onClickHandler(event, element, chart, data, 'Request'); + onClickHandler(event, element, chart, data, 'Rate'); }} widget={getWidget([ { - query: `sum(rate(signoz_latency_count{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))`, - legend: 'Requests', + query: `sum(rate(signoz_latency_count{service_name="${servicename}", 
operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))`, + legend: 'Operations', }, ])} - yAxisUnit="reqps" + yAxisUnit="ops" /> @@ -227,7 +229,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { }} widget={getWidget([ { - query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`, + query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`, legend: 'Error Percentage', }, ])} @@ -239,7 +241,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { - + diff --git a/frontend/src/container/MetricsApplication/TopEndpointsTable.tsx b/frontend/src/container/MetricsApplication/TopOperationsTable.tsx similarity index 89% rename from frontend/src/container/MetricsApplication/TopEndpointsTable.tsx rename to frontend/src/container/MetricsApplication/TopOperationsTable.tsx index 5ede2d9c6a..4f91a97781 100644 --- a/frontend/src/container/MetricsApplication/TopEndpointsTable.tsx +++ b/frontend/src/container/MetricsApplication/TopOperationsTable.tsx @@ -11,7 +11,7 @@ import { AppState } from 'store/reducers'; import { GlobalReducer } from 'types/reducer/globalTime'; import MetricReducer from 
'types/reducer/metrics'; -function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { +function TopOperationsTable(props: TopOperationsTableProps): JSX.Element { const { minTime, maxTime } = useSelector( (state) => state.globalTime, ); @@ -85,7 +85,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { title: 'Number of Calls', dataIndex: 'numCalls', key: 'numCalls', - sorter: (a: TopEndpointListItem, b: TopEndpointListItem): number => + sorter: (a: TopOperationListItem, b: TopOperationListItem): number => a.numCalls - b.numCalls, }, ]; @@ -94,7 +94,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { { - return 'Top Endpoints'; + return 'Key Operations'; }} tableLayout="fixed" dataSource={data} @@ -104,7 +104,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { ); } -interface TopEndpointListItem { +interface TopOperationListItem { p50: number; p95: number; p99: number; @@ -112,10 +112,10 @@ interface TopEndpointListItem { name: string; } -type DataProps = TopEndpointListItem; +type DataProps = TopOperationListItem; -interface TopEndpointsTableProps { - data: TopEndpointListItem[]; +interface TopOperationsTableProps { + data: TopOperationListItem[]; } -export default TopEndpointsTable; +export default TopOperationsTable; diff --git a/frontend/src/container/MetricsTable/index.tsx b/frontend/src/container/MetricsTable/index.tsx index cc0778c80e..e81a7badfc 100644 --- a/frontend/src/container/MetricsTable/index.tsx +++ b/frontend/src/container/MetricsTable/index.tsx @@ -56,14 +56,14 @@ function Metrics(): JSX.Element { render: (value: number): string => (value / 1000000).toFixed(2), }, { - title: 'Error Rate (% of requests)', + title: 'Error Rate (% of total)', dataIndex: 'errorRate', key: 'errorRate', sorter: (a: DataProps, b: DataProps): number => a.errorRate - b.errorRate, render: (value: number): string => value.toFixed(2), }, { - title: 'Requests Per Second', + title: 
'Operations Per Second', dataIndex: 'callRate', key: 'callRate', sorter: (a: DataProps, b: DataProps): number => a.callRate - b.callRate, diff --git a/frontend/src/store/actions/metrics/getInitialData.ts b/frontend/src/store/actions/metrics/getInitialData.ts index f994a35c94..0f607f6ea5 100644 --- a/frontend/src/store/actions/metrics/getInitialData.ts +++ b/frontend/src/store/actions/metrics/getInitialData.ts @@ -3,7 +3,8 @@ // import getExternalError from 'api/metrics/getExternalError'; // import getExternalService from 'api/metrics/getExternalService'; import getServiceOverview from 'api/metrics/getServiceOverview'; -import getTopEndPoints from 'api/metrics/getTopEndPoints'; +import getTopLevelOperations from 'api/metrics/getTopLevelOperations'; +import getTopOperations from 'api/metrics/getTopOperations'; import { AxiosError } from 'axios'; import GetMinMax from 'lib/getMinMax'; import getStep from 'lib/getStep'; @@ -46,7 +47,8 @@ export const GetInitialData = ( // getExternalErrorResponse, // getExternalServiceResponse, getServiceOverviewResponse, - getTopEndPointsResponse, + getTopOperationsResponse, + getTopLevelOperationsResponse, ] = await Promise.all([ // getDBOverView({ // ...props, @@ -67,12 +69,15 @@ export const GetInitialData = ( step: getStep({ start: minTime, end: maxTime, inputFormat: 'ns' }), selectedTags: props.selectedTags, }), - getTopEndPoints({ + getTopOperations({ end: maxTime, service: props.serviceName, start: minTime, selectedTags: props.selectedTags, }), + getTopLevelOperations({ + service: props.serviceName, + }), ]); if ( @@ -81,7 +86,8 @@ export const GetInitialData = ( // getExternalErrorResponse.statusCode === 200 && // getExternalServiceResponse.statusCode === 200 && getServiceOverviewResponse.statusCode === 200 && - getTopEndPointsResponse.statusCode === 200 + getTopOperationsResponse.statusCode === 200 && + getTopLevelOperationsResponse.statusCode === 200 ) { dispatch({ type: 'GET_INTIAL_APPLICATION_DATA', @@ -91,7 +97,8 @@ 
export const GetInitialData = ( // externalError: getExternalErrorResponse.payload, // externalService: getExternalServiceResponse.payload, serviceOverview: getServiceOverviewResponse.payload, - topEndPoints: getTopEndPointsResponse.payload, + topOperations: getTopOperationsResponse.payload, + topLevelOperations: getTopLevelOperationsResponse.payload, }, }); } else { @@ -99,8 +106,9 @@ export const GetInitialData = ( type: 'GET_INITIAL_APPLICATION_ERROR', payload: { errorMessage: - getTopEndPointsResponse.error || + getTopOperationsResponse.error || getServiceOverviewResponse.error || + getTopLevelOperationsResponse.error || // getExternalServiceResponse.error || // getExternalErrorResponse.error || // getExternalAverageDurationResponse.error || diff --git a/frontend/src/store/reducers/metric.ts b/frontend/src/store/reducers/metric.ts index 72b24a6b5b..2cb316d2c1 100644 --- a/frontend/src/store/reducers/metric.ts +++ b/frontend/src/store/reducers/metric.ts @@ -21,7 +21,7 @@ const InitialValue: InitialValueTypes = { services: [], dbOverView: [], externalService: [], - topEndPoints: [], + topOperations: [], externalAverageDuration: [], externalError: [], serviceOverview: [], @@ -29,6 +29,7 @@ const InitialValue: InitialValueTypes = { resourceAttributePromQLQuery: resourceAttributesQueryToPromQL( GetResourceAttributeQueriesFromURL() || [], ), + topLevelOperations: [], }; const metrics = ( @@ -88,22 +89,24 @@ const metrics = ( case GET_INTIAL_APPLICATION_DATA: { const { // dbOverView, - topEndPoints, + topOperations, serviceOverview, // externalService, // externalAverageDuration, // externalError, + topLevelOperations, } = action.payload; return { ...state, // dbOverView, - topEndPoints, + topOperations, serviceOverview, // externalService, // externalAverageDuration, // externalError, metricsApplicationLoading: false, + topLevelOperations, }; } diff --git a/frontend/src/types/actions/metrics.ts b/frontend/src/types/actions/metrics.ts index 382e56b560..bc48f0929f 
100644 --- a/frontend/src/types/actions/metrics.ts +++ b/frontend/src/types/actions/metrics.ts @@ -5,7 +5,7 @@ import { IResourceAttributeQuery } from 'container/MetricsApplication/ResourceAttributesFilter/types'; import { ServicesList } from 'types/api/metrics/getService'; import { ServiceOverview } from 'types/api/metrics/getServiceOverview'; -import { TopEndPoints } from 'types/api/metrics/getTopEndPoints'; +import { TopOperations } from 'types/api/metrics/getTopOperations'; export const GET_SERVICE_LIST_SUCCESS = 'GET_SERVICE_LIST_SUCCESS'; export const GET_SERVICE_LIST_LOADING_START = 'GET_SERVICE_LIST_LOADING_START'; @@ -38,12 +38,13 @@ export interface GetServiceListError { export interface GetInitialApplicationData { type: typeof GET_INTIAL_APPLICATION_DATA; payload: { - topEndPoints: TopEndPoints[]; + topOperations: TopOperations[]; // dbOverView: DBOverView[]; // externalService: ExternalService[]; // externalAverageDuration: ExternalAverageDuration[]; // externalError: ExternalError[]; serviceOverview: ServiceOverview[]; + topLevelOperations: string[]; }; } diff --git a/frontend/src/types/api/metrics/getTopLevelOperations.ts b/frontend/src/types/api/metrics/getTopLevelOperations.ts new file mode 100644 index 0000000000..c4e88aed08 --- /dev/null +++ b/frontend/src/types/api/metrics/getTopLevelOperations.ts @@ -0,0 +1,7 @@ +export type TopLevelOperations = string[]; + +export interface Props { + service: string; +} + +export type PayloadProps = TopLevelOperations; diff --git a/frontend/src/types/api/metrics/getTopEndPoints.ts b/frontend/src/types/api/metrics/getTopOperations.ts similarity index 74% rename from frontend/src/types/api/metrics/getTopEndPoints.ts rename to frontend/src/types/api/metrics/getTopOperations.ts index c86d5fd115..f30c01251f 100644 --- a/frontend/src/types/api/metrics/getTopEndPoints.ts +++ b/frontend/src/types/api/metrics/getTopOperations.ts @@ -1,6 +1,6 @@ import { Tags } from 'types/reducer/trace'; -export interface TopEndPoints { 
+export interface TopOperations { name: string; numCalls: number; p50: number; @@ -15,4 +15,4 @@ export interface Props { selectedTags: Tags[]; } -export type PayloadProps = TopEndPoints[]; +export type PayloadProps = TopOperations[]; diff --git a/frontend/src/types/reducer/metrics.ts b/frontend/src/types/reducer/metrics.ts index d5b500f109..7903b2c21a 100644 --- a/frontend/src/types/reducer/metrics.ts +++ b/frontend/src/types/reducer/metrics.ts @@ -5,7 +5,7 @@ import { ExternalError } from 'types/api/metrics/getExternalError'; import { ExternalService } from 'types/api/metrics/getExternalService'; import { ServicesList } from 'types/api/metrics/getService'; import { ServiceOverview } from 'types/api/metrics/getServiceOverview'; -import { TopEndPoints } from 'types/api/metrics/getTopEndPoints'; +import { TopOperations } from 'types/api/metrics/getTopOperations'; interface MetricReducer { services: ServicesList[]; @@ -15,12 +15,13 @@ interface MetricReducer { errorMessage: string; dbOverView: DBOverView[]; externalService: ExternalService[]; - topEndPoints: TopEndPoints[]; + topOperations: TopOperations[]; externalAverageDuration: ExternalAverageDuration[]; externalError: ExternalError[]; serviceOverview: ServiceOverview[]; resourceAttributeQueries: IResourceAttributeQuery[]; resourceAttributePromQLQuery: string; + topLevelOperations: string[]; } export default MetricReducer; diff --git a/pkg/query-service/app/clickhouseReader/options.go b/pkg/query-service/app/clickhouseReader/options.go index 99fe5080ae..2e4471fec3 100644 --- a/pkg/query-service/app/clickhouseReader/options.go +++ b/pkg/query-service/app/clickhouseReader/options.go @@ -18,16 +18,17 @@ const ( ) const ( - defaultDatasource string = "tcp://localhost:9000" - defaultTraceDB string = "signoz_traces" - defaultOperationsTable string = "signoz_operations" - defaultIndexTable string = "signoz_index_v2" - defaultErrorTable string = "signoz_error_index_v2" - defaulDurationTable string = "durationSortMV" - 
defaultSpansTable string = "signoz_spans" - defaultWriteBatchDelay time.Duration = 5 * time.Second - defaultWriteBatchSize int = 10000 - defaultEncoding Encoding = EncodingJSON + defaultDatasource string = "tcp://localhost:9000" + defaultTraceDB string = "signoz_traces" + defaultOperationsTable string = "signoz_operations" + defaultIndexTable string = "signoz_index_v2" + defaultErrorTable string = "signoz_error_index_v2" + defaultDurationTable string = "durationSortMV" + defaultSpansTable string = "signoz_spans" + defaultTopLevelOperationsTable string = "top_level_operations" + defaultWriteBatchDelay time.Duration = 5 * time.Second + defaultWriteBatchSize int = 10000 + defaultEncoding Encoding = EncodingJSON ) const ( @@ -43,19 +44,20 @@ const ( // NamespaceConfig is Clickhouse's internal configuration data type namespaceConfig struct { - namespace string - Enabled bool - Datasource string - TraceDB string - OperationsTable string - IndexTable string - DurationTable string - SpansTable string - ErrorTable string - WriteBatchDelay time.Duration - WriteBatchSize int - Encoding Encoding - Connector Connector + namespace string + Enabled bool + Datasource string + TraceDB string + OperationsTable string + IndexTable string + DurationTable string + SpansTable string + ErrorTable string + TopLevelOperationsTable string + WriteBatchDelay time.Duration + WriteBatchSize int + Encoding Encoding + Connector Connector } // Connecto defines how to connect to the database @@ -102,19 +104,20 @@ func NewOptions(datasource string, primaryNamespace string, otherNamespaces ...s options := &Options{ primary: &namespaceConfig{ - namespace: primaryNamespace, - Enabled: true, - Datasource: datasource, - TraceDB: defaultTraceDB, - OperationsTable: defaultOperationsTable, - IndexTable: defaultIndexTable, - ErrorTable: defaultErrorTable, - DurationTable: defaulDurationTable, - SpansTable: defaultSpansTable, - WriteBatchDelay: defaultWriteBatchDelay, - WriteBatchSize: defaultWriteBatchSize, 
- Encoding: defaultEncoding, - Connector: defaultConnector, + namespace: primaryNamespace, + Enabled: true, + Datasource: datasource, + TraceDB: defaultTraceDB, + OperationsTable: defaultOperationsTable, + IndexTable: defaultIndexTable, + ErrorTable: defaultErrorTable, + DurationTable: defaultDurationTable, + SpansTable: defaultSpansTable, + TopLevelOperationsTable: defaultTopLevelOperationsTable, + WriteBatchDelay: defaultWriteBatchDelay, + WriteBatchSize: defaultWriteBatchSize, + Encoding: defaultEncoding, + Connector: defaultConnector, }, others: make(map[string]*namespaceConfig, len(otherNamespaces)), } diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index a1f86c7413..da0d0fdb27 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -75,16 +75,17 @@ var ( // SpanWriter for reading spans from ClickHouse type ClickHouseReader struct { - db clickhouse.Conn - localDB *sqlx.DB - traceDB string - operationsTable string - durationTable string - indexTable string - errorTable string - spansTable string - queryEngine *promql.Engine - remoteStorage *remote.Storage + db clickhouse.Conn + localDB *sqlx.DB + traceDB string + operationsTable string + durationTable string + indexTable string + errorTable string + spansTable string + topLevelOperationsTable string + queryEngine *promql.Engine + remoteStorage *remote.Storage promConfigFile string promConfig *config.Config @@ -111,16 +112,17 @@ func NewReader(localDB *sqlx.DB, configFile string) *ClickHouseReader { } return &ClickHouseReader{ - db: db, - localDB: localDB, - traceDB: options.primary.TraceDB, - alertManager: alertManager, - operationsTable: options.primary.OperationsTable, - indexTable: options.primary.IndexTable, - errorTable: options.primary.ErrorTable, - durationTable: options.primary.DurationTable, - spansTable: options.primary.SpansTable, - promConfigFile: configFile, + db: db, + 
localDB: localDB, + traceDB: options.primary.TraceDB, + alertManager: alertManager, + operationsTable: options.primary.OperationsTable, + indexTable: options.primary.IndexTable, + errorTable: options.primary.ErrorTable, + durationTable: options.primary.DurationTable, + spansTable: options.primary.SpansTable, + topLevelOperationsTable: options.primary.TopLevelOperationsTable, + promConfigFile: configFile, } } @@ -657,103 +659,153 @@ func (r *ClickHouseReader) GetServicesList(ctx context.Context) (*[]string, erro return &services, nil } +func (r *ClickHouseReader) GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError) { + + operations := map[string][]string{} + query := fmt.Sprintf(`SELECT DISTINCT name, serviceName FROM %s.%s`, r.traceDB, r.topLevelOperationsTable) + + rows, err := r.db.Query(ctx, query) + + if err != nil { + zap.S().Error("Error in processing sql query: ", err) + return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + + defer rows.Close() + for rows.Next() { + var name, serviceName string + if err := rows.Scan(&name, &serviceName); err != nil { + return nil, &model.ApiError{Typ: model.ErrorInternal, Err: fmt.Errorf("Error in reading data")} + } + if _, ok := operations[serviceName]; !ok { + operations[serviceName] = []string{} + } + operations[serviceName] = append(operations[serviceName], name) + } + return &operations, nil +} + func (r *ClickHouseReader) GetServices(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError) { if r.indexTable == "" { return nil, &model.ApiError{Typ: model.ErrorExec, Err: ErrNoIndexTable} } + topLevelOps, apiErr := r.GetTopLevelOperations(ctx) + if apiErr != nil { + return nil, apiErr + } + serviceItems := []model.ServiceItem{} + var wg sync.WaitGroup + // limit the number of concurrent queries to not overload the clickhouse server + sem := make(chan struct{}, 10) + var mtx sync.RWMutex - query := 
fmt.Sprintf("SELECT serviceName, quantile(0.99)(durationNano) as p99, avg(durationNano) as avgDuration, count(*) as numCalls FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) - args := []interface{}{} - args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) - if errStatus != nil { - return nil, errStatus + for svc, ops := range *topLevelOps { + sem <- struct{}{} + wg.Add(1) + go func(svc string, ops []string) { + defer wg.Done() + defer func() { <-sem }() + var serviceItem model.ServiceItem + var numErrors uint64 + query := fmt.Sprintf( + `SELECT + quantile(0.99)(durationNano) as p99, + avg(durationNano) as avgDuration, + count(*) as numCalls + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`, + r.traceDB, r.indexTable, + ) + errorQuery := fmt.Sprintf( + `SELECT + count(*) as numErrors + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`, + r.traceDB, r.indexTable, + ) + + args := []interface{}{} + args = append(args, + clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), + clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)), + clickhouse.Named("serviceName", svc), + clickhouse.Named("names", ops), + ) + args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) + if errStatus != nil { + zap.S().Error("Error in processing sql query: ", errStatus) + return + } + err := r.db.QueryRow( + ctx, + query, + args..., + ).ScanStruct(&serviceItem) + + if err != nil { + zap.S().Error("Error in processing sql query: ", err) + return + } + + err = r.db.QueryRow(ctx, errorQuery, args...).Scan(&numErrors) + if err != nil { + zap.S().Error("Error in processing sql query: ", err) + return + } + + 
serviceItem.ServiceName = svc + serviceItem.NumErrors = numErrors + mtx.Lock() + serviceItems = append(serviceItems, serviceItem) + mtx.Unlock() + }(svc, ops) } - query += " GROUP BY serviceName ORDER BY p99 DESC" - err := r.db.Select(ctx, &serviceItems, query, args...) + wg.Wait() - zap.S().Info(query) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + for idx := range serviceItems { + serviceItems[idx].CallRate = float64(serviceItems[idx].NumCalls) / float64(queryParams.Period) + serviceItems[idx].ErrorRate = float64(serviceItems[idx].NumErrors) * 100 / float64(serviceItems[idx].NumCalls) } - - ////////////////// Below block gets 5xx of services - serviceErrorItems := []model.ServiceItem{} - - query = fmt.Sprintf("SELECT serviceName, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND (statusCode>=500 OR statusCode=2)", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) - args = []interface{}{} - args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) - if errStatus != nil { - return nil, errStatus - } - query += " GROUP BY serviceName" - err = r.db.Select(ctx, &serviceErrorItems, query, args...) 
- - zap.S().Info(query) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} - } - - m5xx := make(map[string]uint64) - - for j := range serviceErrorItems { - m5xx[serviceErrorItems[j].ServiceName] = serviceErrorItems[j].NumErrors - } - /////////////////////////////////////////// - - ////////////////// Below block gets 4xx of services - - service4xxItems := []model.ServiceItem{} - - query = fmt.Sprintf("SELECT serviceName, count(*) as num4xx FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND statusCode>=400 AND statusCode<500", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) - args = []interface{}{} - args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) - if errStatus != nil { - return nil, errStatus - } - query += " GROUP BY serviceName" - err = r.db.Select(ctx, &service4xxItems, query, args...) 
- - zap.S().Info(query) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} - } - - m4xx := make(map[string]uint64) - - for j := range service4xxItems { - m4xx[service4xxItems[j].ServiceName] = service4xxItems[j].Num4XX - } - - for i := range serviceItems { - if val, ok := m5xx[serviceItems[i].ServiceName]; ok { - serviceItems[i].NumErrors = val - } - if val, ok := m4xx[serviceItems[i].ServiceName]; ok { - serviceItems[i].Num4XX = val - } - serviceItems[i].CallRate = float64(serviceItems[i].NumCalls) / float64(queryParams.Period) - serviceItems[i].FourXXRate = float64(serviceItems[i].Num4XX) * 100 / float64(serviceItems[i].NumCalls) - serviceItems[i].ErrorRate = float64(serviceItems[i].NumErrors) * 100 / float64(serviceItems[i].NumCalls) - } - return &serviceItems, nil } func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError) { + topLevelOps, apiErr := r.GetTopLevelOperations(ctx) + if apiErr != nil { + return nil, apiErr + } + ops, ok := (*topLevelOps)[queryParams.ServiceName] + if !ok { + return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Service not found")} + } + + namedArgs := []interface{}{ + clickhouse.Named("interval", strconv.Itoa(int(queryParams.StepSeconds/60))), + clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), + clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)), + clickhouse.Named("serviceName", queryParams.ServiceName), + clickhouse.Named("names", ops), + } + serviceOverviewItems := []model.ServiceOverviewItem{} - query := fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, quantile(0.99)(durationNano) as p99, quantile(0.95)(durationNano) as p95,quantile(0.50)(durationNano) as p50, count(*) as numCalls FROM %s.%s WHERE 
timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s'", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName) + query := fmt.Sprintf(` + SELECT + toStartOfInterval(timestamp, INTERVAL @interval minute) as time, + quantile(0.99)(durationNano) as p99, + quantile(0.95)(durationNano) as p95, + quantile(0.50)(durationNano) as p50, + count(*) as numCalls + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`, + r.traceDB, r.indexTable, + ) args := []interface{}{} + args = append(args, namedArgs...) args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) if errStatus != nil { return nil, errStatus @@ -761,17 +813,25 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams * query += " GROUP BY time ORDER BY time DESC" err := r.db.Select(ctx, &serviceOverviewItems, query, args...) 
- zap.S().Info(query) + zap.S().Debug(query) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} } serviceErrorItems := []model.ServiceErrorItem{} - query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s' AND hasError=true", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName) + query = fmt.Sprintf(` + SELECT + toStartOfInterval(timestamp, INTERVAL @interval minute) as time, + count(*) as numErrors + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`, + r.traceDB, r.indexTable, + ) args = []interface{}{} + args = append(args, namedArgs...) args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) if errStatus != nil { return nil, errStatus @@ -779,10 +839,10 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams * query += " GROUP BY time ORDER BY time DESC" err = r.db.Select(ctx, &serviceErrorItems, query, args...) - zap.S().Info(query) + zap.S().Debug(query) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} } @@ -1523,31 +1583,48 @@ func (r *ClickHouseReader) GetTagValues(ctx context.Context, queryParams *model. 
return &cleanedTagValues, nil } -func (r *ClickHouseReader) GetTopEndpoints(ctx context.Context, queryParams *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError) { +func (r *ClickHouseReader) GetTopOperations(ctx context.Context, queryParams *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) { - var topEndpointsItems []model.TopEndpointsItem + namedArgs := []interface{}{ + clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), + clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)), + clickhouse.Named("serviceName", queryParams.ServiceName), + } - query := fmt.Sprintf("SELECT quantile(0.5)(durationNano) as p50, quantile(0.95)(durationNano) as p95, quantile(0.99)(durationNano) as p99, COUNT(1) as numCalls, name FROM %s.%s WHERE timestamp >= '%s' AND timestamp <= '%s' AND kind='2' and serviceName='%s'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName) + var topOperationsItems []model.TopOperationsItem + + query := fmt.Sprintf(` + SELECT + quantile(0.5)(durationNano) as p50, + quantile(0.95)(durationNano) as p95, + quantile(0.99)(durationNano) as p99, + COUNT(*) as numCalls, + name + FROM %s.%s + WHERE serviceName = @serviceName AND timestamp>= @start AND timestamp<= @end`, + r.traceDB, r.indexTable, + ) args := []interface{}{} + args = append(args, namedArgs...) args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) if errStatus != nil { return nil, errStatus } - query += " GROUP BY name" - err := r.db.Select(ctx, &topEndpointsItems, query, args...) + query += " GROUP BY name ORDER BY p99 DESC LIMIT 10" + err := r.db.Select(ctx, &topOperationsItems, query, args...) 
- zap.S().Info(query) + zap.S().Debug(query) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} } - if topEndpointsItems == nil { - topEndpointsItems = []model.TopEndpointsItem{} + if topOperationsItems == nil { + topOperationsItems = []model.TopOperationsItem{} } - return &topEndpointsItems, nil + return &topOperationsItems, nil } func (r *ClickHouseReader) GetUsage(ctx context.Context, queryParams *model.GetUsageParams) (*[]model.UsageItem, error) { diff --git a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go index c01d504e4a..efb5316b87 100644 --- a/pkg/query-service/app/http_handler.go +++ b/pkg/query-service/app/http_handler.go @@ -322,7 +322,8 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) { router.HandleFunc("/api/v1/services", ViewAccess(aH.getServices)).Methods(http.MethodPost) router.HandleFunc("/api/v1/services/list", aH.getServicesList).Methods(http.MethodGet) router.HandleFunc("/api/v1/service/overview", ViewAccess(aH.getServiceOverview)).Methods(http.MethodPost) - router.HandleFunc("/api/v1/service/top_endpoints", ViewAccess(aH.getTopEndpoints)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/service/top_operations", ViewAccess(aH.getTopOperations)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/service/top_level_operations", ViewAccess(aH.getServicesTopLevelOps)).Methods(http.MethodPost) router.HandleFunc("/api/v1/traces/{traceId}", ViewAccess(aH.searchTraces)).Methods(http.MethodGet) router.HandleFunc("/api/v1/usage", ViewAccess(aH.getUsage)).Methods(http.MethodGet) router.HandleFunc("/api/v1/serviceMapDependencies", ViewAccess(aH.serviceMapDependencies)).Methods(http.MethodPost) @@ -1103,14 +1104,14 @@ func (aH *APIHandler) submitFeedback(w http.ResponseWriter, r *http.Request) { } -func (aH *APIHandler) 
getTopEndpoints(w http.ResponseWriter, r *http.Request) { +func (aH *APIHandler) getTopOperations(w http.ResponseWriter, r *http.Request) { - query, err := parseGetTopEndpointsRequest(r) + query, err := parseGetTopOperationsRequest(r) if aH.handleError(w, err, http.StatusBadRequest) { return } - result, apiErr := (*aH.reader).GetTopEndpoints(r.Context(), query) + result, apiErr := (*aH.reader).GetTopOperations(r.Context(), query) if apiErr != nil && aH.handleError(w, apiErr.Err, http.StatusInternalServerError) { return @@ -1152,6 +1153,17 @@ func (aH *APIHandler) getServiceOverview(w http.ResponseWriter, r *http.Request) } +func (aH *APIHandler) getServicesTopLevelOps(w http.ResponseWriter, r *http.Request) { + + result, apiErr := (*aH.reader).GetTopLevelOperations(r.Context()) + if apiErr != nil { + respondError(w, apiErr, nil) + return + } + + aH.writeJSON(w, r, result) +} + func (aH *APIHandler) getServices(w http.ResponseWriter, r *http.Request) { query, err := parseGetServicesRequest(r) diff --git a/pkg/query-service/app/parser.go b/pkg/query-service/app/parser.go index e81b986a3d..6991a03156 100644 --- a/pkg/query-service/app/parser.go +++ b/pkg/query-service/app/parser.go @@ -32,8 +32,8 @@ func parseUser(r *http.Request) (*model.User, error) { return &user, nil } -func parseGetTopEndpointsRequest(r *http.Request) (*model.GetTopEndpointsParams, error) { - var postData *model.GetTopEndpointsParams +func parseGetTopOperationsRequest(r *http.Request) (*model.GetTopOperationsParams, error) { + var postData *model.GetTopOperationsParams err := json.NewDecoder(r.Body).Decode(&postData) if err != nil { @@ -467,8 +467,8 @@ func parseCountErrorsRequest(r *http.Request) (*model.CountErrorsParams, error) } params := &model.CountErrorsParams{ - Start: startTime, - End: endTime, + Start: startTime, + End: endTime, } return params, nil diff --git a/pkg/query-service/config/prometheus.yml b/pkg/query-service/config/prometheus.yml index c515a46662..d7c0ce6911 100644 --- 
a/pkg/query-service/config/prometheus.yml +++ b/pkg/query-service/config/prometheus.yml @@ -23,4 +23,4 @@ scrape_configs: remote_read: - - url: tcp://localhost:9001/?database=signoz_metrics + - url: tcp://localhost:9000/?database=signoz_metrics diff --git a/pkg/query-service/interfaces/interface.go b/pkg/query-service/interfaces/interface.go index 5e9d01be8b..76830c67d8 100644 --- a/pkg/query-service/interfaces/interface.go +++ b/pkg/query-service/interfaces/interface.go @@ -20,8 +20,9 @@ type Reader interface { GetInstantQueryMetricsResult(ctx context.Context, query *model.InstantQueryMetricsParams) (*promql.Result, *stats.QueryStats, *model.ApiError) GetQueryRangeResult(ctx context.Context, query *model.QueryRangeParams) (*promql.Result, *stats.QueryStats, *model.ApiError) GetServiceOverview(ctx context.Context, query *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError) + GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError) GetServices(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError) - GetTopEndpoints(ctx context.Context, query *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError) + GetTopOperations(ctx context.Context, query *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) GetUsage(ctx context.Context, query *model.GetUsageParams) (*[]model.UsageItem, error) GetServicesList(ctx context.Context) (*[]string, error) GetServiceMapDependencies(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) diff --git a/pkg/query-service/model/queryParams.go b/pkg/query-service/model/queryParams.go index 2b964597ab..a215fb8d9b 100644 --- a/pkg/query-service/model/queryParams.go +++ b/pkg/query-service/model/queryParams.go @@ -135,7 +135,7 @@ type MetricAutocompleteTagParams struct { TagKey string } -type GetTopEndpointsParams struct { +type GetTopOperationsParams struct { 
StartTime string `json:"start"` EndTime string `json:"end"` ServiceName string `json:"service"` diff --git a/pkg/query-service/model/response.go b/pkg/query-service/model/response.go index 0bdaf02dc7..9bb35d1d55 100644 --- a/pkg/query-service/model/response.go +++ b/pkg/query-service/model/response.go @@ -3,6 +3,7 @@ package model import ( "encoding/json" "fmt" + "math" "strconv" "time" @@ -217,7 +218,7 @@ type UsageItem struct { Count uint64 `json:"count" ch:"count"` } -type TopEndpointsItem struct { +type TopOperationsItem struct { Percentile50 float64 `json:"p50" ch:"p50"` Percentile95 float64 `json:"p95" ch:"p95"` Percentile99 float64 `json:"p99" ch:"p99"` @@ -403,3 +404,30 @@ func (p *MetricPoint) MarshalJSON() ([]byte, error) { v := strconv.FormatFloat(p.Value, 'f', -1, 64) return json.Marshal([...]interface{}{float64(p.Timestamp) / 1000, v}) } + +// MarshalJSON implements json.Marshaler. +func (s *ServiceItem) MarshalJSON() ([]byte, error) { + // If a service didn't send any data in the last interval duration + // its values such as 99th percentile will return as NaN and + // json encoding doesn't support NaN + // We still want to show it in the UI, so we'll replace NaN with 0 + type Alias ServiceItem + if math.IsInf(s.AvgDuration, 0) || math.IsNaN(s.AvgDuration) { + s.AvgDuration = 0 + } + if math.IsInf(s.CallRate, 0) || math.IsNaN(s.CallRate) { + s.CallRate = 0 + } + if math.IsInf(s.ErrorRate, 0) || math.IsNaN(s.ErrorRate) { + s.ErrorRate = 0 + } + if math.IsInf(s.Percentile99, 0) || math.IsNaN(s.Percentile99) { + s.Percentile99 = 0 + } + + return json.Marshal(&struct { + *Alias + }{ + Alias: (*Alias)(s), + }) +}