feat: show messaging/cron/browser services in listing page (#1455)

* feat: show messaging/cron/browser services in listing page

* chore: issue a maximum of ten concurrent queries to ClickHouse (see the sketch below)

Co-authored-by: Palash Gupta <palashgdev@gmail.com>
Srikanth Chekuri 2022-08-04 11:57:05 +05:30 committed by GitHub
parent 8146da52af
commit 5bfc2af51b
19 changed files with 372 additions and 205 deletions
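
The "maximum of ten concurrent queries" part of this change shows up in ClickHouseReader.GetServices below as a buffered-channel semaphore around the per-service goroutines. A minimal, standalone sketch of that pattern — the queryService worker and the service names here are hypothetical placeholders, not the actual reader code:

package main

import (
	"fmt"
	"sync"
)

// queryService is a hypothetical stand-in for the per-service ClickHouse query.
func queryService(svc string) string {
	return svc + ": ok"
}

func main() {
	services := []string{"frontend", "driver", "route", "customer"} // placeholder service names

	var wg sync.WaitGroup
	sem := make(chan struct{}, 10) // at most 10 in-flight queries, as in GetServices
	var mtx sync.Mutex
	results := make([]string, 0, len(services))

	for _, svc := range services {
		sem <- struct{}{} // acquire a slot; blocks once 10 goroutines are running
		wg.Add(1)
		go func(svc string) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot

			res := queryService(svc) // stand-in for the per-service query

			mtx.Lock()
			results = append(results, res)
			mtx.Unlock()
		}(svc)
	}
	wg.Wait()
	fmt.Println(results)
}
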

View File

@ -0,0 +1,24 @@
import axios from 'api';
import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
import { AxiosError } from 'axios';
import { ErrorResponse, SuccessResponse } from 'types/api';
import { PayloadProps, Props } from 'types/api/metrics/getTopLevelOperations';
const getTopLevelOperations = async (
props: Props,
): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
try {
const response = await axios.post(`/service/top_level_operations`);
return {
statusCode: 200,
error: null,
message: response.data.status,
payload: response.data[props.service],
};
} catch (error) {
return ErrorResponseHandler(error as AxiosError);
}
};
export default getTopLevelOperations;

View File

@ -2,13 +2,13 @@ import axios from 'api';
import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
import { AxiosError } from 'axios';
import { ErrorResponse, SuccessResponse } from 'types/api';
import { PayloadProps, Props } from 'types/api/metrics/getTopEndPoints';
import { PayloadProps, Props } from 'types/api/metrics/getTopOperations';
const getTopEndPoints = async (
const getTopOperations = async (
props: Props,
): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
try {
const response = await axios.post(`/service/top_endpoints`, {
const response = await axios.post(`/service/top_operations`, {
start: `${props.start}`,
end: `${props.end}`,
service: props.service,
@ -26,4 +26,4 @@ const getTopEndPoints = async (
}
};
export default getTopEndPoints;
export default getTopOperations;

View File

@ -15,7 +15,7 @@ import { PromQLWidgets } from 'types/api/dashboard/getAll';
import MetricReducer from 'types/reducer/metrics';
import { Card, Col, GraphContainer, GraphTitle, Row } from '../styles';
import TopEndpointsTable from '../TopEndpointsTable';
import TopOperationsTable from '../TopOperationsTable';
import { Button } from './styles';
function Application({ getWidget }: DashboardProps): JSX.Element {
@ -23,11 +23,13 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
const selectedTimeStamp = useRef(0);
const {
topEndPoints,
topOperations,
serviceOverview,
resourceAttributePromQLQuery,
resourceAttributeQueries,
topLevelOperations,
} = useSelector<AppState, MetricReducer>((state) => state.metrics);
const operationsRegex = topLevelOperations.join('|');
const selectedTraceTags: string = JSON.stringify(
convertRawQueriesToTraceSelectedTags(resourceAttributeQueries, 'array') || [],
@ -107,7 +109,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
<Button
type="default"
size="small"
id="Application_button"
id="Service_button"
onClick={(): void => {
onTracePopupClick(selectedTimeStamp.current);
}}
@ -115,13 +117,13 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
View Traces
</Button>
<Card>
<GraphTitle>Application latency</GraphTitle>
<GraphTitle>Latency</GraphTitle>
<GraphContainer>
<Graph
onClickHandler={(ChartEvent, activeElements, chart, data): void => {
onClickHandler(ChartEvent, activeElements, chart, data, 'Application');
onClickHandler(ChartEvent, activeElements, chart, data, 'Service');
}}
name="application_latency"
name="service_latency"
type="line"
data={{
datasets: [
@ -175,7 +177,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
<Button
type="default"
size="small"
id="Request_button"
id="Rate_button"
onClick={(): void => {
onTracePopupClick(selectedTimeStamp.current);
}}
@ -183,21 +185,21 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
View Traces
</Button>
<Card>
<GraphTitle>Requests</GraphTitle>
<GraphTitle>Rate (ops/s)</GraphTitle>
<GraphContainer>
<FullView
name="request_per_sec"
name="operations_per_sec"
fullViewOptions={false}
onClickHandler={(event, element, chart, data): void => {
onClickHandler(event, element, chart, data, 'Request');
onClickHandler(event, element, chart, data, 'Rate');
}}
widget={getWidget([
{
query: `sum(rate(signoz_latency_count{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))`,
legend: 'Requests',
query: `sum(rate(signoz_latency_count{service_name="${servicename}", operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))`,
legend: 'Operations',
},
])}
yAxisUnit="reqps"
yAxisUnit="ops"
/>
</GraphContainer>
</Card>
@ -227,7 +229,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
}}
widget={getWidget([
{
query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`,
query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`,
legend: 'Error Percentage',
},
])}
@ -239,7 +241,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
<Col span={12}>
<Card>
<TopEndpointsTable data={topEndPoints} />
<TopOperationsTable data={topOperations} />
</Card>
</Col>
</Row>

View File

@ -11,7 +11,7 @@ import { AppState } from 'store/reducers';
import { GlobalReducer } from 'types/reducer/globalTime';
import MetricReducer from 'types/reducer/metrics';
function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
function TopOperationsTable(props: TopOperationsTableProps): JSX.Element {
const { minTime, maxTime } = useSelector<AppState, GlobalReducer>(
(state) => state.globalTime,
);
@ -85,7 +85,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
title: 'Number of Calls',
dataIndex: 'numCalls',
key: 'numCalls',
sorter: (a: TopEndpointListItem, b: TopEndpointListItem): number =>
sorter: (a: TopOperationListItem, b: TopOperationListItem): number =>
a.numCalls - b.numCalls,
},
];
@ -94,7 +94,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
<Table
showHeader
title={(): string => {
return 'Top Endpoints';
return 'Key Operations';
}}
tableLayout="fixed"
dataSource={data}
@ -104,7 +104,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
);
}
interface TopEndpointListItem {
interface TopOperationListItem {
p50: number;
p95: number;
p99: number;
@ -112,10 +112,10 @@ interface TopEndpointListItem {
name: string;
}
type DataProps = TopEndpointListItem;
type DataProps = TopOperationListItem;
interface TopEndpointsTableProps {
data: TopEndpointListItem[];
interface TopOperationsTableProps {
data: TopOperationListItem[];
}
export default TopEndpointsTable;
export default TopOperationsTable;

View File

@ -56,14 +56,14 @@ function Metrics(): JSX.Element {
render: (value: number): string => (value / 1000000).toFixed(2),
},
{
title: 'Error Rate (% of requests)',
title: 'Error Rate (% of total)',
dataIndex: 'errorRate',
key: 'errorRate',
sorter: (a: DataProps, b: DataProps): number => a.errorRate - b.errorRate,
render: (value: number): string => value.toFixed(2),
},
{
title: 'Requests Per Second',
title: 'Operations Per Second',
dataIndex: 'callRate',
key: 'callRate',
sorter: (a: DataProps, b: DataProps): number => a.callRate - b.callRate,

View File

@ -3,7 +3,8 @@
// import getExternalError from 'api/metrics/getExternalError';
// import getExternalService from 'api/metrics/getExternalService';
import getServiceOverview from 'api/metrics/getServiceOverview';
import getTopEndPoints from 'api/metrics/getTopEndPoints';
import getTopLevelOperations from 'api/metrics/getTopLevelOperations';
import getTopOperations from 'api/metrics/getTopOperations';
import { AxiosError } from 'axios';
import GetMinMax from 'lib/getMinMax';
import getStep from 'lib/getStep';
@ -46,7 +47,8 @@ export const GetInitialData = (
// getExternalErrorResponse,
// getExternalServiceResponse,
getServiceOverviewResponse,
getTopEndPointsResponse,
getTopOperationsResponse,
getTopLevelOperationsResponse,
] = await Promise.all([
// getDBOverView({
// ...props,
@ -67,12 +69,15 @@ export const GetInitialData = (
step: getStep({ start: minTime, end: maxTime, inputFormat: 'ns' }),
selectedTags: props.selectedTags,
}),
getTopEndPoints({
getTopOperations({
end: maxTime,
service: props.serviceName,
start: minTime,
selectedTags: props.selectedTags,
}),
getTopLevelOperations({
service: props.serviceName,
}),
]);
if (
@ -81,7 +86,8 @@ export const GetInitialData = (
// getExternalErrorResponse.statusCode === 200 &&
// getExternalServiceResponse.statusCode === 200 &&
getServiceOverviewResponse.statusCode === 200 &&
getTopEndPointsResponse.statusCode === 200
getTopOperationsResponse.statusCode === 200 &&
getTopLevelOperationsResponse.statusCode === 200
) {
dispatch({
type: 'GET_INTIAL_APPLICATION_DATA',
@ -91,7 +97,8 @@ export const GetInitialData = (
// externalError: getExternalErrorResponse.payload,
// externalService: getExternalServiceResponse.payload,
serviceOverview: getServiceOverviewResponse.payload,
topEndPoints: getTopEndPointsResponse.payload,
topOperations: getTopOperationsResponse.payload,
topLevelOperations: getTopLevelOperationsResponse.payload,
},
});
} else {
@ -99,8 +106,9 @@ export const GetInitialData = (
type: 'GET_INITIAL_APPLICATION_ERROR',
payload: {
errorMessage:
getTopEndPointsResponse.error ||
getTopOperationsResponse.error ||
getServiceOverviewResponse.error ||
getTopLevelOperationsResponse.error ||
// getExternalServiceResponse.error ||
// getExternalErrorResponse.error ||
// getExternalAverageDurationResponse.error ||

View File

@ -21,7 +21,7 @@ const InitialValue: InitialValueTypes = {
services: [],
dbOverView: [],
externalService: [],
topEndPoints: [],
topOperations: [],
externalAverageDuration: [],
externalError: [],
serviceOverview: [],
@ -29,6 +29,7 @@ const InitialValue: InitialValueTypes = {
resourceAttributePromQLQuery: resourceAttributesQueryToPromQL(
GetResourceAttributeQueriesFromURL() || [],
),
topLevelOperations: [],
};
const metrics = (
@ -88,22 +89,24 @@ const metrics = (
case GET_INTIAL_APPLICATION_DATA: {
const {
// dbOverView,
topEndPoints,
topOperations,
serviceOverview,
// externalService,
// externalAverageDuration,
// externalError,
topLevelOperations,
} = action.payload;
return {
...state,
// dbOverView,
topEndPoints,
topOperations,
serviceOverview,
// externalService,
// externalAverageDuration,
// externalError,
metricsApplicationLoading: false,
topLevelOperations,
};
}

View File

@ -5,7 +5,7 @@
import { IResourceAttributeQuery } from 'container/MetricsApplication/ResourceAttributesFilter/types';
import { ServicesList } from 'types/api/metrics/getService';
import { ServiceOverview } from 'types/api/metrics/getServiceOverview';
import { TopEndPoints } from 'types/api/metrics/getTopEndPoints';
import { TopOperations } from 'types/api/metrics/getTopOperations';
export const GET_SERVICE_LIST_SUCCESS = 'GET_SERVICE_LIST_SUCCESS';
export const GET_SERVICE_LIST_LOADING_START = 'GET_SERVICE_LIST_LOADING_START';
@ -38,12 +38,13 @@ export interface GetServiceListError {
export interface GetInitialApplicationData {
type: typeof GET_INTIAL_APPLICATION_DATA;
payload: {
topEndPoints: TopEndPoints[];
topOperations: TopOperations[];
// dbOverView: DBOverView[];
// externalService: ExternalService[];
// externalAverageDuration: ExternalAverageDuration[];
// externalError: ExternalError[];
serviceOverview: ServiceOverview[];
topLevelOperations: string[];
};
}

View File

@ -0,0 +1,7 @@
export type TopLevelOperations = string[];
export interface Props {
service: string;
}
export type PayloadProps = TopLevelOperations;

View File

@ -1,6 +1,6 @@
import { Tags } from 'types/reducer/trace';
export interface TopEndPoints {
export interface TopOperations {
name: string;
numCalls: number;
p50: number;
@ -15,4 +15,4 @@ export interface Props {
selectedTags: Tags[];
}
export type PayloadProps = TopEndPoints[];
export type PayloadProps = TopOperations[];

View File

@ -5,7 +5,7 @@ import { ExternalError } from 'types/api/metrics/getExternalError';
import { ExternalService } from 'types/api/metrics/getExternalService';
import { ServicesList } from 'types/api/metrics/getService';
import { ServiceOverview } from 'types/api/metrics/getServiceOverview';
import { TopEndPoints } from 'types/api/metrics/getTopEndPoints';
import { TopOperations } from 'types/api/metrics/getTopOperations';
interface MetricReducer {
services: ServicesList[];
@ -15,12 +15,13 @@ interface MetricReducer {
errorMessage: string;
dbOverView: DBOverView[];
externalService: ExternalService[];
topEndPoints: TopEndPoints[];
topOperations: TopOperations[];
externalAverageDuration: ExternalAverageDuration[];
externalError: ExternalError[];
serviceOverview: ServiceOverview[];
resourceAttributeQueries: IResourceAttributeQuery[];
resourceAttributePromQLQuery: string;
topLevelOperations: string[];
}
export default MetricReducer;

View File

@ -23,8 +23,9 @@ const (
defaultOperationsTable string = "signoz_operations"
defaultIndexTable string = "signoz_index_v2"
defaultErrorTable string = "signoz_error_index_v2"
defaulDurationTable string = "durationSortMV"
defaultDurationTable string = "durationSortMV"
defaultSpansTable string = "signoz_spans"
defaultTopLevelOperationsTable string = "top_level_operations"
defaultWriteBatchDelay time.Duration = 5 * time.Second
defaultWriteBatchSize int = 10000
defaultEncoding Encoding = EncodingJSON
@ -52,6 +53,7 @@ type namespaceConfig struct {
DurationTable string
SpansTable string
ErrorTable string
TopLevelOperationsTable string
WriteBatchDelay time.Duration
WriteBatchSize int
Encoding Encoding
@ -109,8 +111,9 @@ func NewOptions(datasource string, primaryNamespace string, otherNamespaces ...s
OperationsTable: defaultOperationsTable,
IndexTable: defaultIndexTable,
ErrorTable: defaultErrorTable,
DurationTable: defaulDurationTable,
DurationTable: defaultDurationTable,
SpansTable: defaultSpansTable,
TopLevelOperationsTable: defaultTopLevelOperationsTable,
WriteBatchDelay: defaultWriteBatchDelay,
WriteBatchSize: defaultWriteBatchSize,
Encoding: defaultEncoding,

View File

@ -83,6 +83,7 @@ type ClickHouseReader struct {
indexTable string
errorTable string
spansTable string
topLevelOperationsTable string
queryEngine *promql.Engine
remoteStorage *remote.Storage
@ -120,6 +121,7 @@ func NewReader(localDB *sqlx.DB, configFile string) *ClickHouseReader {
errorTable: options.primary.ErrorTable,
durationTable: options.primary.DurationTable,
spansTable: options.primary.SpansTable,
topLevelOperationsTable: options.primary.TopLevelOperationsTable,
promConfigFile: configFile,
}
}
@ -657,103 +659,153 @@ func (r *ClickHouseReader) GetServicesList(ctx context.Context) (*[]string, erro
return &services, nil
}
func (r *ClickHouseReader) GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError) {
operations := map[string][]string{}
query := fmt.Sprintf(`SELECT DISTINCT name, serviceName FROM %s.%s`, r.traceDB, r.topLevelOperationsTable)
rows, err := r.db.Query(ctx, query)
if err != nil {
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
defer rows.Close()
for rows.Next() {
var name, serviceName string
if err := rows.Scan(&name, &serviceName); err != nil {
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: fmt.Errorf("Error in reading data")}
}
if _, ok := operations[serviceName]; !ok {
operations[serviceName] = []string{}
}
operations[serviceName] = append(operations[serviceName], name)
}
return &operations, nil
}
func (r *ClickHouseReader) GetServices(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError) {
if r.indexTable == "" {
return nil, &model.ApiError{Typ: model.ErrorExec, Err: ErrNoIndexTable}
}
serviceItems := []model.ServiceItem{}
topLevelOps, apiErr := r.GetTopLevelOperations(ctx)
if apiErr != nil {
return nil, apiErr
}
serviceItems := []model.ServiceItem{}
var wg sync.WaitGroup
// limit the number of concurrent queries to not overload the clickhouse server
sem := make(chan struct{}, 10)
var mtx sync.RWMutex
for svc, ops := range *topLevelOps {
sem <- struct{}{}
wg.Add(1)
go func(svc string, ops []string) {
defer wg.Done()
defer func() { <-sem }()
var serviceItem model.ServiceItem
var numErrors uint64
query := fmt.Sprintf(
`SELECT
quantile(0.99)(durationNano) as p99,
avg(durationNano) as avgDuration,
count(*) as numCalls
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`,
r.traceDB, r.indexTable,
)
errorQuery := fmt.Sprintf(
`SELECT
count(*) as numErrors
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`,
r.traceDB, r.indexTable,
)
query := fmt.Sprintf("SELECT serviceName, quantile(0.99)(durationNano) as p99, avg(durationNano) as avgDuration, count(*) as numCalls FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10))
args := []interface{}{}
args = append(args,
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", svc),
clickhouse.Named("names", ops),
)
args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
zap.S().Error("Error in processing sql query: ", errStatus)
return
}
query += " GROUP BY serviceName ORDER BY p99 DESC"
err := r.db.Select(ctx, &serviceItems, query, args...)
zap.S().Info(query)
err := r.db.QueryRow(
ctx,
query,
args...,
).ScanStruct(&serviceItem)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
zap.S().Error("Error in processing sql query: ", err)
return
}
////////////////// Below block gets 5xx of services
serviceErrorItems := []model.ServiceItem{}
query = fmt.Sprintf("SELECT serviceName, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND (statusCode>=500 OR statusCode=2)", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10))
args = []interface{}{}
args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY serviceName"
err = r.db.Select(ctx, &serviceErrorItems, query, args...)
zap.S().Info(query)
err = r.db.QueryRow(ctx, errorQuery, args...).Scan(&numErrors)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
zap.S().Error("Error in processing sql query: ", err)
return
}
m5xx := make(map[string]uint64)
for j := range serviceErrorItems {
m5xx[serviceErrorItems[j].ServiceName] = serviceErrorItems[j].NumErrors
serviceItem.ServiceName = svc
serviceItem.NumErrors = numErrors
mtx.Lock()
serviceItems = append(serviceItems, serviceItem)
mtx.Unlock()
}(svc, ops)
}
///////////////////////////////////////////
wg.Wait()
////////////////// Below block gets 4xx of services
service4xxItems := []model.ServiceItem{}
query = fmt.Sprintf("SELECT serviceName, count(*) as num4xx FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND statusCode>=400 AND statusCode<500", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10))
args = []interface{}{}
args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
for idx := range serviceItems {
serviceItems[idx].CallRate = float64(serviceItems[idx].NumCalls) / float64(queryParams.Period)
serviceItems[idx].ErrorRate = float64(serviceItems[idx].NumErrors) * 100 / float64(serviceItems[idx].NumCalls)
}
query += " GROUP BY serviceName"
err = r.db.Select(ctx, &service4xxItems, query, args...)
zap.S().Info(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
m4xx := make(map[string]uint64)
for j := range service4xxItems {
m4xx[service4xxItems[j].ServiceName] = service4xxItems[j].Num4XX
}
for i := range serviceItems {
if val, ok := m5xx[serviceItems[i].ServiceName]; ok {
serviceItems[i].NumErrors = val
}
if val, ok := m4xx[serviceItems[i].ServiceName]; ok {
serviceItems[i].Num4XX = val
}
serviceItems[i].CallRate = float64(serviceItems[i].NumCalls) / float64(queryParams.Period)
serviceItems[i].FourXXRate = float64(serviceItems[i].Num4XX) * 100 / float64(serviceItems[i].NumCalls)
serviceItems[i].ErrorRate = float64(serviceItems[i].NumErrors) * 100 / float64(serviceItems[i].NumCalls)
}
return &serviceItems, nil
}
func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError) {
topLevelOps, apiErr := r.GetTopLevelOperations(ctx)
if apiErr != nil {
return nil, apiErr
}
ops, ok := (*topLevelOps)[queryParams.ServiceName]
if !ok {
return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Service not found")}
}
namedArgs := []interface{}{
clickhouse.Named("interval", strconv.Itoa(int(queryParams.StepSeconds/60))),
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", queryParams.ServiceName),
clickhouse.Named("names", ops),
}
serviceOverviewItems := []model.ServiceOverviewItem{}
query := fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, quantile(0.99)(durationNano) as p99, quantile(0.95)(durationNano) as p95,quantile(0.50)(durationNano) as p50, count(*) as numCalls FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s'", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName)
query := fmt.Sprintf(`
SELECT
toStartOfInterval(timestamp, INTERVAL @interval minute) as time,
quantile(0.99)(durationNano) as p99,
quantile(0.95)(durationNano) as p95,
quantile(0.50)(durationNano) as p50,
count(*) as numCalls
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`,
r.traceDB, r.indexTable,
)
args := []interface{}{}
args = append(args, namedArgs...)
args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
@ -761,17 +813,25 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *
query += " GROUP BY time ORDER BY time DESC"
err := r.db.Select(ctx, &serviceOverviewItems, query, args...)
zap.S().Info(query)
zap.S().Debug(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
serviceErrorItems := []model.ServiceErrorItem{}
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s' AND hasError=true", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName)
query = fmt.Sprintf(`
SELECT
toStartOfInterval(timestamp, INTERVAL @interval minute) as time,
count(*) as numErrors
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`,
r.traceDB, r.indexTable,
)
args = []interface{}{}
args = append(args, namedArgs...)
args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
@ -779,10 +839,10 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *
query += " GROUP BY time ORDER BY time DESC"
err = r.db.Select(ctx, &serviceErrorItems, query, args...)
zap.S().Info(query)
zap.S().Debug(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
@ -1523,31 +1583,48 @@ func (r *ClickHouseReader) GetTagValues(ctx context.Context, queryParams *model.
return &cleanedTagValues, nil
}
func (r *ClickHouseReader) GetTopEndpoints(ctx context.Context, queryParams *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError) {
func (r *ClickHouseReader) GetTopOperations(ctx context.Context, queryParams *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) {
var topEndpointsItems []model.TopEndpointsItem
namedArgs := []interface{}{
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", queryParams.ServiceName),
}
query := fmt.Sprintf("SELECT quantile(0.5)(durationNano) as p50, quantile(0.95)(durationNano) as p95, quantile(0.99)(durationNano) as p99, COUNT(1) as numCalls, name FROM %s.%s WHERE timestamp >= '%s' AND timestamp <= '%s' AND kind='2' and serviceName='%s'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName)
var topOperationsItems []model.TopOperationsItem
query := fmt.Sprintf(`
SELECT
quantile(0.5)(durationNano) as p50,
quantile(0.95)(durationNano) as p95,
quantile(0.99)(durationNano) as p99,
COUNT(*) as numCalls,
name
FROM %s.%s
WHERE serviceName = @serviceName AND timestamp>= @start AND timestamp<= @end`,
r.traceDB, r.indexTable,
)
args := []interface{}{}
args = append(args, namedArgs...)
args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY name"
err := r.db.Select(ctx, &topEndpointsItems, query, args...)
query += " GROUP BY name ORDER BY p99 DESC LIMIT 10"
err := r.db.Select(ctx, &topOperationsItems, query, args...)
zap.S().Info(query)
zap.S().Debug(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
if topEndpointsItems == nil {
topEndpointsItems = []model.TopEndpointsItem{}
if topOperationsItems == nil {
topOperationsItems = []model.TopOperationsItem{}
}
return &topEndpointsItems, nil
return &topOperationsItems, nil
}
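
The rewritten reader queries above bind values through clickhouse-go's named parameters (clickhouse.Named plus @name placeholders, including the `name In [@names]` slice form) instead of fmt.Sprintf interpolation. A minimal sketch of that binding style, assuming a clickhouse-go v2 connection; the address, database/table name, and example values are placeholders, not taken from this PR:

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ClickHouse/clickhouse-go/v2"
)

func main() {
	// Placeholder address/credentials; adjust for your ClickHouse instance.
	conn, err := clickhouse.Open(&clickhouse.Options{Addr: []string{"localhost:9000"}})
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// Values are bound by name via @serviceName / @names, mirroring the queries in this PR.
	query := `SELECT count(*) as numCalls
	          FROM signoz_traces.signoz_index_v2
	          WHERE serviceName = @serviceName AND name In [@names]`

	var numCalls uint64
	err = conn.QueryRow(context.Background(), query,
		clickhouse.Named("serviceName", "frontend"),                // example service
		clickhouse.Named("names", []string{"HTTP GET /dispatch"}), // example top-level operations
	).Scan(&numCalls)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("calls:", numCalls)
}
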
func (r *ClickHouseReader) GetUsage(ctx context.Context, queryParams *model.GetUsageParams) (*[]model.UsageItem, error) {

View File

@ -322,7 +322,8 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) {
router.HandleFunc("/api/v1/services", ViewAccess(aH.getServices)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/services/list", aH.getServicesList).Methods(http.MethodGet)
router.HandleFunc("/api/v1/service/overview", ViewAccess(aH.getServiceOverview)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/service/top_endpoints", ViewAccess(aH.getTopEndpoints)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/service/top_operations", ViewAccess(aH.getTopOperations)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/service/top_level_operations", ViewAccess(aH.getServicesTopLevelOps)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/traces/{traceId}", ViewAccess(aH.searchTraces)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/usage", ViewAccess(aH.getUsage)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/serviceMapDependencies", ViewAccess(aH.serviceMapDependencies)).Methods(http.MethodPost)
@ -1103,14 +1104,14 @@ func (aH *APIHandler) submitFeedback(w http.ResponseWriter, r *http.Request) {
}
func (aH *APIHandler) getTopEndpoints(w http.ResponseWriter, r *http.Request) {
func (aH *APIHandler) getTopOperations(w http.ResponseWriter, r *http.Request) {
query, err := parseGetTopEndpointsRequest(r)
query, err := parseGetTopOperationsRequest(r)
if aH.handleError(w, err, http.StatusBadRequest) {
return
}
result, apiErr := (*aH.reader).GetTopEndpoints(r.Context(), query)
result, apiErr := (*aH.reader).GetTopOperations(r.Context(), query)
if apiErr != nil && aH.handleError(w, apiErr.Err, http.StatusInternalServerError) {
return
@ -1152,6 +1153,17 @@ func (aH *APIHandler) getServiceOverview(w http.ResponseWriter, r *http.Request)
}
func (aH *APIHandler) getServicesTopLevelOps(w http.ResponseWriter, r *http.Request) {
result, apiErr := (*aH.reader).GetTopLevelOperations(r.Context())
if apiErr != nil {
respondError(w, apiErr, nil)
return
}
aH.writeJSON(w, r, result)
}
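
The getServicesTopLevelOps handler above backs the new /api/v1/service/top_level_operations route and returns the service-to-operations map produced by GetTopLevelOperations. A minimal client-side sketch, assuming a query service reachable at localhost:8080 with no extra auth headers (ViewAccess may require a token in a real deployment); the base URL and the service name used for the lookup are assumptions:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	// Placeholder base URL; the query-service address is an assumption.
	resp, err := http.Post(
		"http://localhost:8080/api/v1/service/top_level_operations",
		"application/json",
		strings.NewReader(`{}`),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Response shape per GetTopLevelOperations: service name -> top-level operation names.
	var ops map[string][]string
	if err := json.NewDecoder(resp.Body).Decode(&ops); err != nil {
		log.Fatal(err)
	}
	fmt.Println(ops["frontend"]) // example service name
}
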
func (aH *APIHandler) getServices(w http.ResponseWriter, r *http.Request) {
query, err := parseGetServicesRequest(r)

View File

@ -32,8 +32,8 @@ func parseUser(r *http.Request) (*model.User, error) {
return &user, nil
}
func parseGetTopEndpointsRequest(r *http.Request) (*model.GetTopEndpointsParams, error) {
var postData *model.GetTopEndpointsParams
func parseGetTopOperationsRequest(r *http.Request) (*model.GetTopOperationsParams, error) {
var postData *model.GetTopOperationsParams
err := json.NewDecoder(r.Body).Decode(&postData)
if err != nil {

View File

@ -23,4 +23,4 @@ scrape_configs:
remote_read:
- url: tcp://localhost:9001/?database=signoz_metrics
- url: tcp://localhost:9000/?database=signoz_metrics

View File

@ -20,8 +20,9 @@ type Reader interface {
GetInstantQueryMetricsResult(ctx context.Context, query *model.InstantQueryMetricsParams) (*promql.Result, *stats.QueryStats, *model.ApiError)
GetQueryRangeResult(ctx context.Context, query *model.QueryRangeParams) (*promql.Result, *stats.QueryStats, *model.ApiError)
GetServiceOverview(ctx context.Context, query *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError)
GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError)
GetServices(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError)
GetTopEndpoints(ctx context.Context, query *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError)
GetTopOperations(ctx context.Context, query *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError)
GetUsage(ctx context.Context, query *model.GetUsageParams) (*[]model.UsageItem, error)
GetServicesList(ctx context.Context) (*[]string, error)
GetServiceMapDependencies(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error)

View File

@ -135,7 +135,7 @@ type MetricAutocompleteTagParams struct {
TagKey string
}
type GetTopEndpointsParams struct {
type GetTopOperationsParams struct {
StartTime string `json:"start"`
EndTime string `json:"end"`
ServiceName string `json:"service"`

View File

@ -3,6 +3,7 @@ package model
import (
"encoding/json"
"fmt"
"math"
"strconv"
"time"
@ -217,7 +218,7 @@ type UsageItem struct {
Count uint64 `json:"count" ch:"count"`
}
type TopEndpointsItem struct {
type TopOperationsItem struct {
Percentile50 float64 `json:"p50" ch:"p50"`
Percentile95 float64 `json:"p95" ch:"p95"`
Percentile99 float64 `json:"p99" ch:"p99"`
@ -403,3 +404,30 @@ func (p *MetricPoint) MarshalJSON() ([]byte, error) {
v := strconv.FormatFloat(p.Value, 'f', -1, 64)
return json.Marshal([...]interface{}{float64(p.Timestamp) / 1000, v})
}
// MarshalJSON implements json.Marshaler.
func (s *ServiceItem) MarshalJSON() ([]byte, error) {
// If a service didn't send any data in the last interval duration,
// its values such as the 99th percentile will come back as NaN, and
// JSON encoding doesn't support NaN.
// We still want to show it in the UI, so we'll replace NaN with 0
type Alias ServiceItem
if math.IsInf(s.AvgDuration, 0) || math.IsNaN(s.AvgDuration) {
s.AvgDuration = 0
}
if math.IsInf(s.CallRate, 0) || math.IsNaN(s.CallRate) {
s.CallRate = 0
}
if math.IsInf(s.ErrorRate, 0) || math.IsNaN(s.ErrorRate) {
s.ErrorRate = 0
}
if math.IsInf(s.Percentile99, 0) || math.IsNaN(s.Percentile99) {
s.Percentile99 = 0
}
return json.Marshal(&struct {
*Alias
}{
Alias: (*Alias)(s),
})
}
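
For context on the sanitization above: encoding/json refuses to marshal NaN or ±Inf float values and returns an UnsupportedValueError, so a raw ServiceItem with NaN percentiles would fail to encode. A quick illustration:

package main

import (
	"encoding/json"
	"fmt"
	"math"
)

func main() {
	_, err := json.Marshal(map[string]float64{"p99": math.NaN()})
	fmt.Println(err) // json: unsupported value: NaN

	b, _ := json.Marshal(map[string]float64{"p99": 0}) // after replacing NaN with 0
	fmt.Println(string(b))                             // {"p99":0}
}
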