feat: show messaging/cron/browser services in listing page (#1455)

* feat: show messaging/cron/browser services in listing page

* chore: issue maximum of ten queries to clickhouse

Co-authored-by: Palash Gupta <palashgdev@gmail.com>
This commit is contained in:
Srikanth Chekuri 2022-08-04 11:57:05 +05:30 committed by GitHub
parent 8146da52af
commit 5bfc2af51b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 372 additions and 205 deletions

View File

@ -0,0 +1,24 @@
import axios from 'api';
import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
import { AxiosError } from 'axios';
import { ErrorResponse, SuccessResponse } from 'types/api';
import { PayloadProps, Props } from 'types/api/metrics/getTopLevelOperations';
const getTopLevelOperations = async (
props: Props,
): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
try {
const response = await axios.post(`/service/top_level_operations`);
return {
statusCode: 200,
error: null,
message: response.data.status,
payload: response.data[props.service],
};
} catch (error) {
return ErrorResponseHandler(error as AxiosError);
}
};
export default getTopLevelOperations;

View File

@ -2,13 +2,13 @@ import axios from 'api';
import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
import { AxiosError } from 'axios';
import { ErrorResponse, SuccessResponse } from 'types/api';
import { PayloadProps, Props } from 'types/api/metrics/getTopEndPoints';
import { PayloadProps, Props } from 'types/api/metrics/getTopOperations';
const getTopEndPoints = async (
const getTopOperations = async (
props: Props,
): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
try {
const response = await axios.post(`/service/top_endpoints`, {
const response = await axios.post(`/service/top_operations`, {
start: `${props.start}`,
end: `${props.end}`,
service: props.service,
@ -26,4 +26,4 @@ const getTopEndPoints = async (
}
};
export default getTopEndPoints;
export default getTopOperations;

View File

@ -15,7 +15,7 @@ import { PromQLWidgets } from 'types/api/dashboard/getAll';
import MetricReducer from 'types/reducer/metrics';
import { Card, Col, GraphContainer, GraphTitle, Row } from '../styles';
import TopEndpointsTable from '../TopEndpointsTable';
import TopOperationsTable from '../TopOperationsTable';
import { Button } from './styles';
function Application({ getWidget }: DashboardProps): JSX.Element {
@ -23,11 +23,13 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
const selectedTimeStamp = useRef(0);
const {
topEndPoints,
topOperations,
serviceOverview,
resourceAttributePromQLQuery,
resourceAttributeQueries,
topLevelOperations,
} = useSelector<AppState, MetricReducer>((state) => state.metrics);
const operationsRegex = topLevelOperations.join('|');
const selectedTraceTags: string = JSON.stringify(
convertRawQueriesToTraceSelectedTags(resourceAttributeQueries, 'array') || [],
@ -107,7 +109,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
<Button
type="default"
size="small"
id="Application_button"
id="Service_button"
onClick={(): void => {
onTracePopupClick(selectedTimeStamp.current);
}}
@ -115,13 +117,13 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
View Traces
</Button>
<Card>
<GraphTitle>Application latency</GraphTitle>
<GraphTitle>Latency</GraphTitle>
<GraphContainer>
<Graph
onClickHandler={(ChartEvent, activeElements, chart, data): void => {
onClickHandler(ChartEvent, activeElements, chart, data, 'Application');
onClickHandler(ChartEvent, activeElements, chart, data, 'Service');
}}
name="application_latency"
name="service_latency"
type="line"
data={{
datasets: [
@ -175,7 +177,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
<Button
type="default"
size="small"
id="Request_button"
id="Rate_button"
onClick={(): void => {
onTracePopupClick(selectedTimeStamp.current);
}}
@ -183,21 +185,21 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
View Traces
</Button>
<Card>
<GraphTitle>Requests</GraphTitle>
<GraphTitle>Rate (ops/s)</GraphTitle>
<GraphContainer>
<FullView
name="request_per_sec"
name="operations_per_sec"
fullViewOptions={false}
onClickHandler={(event, element, chart, data): void => {
onClickHandler(event, element, chart, data, 'Request');
onClickHandler(event, element, chart, data, 'Rate');
}}
widget={getWidget([
{
query: `sum(rate(signoz_latency_count{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))`,
legend: 'Requests',
query: `sum(rate(signoz_latency_count{service_name="${servicename}", operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))`,
legend: 'Operations',
},
])}
yAxisUnit="reqps"
yAxisUnit="ops"
/>
</GraphContainer>
</Card>
@ -227,7 +229,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
}}
widget={getWidget([
{
query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`,
query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`,
legend: 'Error Percentage',
},
])}
@ -239,7 +241,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element {
<Col span={12}>
<Card>
<TopEndpointsTable data={topEndPoints} />
<TopOperationsTable data={topOperations} />
</Card>
</Col>
</Row>

View File

@ -11,7 +11,7 @@ import { AppState } from 'store/reducers';
import { GlobalReducer } from 'types/reducer/globalTime';
import MetricReducer from 'types/reducer/metrics';
function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
function TopOperationsTable(props: TopOperationsTableProps): JSX.Element {
const { minTime, maxTime } = useSelector<AppState, GlobalReducer>(
(state) => state.globalTime,
);
@ -85,7 +85,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
title: 'Number of Calls',
dataIndex: 'numCalls',
key: 'numCalls',
sorter: (a: TopEndpointListItem, b: TopEndpointListItem): number =>
sorter: (a: TopOperationListItem, b: TopOperationListItem): number =>
a.numCalls - b.numCalls,
},
];
@ -94,7 +94,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
<Table
showHeader
title={(): string => {
return 'Top Endpoints';
return 'Key Operations';
}}
tableLayout="fixed"
dataSource={data}
@ -104,7 +104,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element {
);
}
interface TopEndpointListItem {
interface TopOperationListItem {
p50: number;
p95: number;
p99: number;
@ -112,10 +112,10 @@ interface TopEndpointListItem {
name: string;
}
type DataProps = TopEndpointListItem;
type DataProps = TopOperationListItem;
interface TopEndpointsTableProps {
data: TopEndpointListItem[];
interface TopOperationsTableProps {
data: TopOperationListItem[];
}
export default TopEndpointsTable;
export default TopOperationsTable;

View File

@ -56,14 +56,14 @@ function Metrics(): JSX.Element {
render: (value: number): string => (value / 1000000).toFixed(2),
},
{
title: 'Error Rate (% of requests)',
title: 'Error Rate (% of total)',
dataIndex: 'errorRate',
key: 'errorRate',
sorter: (a: DataProps, b: DataProps): number => a.errorRate - b.errorRate,
render: (value: number): string => value.toFixed(2),
},
{
title: 'Requests Per Second',
title: 'Operations Per Second',
dataIndex: 'callRate',
key: 'callRate',
sorter: (a: DataProps, b: DataProps): number => a.callRate - b.callRate,

View File

@ -3,7 +3,8 @@
// import getExternalError from 'api/metrics/getExternalError';
// import getExternalService from 'api/metrics/getExternalService';
import getServiceOverview from 'api/metrics/getServiceOverview';
import getTopEndPoints from 'api/metrics/getTopEndPoints';
import getTopLevelOperations from 'api/metrics/getTopLevelOperations';
import getTopOperations from 'api/metrics/getTopOperations';
import { AxiosError } from 'axios';
import GetMinMax from 'lib/getMinMax';
import getStep from 'lib/getStep';
@ -46,7 +47,8 @@ export const GetInitialData = (
// getExternalErrorResponse,
// getExternalServiceResponse,
getServiceOverviewResponse,
getTopEndPointsResponse,
getTopOperationsResponse,
getTopLevelOperationsResponse,
] = await Promise.all([
// getDBOverView({
// ...props,
@ -67,12 +69,15 @@ export const GetInitialData = (
step: getStep({ start: minTime, end: maxTime, inputFormat: 'ns' }),
selectedTags: props.selectedTags,
}),
getTopEndPoints({
getTopOperations({
end: maxTime,
service: props.serviceName,
start: minTime,
selectedTags: props.selectedTags,
}),
getTopLevelOperations({
service: props.serviceName,
}),
]);
if (
@ -81,7 +86,8 @@ export const GetInitialData = (
// getExternalErrorResponse.statusCode === 200 &&
// getExternalServiceResponse.statusCode === 200 &&
getServiceOverviewResponse.statusCode === 200 &&
getTopEndPointsResponse.statusCode === 200
getTopOperationsResponse.statusCode === 200 &&
getTopLevelOperationsResponse.statusCode === 200
) {
dispatch({
type: 'GET_INTIAL_APPLICATION_DATA',
@ -91,7 +97,8 @@ export const GetInitialData = (
// externalError: getExternalErrorResponse.payload,
// externalService: getExternalServiceResponse.payload,
serviceOverview: getServiceOverviewResponse.payload,
topEndPoints: getTopEndPointsResponse.payload,
topOperations: getTopOperationsResponse.payload,
topLevelOperations: getTopLevelOperationsResponse.payload,
},
});
} else {
@ -99,8 +106,9 @@ export const GetInitialData = (
type: 'GET_INITIAL_APPLICATION_ERROR',
payload: {
errorMessage:
getTopEndPointsResponse.error ||
getTopOperationsResponse.error ||
getServiceOverviewResponse.error ||
getTopLevelOperationsResponse.error ||
// getExternalServiceResponse.error ||
// getExternalErrorResponse.error ||
// getExternalAverageDurationResponse.error ||

View File

@ -21,7 +21,7 @@ const InitialValue: InitialValueTypes = {
services: [],
dbOverView: [],
externalService: [],
topEndPoints: [],
topOperations: [],
externalAverageDuration: [],
externalError: [],
serviceOverview: [],
@ -29,6 +29,7 @@ const InitialValue: InitialValueTypes = {
resourceAttributePromQLQuery: resourceAttributesQueryToPromQL(
GetResourceAttributeQueriesFromURL() || [],
),
topLevelOperations: [],
};
const metrics = (
@ -88,22 +89,24 @@ const metrics = (
case GET_INTIAL_APPLICATION_DATA: {
const {
// dbOverView,
topEndPoints,
topOperations,
serviceOverview,
// externalService,
// externalAverageDuration,
// externalError,
topLevelOperations,
} = action.payload;
return {
...state,
// dbOverView,
topEndPoints,
topOperations,
serviceOverview,
// externalService,
// externalAverageDuration,
// externalError,
metricsApplicationLoading: false,
topLevelOperations,
};
}

View File

@ -5,7 +5,7 @@
import { IResourceAttributeQuery } from 'container/MetricsApplication/ResourceAttributesFilter/types';
import { ServicesList } from 'types/api/metrics/getService';
import { ServiceOverview } from 'types/api/metrics/getServiceOverview';
import { TopEndPoints } from 'types/api/metrics/getTopEndPoints';
import { TopOperations } from 'types/api/metrics/getTopOperations';
export const GET_SERVICE_LIST_SUCCESS = 'GET_SERVICE_LIST_SUCCESS';
export const GET_SERVICE_LIST_LOADING_START = 'GET_SERVICE_LIST_LOADING_START';
@ -38,12 +38,13 @@ export interface GetServiceListError {
export interface GetInitialApplicationData {
type: typeof GET_INTIAL_APPLICATION_DATA;
payload: {
topEndPoints: TopEndPoints[];
topOperations: TopOperations[];
// dbOverView: DBOverView[];
// externalService: ExternalService[];
// externalAverageDuration: ExternalAverageDuration[];
// externalError: ExternalError[];
serviceOverview: ServiceOverview[];
topLevelOperations: string[];
};
}

View File

@ -0,0 +1,7 @@
export type TopLevelOperations = string[];
export interface Props {
service: string;
}
export type PayloadProps = TopLevelOperations;

View File

@ -1,6 +1,6 @@
import { Tags } from 'types/reducer/trace';
export interface TopEndPoints {
export interface TopOperations {
name: string;
numCalls: number;
p50: number;
@ -15,4 +15,4 @@ export interface Props {
selectedTags: Tags[];
}
export type PayloadProps = TopEndPoints[];
export type PayloadProps = TopOperations[];

View File

@ -5,7 +5,7 @@ import { ExternalError } from 'types/api/metrics/getExternalError';
import { ExternalService } from 'types/api/metrics/getExternalService';
import { ServicesList } from 'types/api/metrics/getService';
import { ServiceOverview } from 'types/api/metrics/getServiceOverview';
import { TopEndPoints } from 'types/api/metrics/getTopEndPoints';
import { TopOperations } from 'types/api/metrics/getTopOperations';
interface MetricReducer {
services: ServicesList[];
@ -15,12 +15,13 @@ interface MetricReducer {
errorMessage: string;
dbOverView: DBOverView[];
externalService: ExternalService[];
topEndPoints: TopEndPoints[];
topOperations: TopOperations[];
externalAverageDuration: ExternalAverageDuration[];
externalError: ExternalError[];
serviceOverview: ServiceOverview[];
resourceAttributeQueries: IResourceAttributeQuery[];
resourceAttributePromQLQuery: string;
topLevelOperations: string[];
}
export default MetricReducer;

View File

@ -18,16 +18,17 @@ const (
)
const (
defaultDatasource string = "tcp://localhost:9000"
defaultTraceDB string = "signoz_traces"
defaultOperationsTable string = "signoz_operations"
defaultIndexTable string = "signoz_index_v2"
defaultErrorTable string = "signoz_error_index_v2"
defaulDurationTable string = "durationSortMV"
defaultSpansTable string = "signoz_spans"
defaultWriteBatchDelay time.Duration = 5 * time.Second
defaultWriteBatchSize int = 10000
defaultEncoding Encoding = EncodingJSON
defaultDatasource string = "tcp://localhost:9000"
defaultTraceDB string = "signoz_traces"
defaultOperationsTable string = "signoz_operations"
defaultIndexTable string = "signoz_index_v2"
defaultErrorTable string = "signoz_error_index_v2"
defaultDurationTable string = "durationSortMV"
defaultSpansTable string = "signoz_spans"
defaultTopLevelOperationsTable string = "top_level_operations"
defaultWriteBatchDelay time.Duration = 5 * time.Second
defaultWriteBatchSize int = 10000
defaultEncoding Encoding = EncodingJSON
)
const (
@ -43,19 +44,20 @@ const (
// NamespaceConfig is Clickhouse's internal configuration data
type namespaceConfig struct {
namespace string
Enabled bool
Datasource string
TraceDB string
OperationsTable string
IndexTable string
DurationTable string
SpansTable string
ErrorTable string
WriteBatchDelay time.Duration
WriteBatchSize int
Encoding Encoding
Connector Connector
namespace string
Enabled bool
Datasource string
TraceDB string
OperationsTable string
IndexTable string
DurationTable string
SpansTable string
ErrorTable string
TopLevelOperationsTable string
WriteBatchDelay time.Duration
WriteBatchSize int
Encoding Encoding
Connector Connector
}
// Connecto defines how to connect to the database
@ -102,19 +104,20 @@ func NewOptions(datasource string, primaryNamespace string, otherNamespaces ...s
options := &Options{
primary: &namespaceConfig{
namespace: primaryNamespace,
Enabled: true,
Datasource: datasource,
TraceDB: defaultTraceDB,
OperationsTable: defaultOperationsTable,
IndexTable: defaultIndexTable,
ErrorTable: defaultErrorTable,
DurationTable: defaulDurationTable,
SpansTable: defaultSpansTable,
WriteBatchDelay: defaultWriteBatchDelay,
WriteBatchSize: defaultWriteBatchSize,
Encoding: defaultEncoding,
Connector: defaultConnector,
namespace: primaryNamespace,
Enabled: true,
Datasource: datasource,
TraceDB: defaultTraceDB,
OperationsTable: defaultOperationsTable,
IndexTable: defaultIndexTable,
ErrorTable: defaultErrorTable,
DurationTable: defaultDurationTable,
SpansTable: defaultSpansTable,
TopLevelOperationsTable: defaultTopLevelOperationsTable,
WriteBatchDelay: defaultWriteBatchDelay,
WriteBatchSize: defaultWriteBatchSize,
Encoding: defaultEncoding,
Connector: defaultConnector,
},
others: make(map[string]*namespaceConfig, len(otherNamespaces)),
}

View File

@ -75,16 +75,17 @@ var (
// SpanWriter for reading spans from ClickHouse
type ClickHouseReader struct {
db clickhouse.Conn
localDB *sqlx.DB
traceDB string
operationsTable string
durationTable string
indexTable string
errorTable string
spansTable string
queryEngine *promql.Engine
remoteStorage *remote.Storage
db clickhouse.Conn
localDB *sqlx.DB
traceDB string
operationsTable string
durationTable string
indexTable string
errorTable string
spansTable string
topLevelOperationsTable string
queryEngine *promql.Engine
remoteStorage *remote.Storage
promConfigFile string
promConfig *config.Config
@ -111,16 +112,17 @@ func NewReader(localDB *sqlx.DB, configFile string) *ClickHouseReader {
}
return &ClickHouseReader{
db: db,
localDB: localDB,
traceDB: options.primary.TraceDB,
alertManager: alertManager,
operationsTable: options.primary.OperationsTable,
indexTable: options.primary.IndexTable,
errorTable: options.primary.ErrorTable,
durationTable: options.primary.DurationTable,
spansTable: options.primary.SpansTable,
promConfigFile: configFile,
db: db,
localDB: localDB,
traceDB: options.primary.TraceDB,
alertManager: alertManager,
operationsTable: options.primary.OperationsTable,
indexTable: options.primary.IndexTable,
errorTable: options.primary.ErrorTable,
durationTable: options.primary.DurationTable,
spansTable: options.primary.SpansTable,
topLevelOperationsTable: options.primary.TopLevelOperationsTable,
promConfigFile: configFile,
}
}
@ -657,103 +659,153 @@ func (r *ClickHouseReader) GetServicesList(ctx context.Context) (*[]string, erro
return &services, nil
}
func (r *ClickHouseReader) GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError) {
operations := map[string][]string{}
query := fmt.Sprintf(`SELECT DISTINCT name, serviceName FROM %s.%s`, r.traceDB, r.topLevelOperationsTable)
rows, err := r.db.Query(ctx, query)
if err != nil {
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
defer rows.Close()
for rows.Next() {
var name, serviceName string
if err := rows.Scan(&name, &serviceName); err != nil {
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: fmt.Errorf("Error in reading data")}
}
if _, ok := operations[serviceName]; !ok {
operations[serviceName] = []string{}
}
operations[serviceName] = append(operations[serviceName], name)
}
return &operations, nil
}
func (r *ClickHouseReader) GetServices(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError) {
if r.indexTable == "" {
return nil, &model.ApiError{Typ: model.ErrorExec, Err: ErrNoIndexTable}
}
topLevelOps, apiErr := r.GetTopLevelOperations(ctx)
if apiErr != nil {
return nil, apiErr
}
serviceItems := []model.ServiceItem{}
var wg sync.WaitGroup
// limit the number of concurrent queries to not overload the clickhouse server
sem := make(chan struct{}, 10)
var mtx sync.RWMutex
query := fmt.Sprintf("SELECT serviceName, quantile(0.99)(durationNano) as p99, avg(durationNano) as avgDuration, count(*) as numCalls FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10))
args := []interface{}{}
args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
for svc, ops := range *topLevelOps {
sem <- struct{}{}
wg.Add(1)
go func(svc string, ops []string) {
defer wg.Done()
defer func() { <-sem }()
var serviceItem model.ServiceItem
var numErrors uint64
query := fmt.Sprintf(
`SELECT
quantile(0.99)(durationNano) as p99,
avg(durationNano) as avgDuration,
count(*) as numCalls
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`,
r.traceDB, r.indexTable,
)
errorQuery := fmt.Sprintf(
`SELECT
count(*) as numErrors
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`,
r.traceDB, r.indexTable,
)
args := []interface{}{}
args = append(args,
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", svc),
clickhouse.Named("names", ops),
)
args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
zap.S().Error("Error in processing sql query: ", errStatus)
return
}
err := r.db.QueryRow(
ctx,
query,
args...,
).ScanStruct(&serviceItem)
if err != nil {
zap.S().Error("Error in processing sql query: ", err)
return
}
err = r.db.QueryRow(ctx, errorQuery, args...).Scan(&numErrors)
if err != nil {
zap.S().Error("Error in processing sql query: ", err)
return
}
serviceItem.ServiceName = svc
serviceItem.NumErrors = numErrors
mtx.Lock()
serviceItems = append(serviceItems, serviceItem)
mtx.Unlock()
}(svc, ops)
}
query += " GROUP BY serviceName ORDER BY p99 DESC"
err := r.db.Select(ctx, &serviceItems, query, args...)
wg.Wait()
zap.S().Info(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
for idx := range serviceItems {
serviceItems[idx].CallRate = float64(serviceItems[idx].NumCalls) / float64(queryParams.Period)
serviceItems[idx].ErrorRate = float64(serviceItems[idx].NumErrors) * 100 / float64(serviceItems[idx].NumCalls)
}
////////////////// Below block gets 5xx of services
serviceErrorItems := []model.ServiceItem{}
query = fmt.Sprintf("SELECT serviceName, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND (statusCode>=500 OR statusCode=2)", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10))
args = []interface{}{}
args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY serviceName"
err = r.db.Select(ctx, &serviceErrorItems, query, args...)
zap.S().Info(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
m5xx := make(map[string]uint64)
for j := range serviceErrorItems {
m5xx[serviceErrorItems[j].ServiceName] = serviceErrorItems[j].NumErrors
}
///////////////////////////////////////////
////////////////// Below block gets 4xx of services
service4xxItems := []model.ServiceItem{}
query = fmt.Sprintf("SELECT serviceName, count(*) as num4xx FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND statusCode>=400 AND statusCode<500", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10))
args = []interface{}{}
args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY serviceName"
err = r.db.Select(ctx, &service4xxItems, query, args...)
zap.S().Info(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
m4xx := make(map[string]uint64)
for j := range service4xxItems {
m4xx[service4xxItems[j].ServiceName] = service4xxItems[j].Num4XX
}
for i := range serviceItems {
if val, ok := m5xx[serviceItems[i].ServiceName]; ok {
serviceItems[i].NumErrors = val
}
if val, ok := m4xx[serviceItems[i].ServiceName]; ok {
serviceItems[i].Num4XX = val
}
serviceItems[i].CallRate = float64(serviceItems[i].NumCalls) / float64(queryParams.Period)
serviceItems[i].FourXXRate = float64(serviceItems[i].Num4XX) * 100 / float64(serviceItems[i].NumCalls)
serviceItems[i].ErrorRate = float64(serviceItems[i].NumErrors) * 100 / float64(serviceItems[i].NumCalls)
}
return &serviceItems, nil
}
func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError) {
topLevelOps, apiErr := r.GetTopLevelOperations(ctx)
if apiErr != nil {
return nil, apiErr
}
ops, ok := (*topLevelOps)[queryParams.ServiceName]
if !ok {
return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Service not found")}
}
namedArgs := []interface{}{
clickhouse.Named("interval", strconv.Itoa(int(queryParams.StepSeconds/60))),
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", queryParams.ServiceName),
clickhouse.Named("names", ops),
}
serviceOverviewItems := []model.ServiceOverviewItem{}
query := fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, quantile(0.99)(durationNano) as p99, quantile(0.95)(durationNano) as p95,quantile(0.50)(durationNano) as p50, count(*) as numCalls FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s'", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName)
query := fmt.Sprintf(`
SELECT
toStartOfInterval(timestamp, INTERVAL @interval minute) as time,
quantile(0.99)(durationNano) as p99,
quantile(0.95)(durationNano) as p95,
quantile(0.50)(durationNano) as p50,
count(*) as numCalls
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`,
r.traceDB, r.indexTable,
)
args := []interface{}{}
args = append(args, namedArgs...)
args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
@ -761,17 +813,25 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *
query += " GROUP BY time ORDER BY time DESC"
err := r.db.Select(ctx, &serviceOverviewItems, query, args...)
zap.S().Info(query)
zap.S().Debug(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
serviceErrorItems := []model.ServiceErrorItem{}
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s' AND hasError=true", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName)
query = fmt.Sprintf(`
SELECT
toStartOfInterval(timestamp, INTERVAL @interval minute) as time,
count(*) as numErrors
FROM %s.%s
WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`,
r.traceDB, r.indexTable,
)
args = []interface{}{}
args = append(args, namedArgs...)
args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
@ -779,10 +839,10 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *
query += " GROUP BY time ORDER BY time DESC"
err = r.db.Select(ctx, &serviceErrorItems, query, args...)
zap.S().Info(query)
zap.S().Debug(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
@ -1523,31 +1583,48 @@ func (r *ClickHouseReader) GetTagValues(ctx context.Context, queryParams *model.
return &cleanedTagValues, nil
}
func (r *ClickHouseReader) GetTopEndpoints(ctx context.Context, queryParams *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError) {
func (r *ClickHouseReader) GetTopOperations(ctx context.Context, queryParams *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) {
var topEndpointsItems []model.TopEndpointsItem
namedArgs := []interface{}{
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", queryParams.ServiceName),
}
query := fmt.Sprintf("SELECT quantile(0.5)(durationNano) as p50, quantile(0.95)(durationNano) as p95, quantile(0.99)(durationNano) as p99, COUNT(1) as numCalls, name FROM %s.%s WHERE timestamp >= '%s' AND timestamp <= '%s' AND kind='2' and serviceName='%s'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName)
var topOperationsItems []model.TopOperationsItem
query := fmt.Sprintf(`
SELECT
quantile(0.5)(durationNano) as p50,
quantile(0.95)(durationNano) as p95,
quantile(0.99)(durationNano) as p99,
COUNT(*) as numCalls,
name
FROM %s.%s
WHERE serviceName = @serviceName AND timestamp>= @start AND timestamp<= @end`,
r.traceDB, r.indexTable,
)
args := []interface{}{}
args = append(args, namedArgs...)
args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY name"
err := r.db.Select(ctx, &topEndpointsItems, query, args...)
query += " GROUP BY name ORDER BY p99 DESC LIMIT 10"
err := r.db.Select(ctx, &topOperationsItems, query, args...)
zap.S().Info(query)
zap.S().Debug(query)
if err != nil {
zap.S().Debug("Error in processing sql query: ", err)
zap.S().Error("Error in processing sql query: ", err)
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")}
}
if topEndpointsItems == nil {
topEndpointsItems = []model.TopEndpointsItem{}
if topOperationsItems == nil {
topOperationsItems = []model.TopOperationsItem{}
}
return &topEndpointsItems, nil
return &topOperationsItems, nil
}
func (r *ClickHouseReader) GetUsage(ctx context.Context, queryParams *model.GetUsageParams) (*[]model.UsageItem, error) {

View File

@ -322,7 +322,8 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) {
router.HandleFunc("/api/v1/services", ViewAccess(aH.getServices)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/services/list", aH.getServicesList).Methods(http.MethodGet)
router.HandleFunc("/api/v1/service/overview", ViewAccess(aH.getServiceOverview)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/service/top_endpoints", ViewAccess(aH.getTopEndpoints)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/service/top_operations", ViewAccess(aH.getTopOperations)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/service/top_level_operations", ViewAccess(aH.getServicesTopLevelOps)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/traces/{traceId}", ViewAccess(aH.searchTraces)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/usage", ViewAccess(aH.getUsage)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/serviceMapDependencies", ViewAccess(aH.serviceMapDependencies)).Methods(http.MethodPost)
@ -1103,14 +1104,14 @@ func (aH *APIHandler) submitFeedback(w http.ResponseWriter, r *http.Request) {
}
func (aH *APIHandler) getTopEndpoints(w http.ResponseWriter, r *http.Request) {
func (aH *APIHandler) getTopOperations(w http.ResponseWriter, r *http.Request) {
query, err := parseGetTopEndpointsRequest(r)
query, err := parseGetTopOperationsRequest(r)
if aH.handleError(w, err, http.StatusBadRequest) {
return
}
result, apiErr := (*aH.reader).GetTopEndpoints(r.Context(), query)
result, apiErr := (*aH.reader).GetTopOperations(r.Context(), query)
if apiErr != nil && aH.handleError(w, apiErr.Err, http.StatusInternalServerError) {
return
@ -1152,6 +1153,17 @@ func (aH *APIHandler) getServiceOverview(w http.ResponseWriter, r *http.Request)
}
func (aH *APIHandler) getServicesTopLevelOps(w http.ResponseWriter, r *http.Request) {
result, apiErr := (*aH.reader).GetTopLevelOperations(r.Context())
if apiErr != nil {
respondError(w, apiErr, nil)
return
}
aH.writeJSON(w, r, result)
}
func (aH *APIHandler) getServices(w http.ResponseWriter, r *http.Request) {
query, err := parseGetServicesRequest(r)

View File

@ -32,8 +32,8 @@ func parseUser(r *http.Request) (*model.User, error) {
return &user, nil
}
func parseGetTopEndpointsRequest(r *http.Request) (*model.GetTopEndpointsParams, error) {
var postData *model.GetTopEndpointsParams
func parseGetTopOperationsRequest(r *http.Request) (*model.GetTopOperationsParams, error) {
var postData *model.GetTopOperationsParams
err := json.NewDecoder(r.Body).Decode(&postData)
if err != nil {
@ -467,8 +467,8 @@ func parseCountErrorsRequest(r *http.Request) (*model.CountErrorsParams, error)
}
params := &model.CountErrorsParams{
Start: startTime,
End: endTime,
Start: startTime,
End: endTime,
}
return params, nil

View File

@ -23,4 +23,4 @@ scrape_configs:
remote_read:
- url: tcp://localhost:9001/?database=signoz_metrics
- url: tcp://localhost:9000/?database=signoz_metrics

View File

@ -20,8 +20,9 @@ type Reader interface {
GetInstantQueryMetricsResult(ctx context.Context, query *model.InstantQueryMetricsParams) (*promql.Result, *stats.QueryStats, *model.ApiError)
GetQueryRangeResult(ctx context.Context, query *model.QueryRangeParams) (*promql.Result, *stats.QueryStats, *model.ApiError)
GetServiceOverview(ctx context.Context, query *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError)
GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError)
GetServices(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError)
GetTopEndpoints(ctx context.Context, query *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError)
GetTopOperations(ctx context.Context, query *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError)
GetUsage(ctx context.Context, query *model.GetUsageParams) (*[]model.UsageItem, error)
GetServicesList(ctx context.Context) (*[]string, error)
GetServiceMapDependencies(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error)

View File

@ -135,7 +135,7 @@ type MetricAutocompleteTagParams struct {
TagKey string
}
type GetTopEndpointsParams struct {
type GetTopOperationsParams struct {
StartTime string `json:"start"`
EndTime string `json:"end"`
ServiceName string `json:"service"`

View File

@ -3,6 +3,7 @@ package model
import (
"encoding/json"
"fmt"
"math"
"strconv"
"time"
@ -217,7 +218,7 @@ type UsageItem struct {
Count uint64 `json:"count" ch:"count"`
}
type TopEndpointsItem struct {
type TopOperationsItem struct {
Percentile50 float64 `json:"p50" ch:"p50"`
Percentile95 float64 `json:"p95" ch:"p95"`
Percentile99 float64 `json:"p99" ch:"p99"`
@ -403,3 +404,30 @@ func (p *MetricPoint) MarshalJSON() ([]byte, error) {
v := strconv.FormatFloat(p.Value, 'f', -1, 64)
return json.Marshal([...]interface{}{float64(p.Timestamp) / 1000, v})
}
// MarshalJSON implements json.Marshaler.
func (s *ServiceItem) MarshalJSON() ([]byte, error) {
// If a service didn't not send any data in the last interval duration
// it's values such as 99th percentile will return as NaN and
// json encoding doesn't support NaN
// We still want to show it in the UI, so we'll replace NaN with 0
type Alias ServiceItem
if math.IsInf(s.AvgDuration, 0) || math.IsNaN(s.AvgDuration) {
s.AvgDuration = 0
}
if math.IsInf(s.CallRate, 0) || math.IsNaN(s.CallRate) {
s.CallRate = 0
}
if math.IsInf(s.ErrorRate, 0) || math.IsNaN(s.ErrorRate) {
s.ErrorRate = 0
}
if math.IsInf(s.Percentile99, 0) || math.IsNaN(s.Percentile99) {
s.Percentile99 = 0
}
return json.Marshal(&struct {
*Alias
}{
Alias: (*Alias)(s),
})
}