diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 2781d2a0c6..6ea316135a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,4 +4,4 @@ * @ankitnayan /frontend/ @palashgdev @pranshuchittora /deploy/ @prashant-shahi -/pkg/query-service/ @srikanthccv @makeavish @nityanandagohain +/pkg/query-service/ @srikanthccv diff --git a/deploy/docker-swarm/clickhouse-setup/otel-collector-metrics-config.yaml b/deploy/docker-swarm/clickhouse-setup/otel-collector-metrics-config.yaml index a01f356437..ecaee5977a 100644 --- a/deploy/docker-swarm/clickhouse-setup/otel-collector-metrics-config.yaml +++ b/deploy/docker-swarm/clickhouse-setup/otel-collector-metrics-config.yaml @@ -5,9 +5,11 @@ receivers: # otel-collector internal metrics - job_name: "otel-collector" scrape_interval: 60s - static_configs: - - targets: - - otel-collector:8888 + dns_sd_configs: + - names: + - 'tasks.otel-collector' + type: 'A' + port: 8888 # otel-collector-metrics internal metrics - job_name: "otel-collector-metrics" scrape_interval: 60s @@ -17,9 +19,11 @@ receivers: # SigNoz span metrics - job_name: "signozspanmetrics-collector" scrape_interval: 60s - static_configs: - - targets: - - otel-collector:8889 + dns_sd_configs: + - names: + - 'tasks.otel-collector' + type: 'A' + port: 8889 processors: batch: diff --git a/frontend/public/locales/en-GB/alerts.json b/frontend/public/locales/en-GB/alerts.json index e67bd35273..cae309fd45 100644 --- a/frontend/public/locales/en-GB/alerts.json +++ b/frontend/public/locales/en-GB/alerts.json @@ -1,4 +1,11 @@ { + "target_missing": "Please enter a threshold to proceed", + "rule_test_fired": "Test notification sent successfully", + "no_alerts_found": "No alerts found during the evaluation. This happens when rule condition is unsatisfied. You may adjust the rule threshold and retry.", + "button_testrule": "Test Notification", + "label_channel_select": "Notification Channels", + "placeholder_channel_select": "select one or more channels", + "channel_select_tooltip": "Leave empty to send this alert on all the configured channels", "preview_chart_unexpected_error": "An unexpeced error occurred updating the chart, please check your query.", "preview_chart_threshold_label": "Threshold", "placeholder_label_key_pair": "Click here to enter a label (key value pairs)", diff --git a/frontend/public/locales/en-GB/channels.json b/frontend/public/locales/en-GB/channels.json index 5e670cc536..027501f69d 100644 --- a/frontend/public/locales/en-GB/channels.json +++ b/frontend/public/locales/en-GB/channels.json @@ -1,4 +1,14 @@ { + "channel_delete_unexp_error": "Something went wrong", + "channel_delete_success": "Channel Deleted Successfully", + "column_channel_name": "Name", + "column_channel_type": "Type", + "column_channel_action": "Action", + "column_channel_edit": "Edit", + "button_new_channel": "New Alert Channel", + "tooltip_notification_channels": "More details on how to setting notification channels", + "sending_channels_note": "The alerts will be sent to all the configured channels.", + "loading_channels_message": "Loading Channels..", "page_title_create": "New Notification Channels", "page_title_edit": "Edit Notification Channels", "button_save_channel": "Save", diff --git a/frontend/public/locales/en/alerts.json b/frontend/public/locales/en/alerts.json index e67bd35273..cae309fd45 100644 --- a/frontend/public/locales/en/alerts.json +++ b/frontend/public/locales/en/alerts.json @@ -1,4 +1,11 @@ { + "target_missing": "Please enter a threshold to proceed", + "rule_test_fired": "Test notification sent successfully", + "no_alerts_found": "No alerts found during the evaluation. This happens when rule condition is unsatisfied. You may adjust the rule threshold and retry.", + "button_testrule": "Test Notification", + "label_channel_select": "Notification Channels", + "placeholder_channel_select": "select one or more channels", + "channel_select_tooltip": "Leave empty to send this alert on all the configured channels", "preview_chart_unexpected_error": "An unexpeced error occurred updating the chart, please check your query.", "preview_chart_threshold_label": "Threshold", "placeholder_label_key_pair": "Click here to enter a label (key value pairs)", diff --git a/frontend/public/locales/en/channels.json b/frontend/public/locales/en/channels.json index 5e670cc536..027501f69d 100644 --- a/frontend/public/locales/en/channels.json +++ b/frontend/public/locales/en/channels.json @@ -1,4 +1,14 @@ { + "channel_delete_unexp_error": "Something went wrong", + "channel_delete_success": "Channel Deleted Successfully", + "column_channel_name": "Name", + "column_channel_type": "Type", + "column_channel_action": "Action", + "column_channel_edit": "Edit", + "button_new_channel": "New Alert Channel", + "tooltip_notification_channels": "More details on how to setting notification channels", + "sending_channels_note": "The alerts will be sent to all the configured channels.", + "loading_channels_message": "Loading Channels..", "page_title_create": "New Notification Channels", "page_title_edit": "Edit Notification Channels", "button_save_channel": "Save", diff --git a/frontend/src/api/alerts/patch.ts b/frontend/src/api/alerts/patch.ts new file mode 100644 index 0000000000..920b53ae9f --- /dev/null +++ b/frontend/src/api/alerts/patch.ts @@ -0,0 +1,26 @@ +import axios from 'api'; +import { ErrorResponseHandler } from 'api/ErrorResponseHandler'; +import { AxiosError } from 'axios'; +import { ErrorResponse, SuccessResponse } from 'types/api'; +import { PayloadProps, Props } from 'types/api/alerts/patch'; + +const patch = async ( + props: Props, +): Promise | ErrorResponse> => { + try { + const response = await axios.patch(`/rules/${props.id}`, { + ...props.data, + }); + + return { + statusCode: 200, + error: null, + message: response.data.status, + payload: response.data.data, + }; + } catch (error) { + return ErrorResponseHandler(error as AxiosError); + } +}; + +export default patch; diff --git a/frontend/src/api/alerts/testAlert.ts b/frontend/src/api/alerts/testAlert.ts new file mode 100644 index 0000000000..a30e977a10 --- /dev/null +++ b/frontend/src/api/alerts/testAlert.ts @@ -0,0 +1,26 @@ +import axios from 'api'; +import { ErrorResponseHandler } from 'api/ErrorResponseHandler'; +import { AxiosError } from 'axios'; +import { ErrorResponse, SuccessResponse } from 'types/api'; +import { PayloadProps, Props } from 'types/api/alerts/testAlert'; + +const testAlert = async ( + props: Props, +): Promise | ErrorResponse> => { + try { + const response = await axios.post('/testRule', { + ...props.data, + }); + + return { + statusCode: 200, + error: null, + message: response.data.status, + payload: response.data.data, + }; + } catch (error) { + return ErrorResponseHandler(error as AxiosError); + } +}; + +export default testAlert; diff --git a/frontend/src/api/metrics/getTopLevelOperations.ts b/frontend/src/api/metrics/getTopLevelOperations.ts new file mode 100644 index 0000000000..5ecfd2a67a --- /dev/null +++ b/frontend/src/api/metrics/getTopLevelOperations.ts @@ -0,0 +1,24 @@ +import axios from 'api'; +import { ErrorResponseHandler } from 'api/ErrorResponseHandler'; +import { AxiosError } from 'axios'; +import { ErrorResponse, SuccessResponse } from 'types/api'; +import { PayloadProps, Props } from 'types/api/metrics/getTopLevelOperations'; + +const getTopLevelOperations = async ( + props: Props, +): Promise | ErrorResponse> => { + try { + const response = await axios.post(`/service/top_level_operations`); + + return { + statusCode: 200, + error: null, + message: response.data.status, + payload: response.data[props.service], + }; + } catch (error) { + return ErrorResponseHandler(error as AxiosError); + } +}; + +export default getTopLevelOperations; diff --git a/frontend/src/api/metrics/getTopEndPoints.ts b/frontend/src/api/metrics/getTopOperations.ts similarity index 73% rename from frontend/src/api/metrics/getTopEndPoints.ts rename to frontend/src/api/metrics/getTopOperations.ts index db78aae9e3..cf07f0ee5d 100644 --- a/frontend/src/api/metrics/getTopEndPoints.ts +++ b/frontend/src/api/metrics/getTopOperations.ts @@ -2,13 +2,13 @@ import axios from 'api'; import { ErrorResponseHandler } from 'api/ErrorResponseHandler'; import { AxiosError } from 'axios'; import { ErrorResponse, SuccessResponse } from 'types/api'; -import { PayloadProps, Props } from 'types/api/metrics/getTopEndPoints'; +import { PayloadProps, Props } from 'types/api/metrics/getTopOperations'; -const getTopEndPoints = async ( +const getTopOperations = async ( props: Props, ): Promise | ErrorResponse> => { try { - const response = await axios.post(`/service/top_endpoints`, { + const response = await axios.post(`/service/top_operations`, { start: `${props.start}`, end: `${props.end}`, service: props.service, @@ -26,4 +26,4 @@ const getTopEndPoints = async ( } }; -export default getTopEndPoints; +export default getTopOperations; diff --git a/frontend/src/container/AllAlertChannels/AlertChannels.tsx b/frontend/src/container/AllAlertChannels/AlertChannels.tsx index 974530c6e5..762304a871 100644 --- a/frontend/src/container/AllAlertChannels/AlertChannels.tsx +++ b/frontend/src/container/AllAlertChannels/AlertChannels.tsx @@ -5,6 +5,7 @@ import ROUTES from 'constants/routes'; import useComponentPermission from 'hooks/useComponentPermission'; import history from 'lib/history'; import React, { useCallback, useState } from 'react'; +import { useTranslation } from 'react-i18next'; import { useSelector } from 'react-redux'; import { generatePath } from 'react-router-dom'; import { AppState } from 'store/reducers'; @@ -14,6 +15,7 @@ import AppReducer from 'types/reducer/app'; import Delete from './Delete'; function AlertChannels({ allChannels }: AlertChannelsProps): JSX.Element { + const { t } = useTranslation(['channels']); const [notifications, Element] = notification.useNotification(); const [channels, setChannels] = useState(allChannels); const { role } = useSelector((state) => state.app); @@ -29,12 +31,12 @@ function AlertChannels({ allChannels }: AlertChannelsProps): JSX.Element { const columns: ColumnsType = [ { - title: 'Name', + title: t('column_channel_name'), dataIndex: 'name', key: 'name', }, { - title: 'Type', + title: t('column_channel_type'), dataIndex: 'type', key: 'type', }, @@ -42,14 +44,14 @@ function AlertChannels({ allChannels }: AlertChannelsProps): JSX.Element { if (action) { columns.push({ - title: 'Action', + title: t('column_channel_action'), dataIndex: 'id', key: 'action', align: 'center', render: (id: string): JSX.Element => ( <> diff --git a/frontend/src/container/AllAlertChannels/Delete.tsx b/frontend/src/container/AllAlertChannels/Delete.tsx index 85116fd922..75555e199c 100644 --- a/frontend/src/container/AllAlertChannels/Delete.tsx +++ b/frontend/src/container/AllAlertChannels/Delete.tsx @@ -1,29 +1,31 @@ import { Button } from 'antd'; import { NotificationInstance } from 'antd/lib/notification'; -import deleteAlert from 'api/channels/delete'; +import deleteChannel from 'api/channels/delete'; import React, { useState } from 'react'; +import { useTranslation } from 'react-i18next'; import { Channels } from 'types/api/channels/getAll'; function Delete({ notifications, setChannels, id }: DeleteProps): JSX.Element { + const { t } = useTranslation(['channels']); const [loading, setLoading] = useState(false); const onClickHandler = async (): Promise => { try { setLoading(true); - const response = await deleteAlert({ + const response = await deleteChannel({ id, }); if (response.statusCode === 200) { notifications.success({ message: 'Success', - description: 'Channel Deleted Successfully', + description: t('channel_delete_success'), }); setChannels((preChannels) => preChannels.filter((e) => e.id !== id)); } else { notifications.error({ message: 'Error', - description: response.error || 'Something went wrong', + description: response.error || t('channel_delete_unexp_error'), }); } setLoading(false); @@ -31,7 +33,9 @@ function Delete({ notifications, setChannels, id }: DeleteProps): JSX.Element { notifications.error({ message: 'Error', description: - error instanceof Error ? error.toString() : 'Something went wrong', + error instanceof Error + ? error.toString() + : t('channel_delete_unexp_error'), }); setLoading(false); } diff --git a/frontend/src/container/AllAlertChannels/index.tsx b/frontend/src/container/AllAlertChannels/index.tsx index 44ab948f0b..99636806ea 100644 --- a/frontend/src/container/AllAlertChannels/index.tsx +++ b/frontend/src/container/AllAlertChannels/index.tsx @@ -8,16 +8,18 @@ import useComponentPermission from 'hooks/useComponentPermission'; import useFetch from 'hooks/useFetch'; import history from 'lib/history'; import React, { useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; import { useSelector } from 'react-redux'; import { AppState } from 'store/reducers'; import AppReducer from 'types/reducer/app'; import AlertChannelsComponent from './AlertChannels'; -import { Button, ButtonContainer } from './styles'; +import { Button, ButtonContainer, RightActionContainer } from './styles'; const { Paragraph } = Typography; function AlertChannels(): JSX.Element { + const { t } = useTranslation(['channels']); const { role } = useSelector((state) => state.app); const [addNewChannelPermission] = useComponentPermission( ['add_new_channel'], @@ -34,28 +36,28 @@ function AlertChannels(): JSX.Element { } if (loading || payload === undefined) { - return ; + return ; } return ( <> - The latest added channel is used as the default channel for sending alerts + {t('sending_channels_note')} -
+ {addNewChannelPermission && ( )} -
+
diff --git a/frontend/src/container/AllAlertChannels/styles.ts b/frontend/src/container/AllAlertChannels/styles.ts index b2d03a4cea..209860b867 100644 --- a/frontend/src/container/AllAlertChannels/styles.ts +++ b/frontend/src/container/AllAlertChannels/styles.ts @@ -1,6 +1,13 @@ import { Button as ButtonComponent } from 'antd'; import styled from 'styled-components'; +export const RightActionContainer = styled.div` + &&& { + display: flex; + align-items: center; + } +`; + export const ButtonContainer = styled.div` &&& { display: flex; diff --git a/frontend/src/container/FormAlertRules/BasicInfo.tsx b/frontend/src/container/FormAlertRules/BasicInfo.tsx index c977c82a4e..6bfbfffd03 100644 --- a/frontend/src/container/FormAlertRules/BasicInfo.tsx +++ b/frontend/src/container/FormAlertRules/BasicInfo.tsx @@ -4,9 +4,12 @@ import React from 'react'; import { useTranslation } from 'react-i18next'; import { AlertDef, Labels } from 'types/api/alerts/def'; +import ChannelSelect from './ChannelSelect'; import LabelSelect from './labels'; import { + ChannelSelectTip, FormContainer, + FormItemMedium, InputSmall, SeveritySelect, StepHeading, @@ -80,7 +83,7 @@ function BasicInfo({ alertDef, setAlertDef }: BasicInfoProps): JSX.Element { }} /> - + { setAlertDef({ @@ -92,7 +95,19 @@ function BasicInfo({ alertDef, setAlertDef }: BasicInfoProps): JSX.Element { }} initialValues={alertDef.labels} /> - + + + { + setAlertDef({ + ...alertDef, + preferredChannels: s, + }); + }} + /> + {t('channel_select_tooltip')} + ); diff --git a/frontend/src/container/FormAlertRules/ChannelSelect/index.tsx b/frontend/src/container/FormAlertRules/ChannelSelect/index.tsx new file mode 100644 index 0000000000..99c3038a42 --- /dev/null +++ b/frontend/src/container/FormAlertRules/ChannelSelect/index.tsx @@ -0,0 +1,70 @@ +import { notification, Select } from 'antd'; +import getChannels from 'api/channels/getAll'; +import useFetch from 'hooks/useFetch'; +import React from 'react'; +import { useTranslation } from 'react-i18next'; + +import { StyledSelect } from './styles'; + +export interface ChannelSelectProps { + currentValue?: string[]; + onSelectChannels: (s: string[]) => void; +} + +function ChannelSelect({ + currentValue, + onSelectChannels, +}: ChannelSelectProps): JSX.Element | null { + // init namespace for translations + const { t } = useTranslation('alerts'); + + const { loading, payload, error, errorMessage } = useFetch(getChannels); + + const handleChange = (value: string[]): void => { + onSelectChannels(value); + }; + + if (error && errorMessage !== '') { + notification.error({ + message: 'Error', + description: errorMessage, + }); + } + const renderOptions = (): React.ReactNode[] => { + const children: React.ReactNode[] = []; + + if (loading || payload === undefined || payload.length === 0) { + return children; + } + + payload.forEach((o) => { + children.push( + + {o.name} + , + ); + }); + + return children; + }; + return ( + { + handleChange(value as string[]); + }} + optionLabelProp="label" + > + {renderOptions()} + + ); +} + +ChannelSelect.defaultProps = { + currentValue: [], +}; +export default ChannelSelect; diff --git a/frontend/src/container/FormAlertRules/ChannelSelect/styles.ts b/frontend/src/container/FormAlertRules/ChannelSelect/styles.ts new file mode 100644 index 0000000000..7a59e38767 --- /dev/null +++ b/frontend/src/container/FormAlertRules/ChannelSelect/styles.ts @@ -0,0 +1,6 @@ +import { Select } from 'antd'; +import styled from 'styled-components'; + +export const StyledSelect = styled(Select)` + border-radius: 4px; +`; diff --git a/frontend/src/container/FormAlertRules/ChartPreview/index.tsx b/frontend/src/container/FormAlertRules/ChartPreview/index.tsx index 88364fad1b..6243c1d4d4 100644 --- a/frontend/src/container/FormAlertRules/ChartPreview/index.tsx +++ b/frontend/src/container/FormAlertRules/ChartPreview/index.tsx @@ -21,7 +21,7 @@ export interface ChartPreviewProps { selectedTime?: timePreferenceType; selectedInterval?: Time; headline?: JSX.Element; - threshold?: number; + threshold?: number | undefined; } function ChartPreview({ @@ -35,7 +35,7 @@ function ChartPreview({ }: ChartPreviewProps): JSX.Element | null { const { t } = useTranslation('alerts'); const staticLine: StaticLineProps | undefined = - threshold && threshold > 0 + threshold !== undefined ? { yMin: threshold, yMax: threshold, @@ -117,7 +117,7 @@ ChartPreview.defaultProps = { selectedTime: 'GLOBAL_TIME', selectedInterval: '5min', headline: undefined, - threshold: 0, + threshold: undefined, }; export default ChartPreview; diff --git a/frontend/src/container/FormAlertRules/RuleOptions.tsx b/frontend/src/container/FormAlertRules/RuleOptions.tsx index d9aff8bfb1..8794f87b2c 100644 --- a/frontend/src/container/FormAlertRules/RuleOptions.tsx +++ b/frontend/src/container/FormAlertRules/RuleOptions.tsx @@ -156,7 +156,7 @@ function RuleOptions({ ...alertDef, condition: { ...alertDef.condition, - target: (value as number) || undefined, + target: value as number, }, }); }} diff --git a/frontend/src/container/FormAlertRules/index.tsx b/frontend/src/container/FormAlertRules/index.tsx index 8643eb4060..022e913f8e 100644 --- a/frontend/src/container/FormAlertRules/index.tsx +++ b/frontend/src/container/FormAlertRules/index.tsx @@ -1,6 +1,7 @@ import { ExclamationCircleOutlined, SaveOutlined } from '@ant-design/icons'; import { FormInstance, Modal, notification, Typography } from 'antd'; import saveAlertApi from 'api/alerts/save'; +import testAlertApi from 'api/alerts/testAlert'; import ROUTES from 'constants/routes'; import QueryTypeTag from 'container/NewWidget/LeftContainer/QueryTypeTag'; import PlotTag from 'container/NewWidget/LeftContainer/WidgetGraph/PlotTag'; @@ -143,10 +144,74 @@ function FormAlertRules({ }); } }; + const validatePromParams = useCallback((): boolean => { + let retval = true; + if (queryCategory !== EQueryType.PROM) return retval; + + if (!promQueries || Object.keys(promQueries).length === 0) { + notification.error({ + message: 'Error', + description: t('promql_required'), + }); + return false; + } + + Object.keys(promQueries).forEach((key) => { + if (promQueries[key].query === '') { + notification.error({ + message: 'Error', + description: t('promql_required'), + }); + retval = false; + } + }); + + return retval; + }, [t, promQueries, queryCategory]); + + const validateQBParams = useCallback((): boolean => { + let retval = true; + if (queryCategory !== EQueryType.QUERY_BUILDER) return true; + + if (!metricQueries || Object.keys(metricQueries).length === 0) { + notification.error({ + message: 'Error', + description: t('condition_required'), + }); + return false; + } + + if (!alertDef.condition?.target) { + notification.error({ + message: 'Error', + description: t('target_missing'), + }); + return false; + } + + Object.keys(metricQueries).forEach((key) => { + if (metricQueries[key].metricName === '') { + notification.error({ + message: 'Error', + description: t('metricname_missing', { where: metricQueries[key].name }), + }); + retval = false; + } + }); + + Object.keys(formulaQueries).forEach((key) => { + if (formulaQueries[key].expression === '') { + notification.error({ + message: 'Error', + description: t('expression_missing', formulaQueries[key].name), + }); + retval = false; + } + }); + return retval; + }, [t, alertDef, queryCategory, metricQueries, formulaQueries]); const isFormValid = useCallback((): boolean => { - let retval = true; - if (!alertDef.alert || alertDef.alert === '') { notification.error({ message: 'Error', @@ -155,57 +220,14 @@ function FormAlertRules({ return false; } - if ( - queryCategory === EQueryType.PROM && - (!promQueries || Object.keys(promQueries).length === 0) - ) { - notification.error({ - message: 'Error', - description: t('promql_required'), - }); + if (!validatePromParams()) { return false; } - if ( - (queryCategory === EQueryType.QUERY_BUILDER && !metricQueries) || - Object.keys(metricQueries).length === 0 - ) { - notification.error({ - message: 'Error', - description: t('condition_required'), - }); - return false; - } - - if (queryCategory === EQueryType.QUERY_BUILDER) { - Object.keys(metricQueries).forEach((key) => { - if (metricQueries[key].metricName === '') { - retval = false; - notification.error({ - message: 'Error', - description: t('metricname_missing', { where: metricQueries[key].name }), - }); - } - }); - Object.keys(formulaQueries).forEach((key) => { - if (formulaQueries[key].expression === '') { - retval = false; - notification.error({ - message: 'Error', - description: t('expression_missing', formulaQueries[key].name), - }); - } - }); - } - - return retval; - }, [t, alertDef, queryCategory, metricQueries, formulaQueries, promQueries]); - - const saveRule = useCallback(async () => { - if (!isFormValid()) { - return; - } + return validateQBParams(); + }, [t, validateQBParams, alertDef, validatePromParams]); + const preparePostData = (): AlertDef => { const postableAlert: AlertDef = { ...alertDef, source: window?.location.toString(), @@ -220,6 +242,22 @@ function FormAlertRules({ }, }, }; + return postableAlert; + }; + + const memoizedPreparePostData = useCallback(preparePostData, [ + queryCategory, + alertDef, + metricQueries, + formulaQueries, + promQueries, + ]); + + const saveRule = useCallback(async () => { + if (!isFormValid()) { + return; + } + const postableAlert = memoizedPreparePostData(); setLoading(true); try { @@ -250,24 +288,13 @@ function FormAlertRules({ }); } } catch (e) { - console.log('save alert api failed:', e); notification.error({ message: 'Error', description: t('unexpected_error'), }); } setLoading(false); - }, [ - t, - isFormValid, - queryCategory, - ruleId, - alertDef, - metricQueries, - formulaQueries, - promQueries, - ruleCache, - ]); + }, [t, isFormValid, ruleId, ruleCache, memoizedPreparePostData]); const onSaveHandler = useCallback(async () => { const content = ( @@ -288,6 +315,44 @@ function FormAlertRules({ }); }, [t, saveRule, queryCategory]); + const onTestRuleHandler = useCallback(async () => { + if (!isFormValid()) { + return; + } + const postableAlert = memoizedPreparePostData(); + + setLoading(true); + try { + const response = await testAlertApi({ data: postableAlert }); + + if (response.statusCode === 200) { + const { payload } = response; + if (payload?.alertCount === 0) { + notification.error({ + message: 'Error', + description: t('no_alerts_found'), + }); + } else { + notification.success({ + message: 'Success', + description: t('rule_test_fired'), + }); + } + } else { + notification.error({ + message: 'Error', + description: response.error || t('unexpected_error'), + }); + } + } catch (e) { + notification.error({ + message: 'Error', + description: t('unexpected_error'), + }); + } + setLoading(false); + }, [t, isFormValid, memoizedPreparePostData]); + const renderBasicInfo = (): JSX.Element => ( ); @@ -354,6 +419,14 @@ function FormAlertRules({ > {ruleId > 0 ? t('button_savechanges') : t('button_createrule')} + + {' '} + {t('button_testrule')} + ` - width: 70%; - border-radisu: 4px; + border-radius: 4px; background: ${({ isDarkMode }): string => (isDarkMode ? '#000' : '#fff')}; flex: 1; display: flex; diff --git a/frontend/src/container/FormAlertRules/styles.ts b/frontend/src/container/FormAlertRules/styles.ts index 4ec8dcafbd..c1a7ad2aa8 100644 --- a/frontend/src/container/FormAlertRules/styles.ts +++ b/frontend/src/container/FormAlertRules/styles.ts @@ -1,4 +1,15 @@ -import { Button, Card, Col, Form, Input, InputNumber, Row, Select } from 'antd'; +import { + Button, + Card, + Col, + Form, + Input, + InputNumber, + Row, + Select, + Typography, +} from 'antd'; +import FormItem from 'antd/lib/form/FormItem'; import TextArea from 'antd/lib/input/TextArea'; import styled from 'styled-components'; @@ -67,7 +78,7 @@ export const InlineSelect = styled(Select)` `; export const SeveritySelect = styled(Select)` - width: 15% !important; + width: 25% !important; `; export const InputSmall = styled(Input)` @@ -99,3 +110,11 @@ export const ThresholdInput = styled(InputNumber)` export const TextareaMedium = styled(TextArea)` width: 70%; `; + +export const FormItemMedium = styled(FormItem)` + width: 70%; +`; + +export const ChannelSelectTip = styled(Typography.Text)` + color: hsla(0, 0%, 100%, 0.3); +`; diff --git a/frontend/src/container/ListAlertRules/DeleteAlert.tsx b/frontend/src/container/ListAlertRules/DeleteAlert.tsx index f479de38ab..ac91bfe2f2 100644 --- a/frontend/src/container/ListAlertRules/DeleteAlert.tsx +++ b/frontend/src/container/ListAlertRules/DeleteAlert.tsx @@ -1,10 +1,11 @@ -import { Button } from 'antd'; import { NotificationInstance } from 'antd/lib/notification/index'; import deleteAlerts from 'api/alerts/delete'; import { State } from 'hooks/useFetch'; import React, { useState } from 'react'; import { PayloadProps as DeleteAlertPayloadProps } from 'types/api/alerts/delete'; -import { Alerts } from 'types/api/alerts/getAll'; +import { GettableAlert } from 'types/api/alerts/get'; + +import { ColumnButton } from './styles'; function DeleteAlert({ id, @@ -72,20 +73,20 @@ function DeleteAlert({ }; return ( - + ); } interface DeleteAlertProps { - id: Alerts['id']; - setData: React.Dispatch>; + id: GettableAlert['id']; + setData: React.Dispatch>; notifications: NotificationInstance; } diff --git a/frontend/src/container/ListAlertRules/ListAlert.tsx b/frontend/src/container/ListAlertRules/ListAlert.tsx index 4df6290725..1981e8bfd8 100644 --- a/frontend/src/container/ListAlertRules/ListAlert.tsx +++ b/frontend/src/container/ListAlertRules/ListAlert.tsx @@ -1,6 +1,6 @@ /* eslint-disable react/display-name */ import { PlusOutlined } from '@ant-design/icons'; -import { notification, Tag, Typography } from 'antd'; +import { notification, Typography } from 'antd'; import Table, { ColumnsType } from 'antd/lib/table'; import TextToolTip from 'components/TextToolTip'; import ROUTES from 'constants/routes'; @@ -13,15 +13,16 @@ import { UseQueryResult } from 'react-query'; import { useSelector } from 'react-redux'; import { AppState } from 'store/reducers'; import { ErrorResponse, SuccessResponse } from 'types/api'; -import { Alerts } from 'types/api/alerts/getAll'; +import { GettableAlert } from 'types/api/alerts/get'; import AppReducer from 'types/reducer/app'; import DeleteAlert from './DeleteAlert'; -import { Button, ButtonContainer } from './styles'; +import { Button, ButtonContainer, ColumnButton, StyledTag } from './styles'; import Status from './TableComponents/Status'; +import ToggleAlertState from './ToggleAlertState'; function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { - const [data, setData] = useState(allAlertRules || []); + const [data, setData] = useState(allAlertRules || []); const { t } = useTranslation('common'); const { role } = useSelector((state) => state.app); const [addNewAlert, action] = useComponentPermission( @@ -53,22 +54,27 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { history.push(`${ROUTES.EDIT_ALERTS}?ruleId=${id}`); }; - const columns: ColumnsType = [ + const columns: ColumnsType = [ { title: 'Status', dataIndex: 'state', key: 'state', sorter: (a, b): number => - b.labels.severity.length - a.labels.severity.length, + (b.state ? b.state.charCodeAt(0) : 1000) - + (a.state ? a.state.charCodeAt(0) : 1000), render: (value): JSX.Element => , }, { title: 'Alert Name', dataIndex: 'alert', key: 'name', - sorter: (a, b): number => a.name.charCodeAt(0) - b.name.charCodeAt(0), + sorter: (a, b): number => + (a.alert ? a.alert.charCodeAt(0) : 1000) - + (b.alert ? b.alert.charCodeAt(0) : 1000), render: (value, record): JSX.Element => ( - onEditHandler(record.id.toString())}> + onEditHandler(record.id ? record.id.toString() : '')} + > {value} ), @@ -78,7 +84,8 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { dataIndex: 'labels', key: 'severity', sorter: (a, b): number => - a.labels.severity.length - b.labels.severity.length, + (a.labels ? a.labels.severity.length : 0) - + (b.labels ? b.labels.severity.length : 0), render: (value): JSX.Element => { const objectKeys = Object.keys(value); const withSeverityKey = objectKeys.find((e) => e === 'severity') || ''; @@ -92,6 +99,7 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { dataIndex: 'labels', key: 'tags', align: 'center', + width: 350, render: (value): JSX.Element => { const objectKeys = Object.keys(value); const withOutSeverityKeys = objectKeys.filter((e) => e !== 'severity'); @@ -104,9 +112,9 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { <> {withOutSeverityKeys.map((e) => { return ( - + {e}: {value[e]} - + ); })} @@ -120,14 +128,19 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { title: 'Action', dataIndex: 'id', key: 'action', - render: (id: Alerts['id']): JSX.Element => { + render: (id: GettableAlert['id'], record): JSX.Element => { return ( <> - + - + + + ); }, @@ -159,8 +172,10 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { } interface ListAlertProps { - allAlertRules: Alerts[]; - refetch: UseQueryResult>['refetch']; + allAlertRules: GettableAlert[]; + refetch: UseQueryResult< + ErrorResponse | SuccessResponse + >['refetch']; } export default ListAlert; diff --git a/frontend/src/container/ListAlertRules/TableComponents/Status.tsx b/frontend/src/container/ListAlertRules/TableComponents/Status.tsx index 33de5fb1db..d935b8d5ba 100644 --- a/frontend/src/container/ListAlertRules/TableComponents/Status.tsx +++ b/frontend/src/container/ListAlertRules/TableComponents/Status.tsx @@ -1,6 +1,6 @@ import { Tag } from 'antd'; import React from 'react'; -import { Alerts } from 'types/api/alerts/getAll'; +import { GettableAlert } from 'types/api/alerts/get'; function Status({ status }: StatusProps): JSX.Element { switch (status) { @@ -16,14 +16,18 @@ function Status({ status }: StatusProps): JSX.Element { return Firing; } + case 'disabled': { + return Disabled; + } + default: { - return Unknown Status; + return Unknown; } } } interface StatusProps { - status: Alerts['state']; + status: GettableAlert['state']; } export default Status; diff --git a/frontend/src/container/ListAlertRules/ToggleAlertState.tsx b/frontend/src/container/ListAlertRules/ToggleAlertState.tsx new file mode 100644 index 0000000000..9b367ea891 --- /dev/null +++ b/frontend/src/container/ListAlertRules/ToggleAlertState.tsx @@ -0,0 +1,108 @@ +import { notification } from 'antd'; +import patchAlert from 'api/alerts/patch'; +import { State } from 'hooks/useFetch'; +import React, { useState } from 'react'; +import { GettableAlert } from 'types/api/alerts/get'; +import { PayloadProps as PatchPayloadProps } from 'types/api/alerts/patch'; + +import { ColumnButton } from './styles'; + +function ToggleAlertState({ + id, + disabled, + setData, +}: ToggleAlertStateProps): JSX.Element { + const [apiStatus, setAPIStatus] = useState>({ + error: false, + errorMessage: '', + loading: false, + success: false, + payload: undefined, + }); + + const defaultErrorMessage = 'Something went wrong'; + + const onToggleHandler = async ( + id: number, + disabled: boolean, + ): Promise => { + try { + setAPIStatus((state) => ({ + ...state, + loading: true, + })); + + const response = await patchAlert({ + id, + data: { + disabled, + }, + }); + + if (response.statusCode === 200) { + setData((state) => { + return state.map((alert) => { + if (alert.id === id) { + return { + ...alert, + disabled: response.payload.disabled, + state: response.payload.state, + }; + } + return alert; + }); + }); + + setAPIStatus((state) => ({ + ...state, + loading: false, + payload: response.payload, + })); + notification.success({ + message: 'Success', + }); + } else { + setAPIStatus((state) => ({ + ...state, + loading: false, + error: true, + errorMessage: response.error || defaultErrorMessage, + })); + + notification.error({ + message: response.error || defaultErrorMessage, + }); + } + } catch (error) { + setAPIStatus((state) => ({ + ...state, + loading: false, + error: true, + errorMessage: defaultErrorMessage, + })); + + notification.error({ + message: defaultErrorMessage, + }); + } + }; + + return ( + => onToggleHandler(id, !disabled)} + type="link" + > + {disabled ? 'Enable' : 'Disable'} + + ); +} + +interface ToggleAlertStateProps { + id: GettableAlert['id']; + disabled: boolean; + setData: React.Dispatch>; +} + +export default ToggleAlertState; diff --git a/frontend/src/container/ListAlertRules/styles.ts b/frontend/src/container/ListAlertRules/styles.ts index fa993568fb..67748b21c0 100644 --- a/frontend/src/container/ListAlertRules/styles.ts +++ b/frontend/src/container/ListAlertRules/styles.ts @@ -1,4 +1,4 @@ -import { Button as ButtonComponent } from 'antd'; +import { Button as ButtonComponent, Tag } from 'antd'; import styled from 'styled-components'; export const ButtonContainer = styled.div` @@ -12,6 +12,20 @@ export const ButtonContainer = styled.div` export const Button = styled(ButtonComponent)` &&& { - margin-left: 1rem; + margin-left: 1em; + } +`; + +export const ColumnButton = styled(ButtonComponent)` + &&& { + padding-left: 0; + padding-right: 0; + margin-right: 1.5em; + } +`; + +export const StyledTag = styled(Tag)` + &&& { + white-space: normal; } `; diff --git a/frontend/src/container/MetricsApplication/Tabs/Overview.tsx b/frontend/src/container/MetricsApplication/Tabs/Overview.tsx index 803ed91bcc..2dbf2d33fd 100644 --- a/frontend/src/container/MetricsApplication/Tabs/Overview.tsx +++ b/frontend/src/container/MetricsApplication/Tabs/Overview.tsx @@ -15,7 +15,7 @@ import { PromQLWidgets } from 'types/api/dashboard/getAll'; import MetricReducer from 'types/reducer/metrics'; import { Card, Col, GraphContainer, GraphTitle, Row } from '../styles'; -import TopEndpointsTable from '../TopEndpointsTable'; +import TopOperationsTable from '../TopOperationsTable'; import { Button } from './styles'; function Application({ getWidget }: DashboardProps): JSX.Element { @@ -23,11 +23,13 @@ function Application({ getWidget }: DashboardProps): JSX.Element { const selectedTimeStamp = useRef(0); const { - topEndPoints, + topOperations, serviceOverview, resourceAttributePromQLQuery, resourceAttributeQueries, + topLevelOperations, } = useSelector((state) => state.metrics); + const operationsRegex = topLevelOperations.join('|'); const selectedTraceTags: string = JSON.stringify( convertRawQueriesToTraceSelectedTags(resourceAttributeQueries, 'array') || [], @@ -107,7 +109,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { - Application latency + Latency { - onClickHandler(ChartEvent, activeElements, chart, data, 'Application'); + onClickHandler(ChartEvent, activeElements, chart, data, 'Service'); }} - name="application_latency" + name="service_latency" type="line" data={{ datasets: [ @@ -175,7 +177,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { - Requests + Rate (ops/s) { - onClickHandler(event, element, chart, data, 'Request'); + onClickHandler(event, element, chart, data, 'Rate'); }} widget={getWidget([ { - query: `sum(rate(signoz_latency_count{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))`, - legend: 'Requests', + query: `sum(rate(signoz_latency_count{service_name="${servicename}", operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))`, + legend: 'Operations', }, ])} - yAxisUnit="reqps" + yAxisUnit="ops" /> @@ -227,7 +229,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { }} widget={getWidget([ { - query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`, + query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", operation=~"${operationsRegex}"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`, legend: 'Error Percentage', }, ])} @@ -239,7 +241,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { - + diff --git a/frontend/src/container/MetricsApplication/TopEndpointsTable.tsx b/frontend/src/container/MetricsApplication/TopOperationsTable.tsx similarity index 89% rename from frontend/src/container/MetricsApplication/TopEndpointsTable.tsx rename to frontend/src/container/MetricsApplication/TopOperationsTable.tsx index 5ede2d9c6a..4f91a97781 100644 --- a/frontend/src/container/MetricsApplication/TopEndpointsTable.tsx +++ b/frontend/src/container/MetricsApplication/TopOperationsTable.tsx @@ -11,7 +11,7 @@ import { AppState } from 'store/reducers'; import { GlobalReducer } from 'types/reducer/globalTime'; import MetricReducer from 'types/reducer/metrics'; -function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { +function TopOperationsTable(props: TopOperationsTableProps): JSX.Element { const { minTime, maxTime } = useSelector( (state) => state.globalTime, ); @@ -85,7 +85,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { title: 'Number of Calls', dataIndex: 'numCalls', key: 'numCalls', - sorter: (a: TopEndpointListItem, b: TopEndpointListItem): number => + sorter: (a: TopOperationListItem, b: TopOperationListItem): number => a.numCalls - b.numCalls, }, ]; @@ -94,7 +94,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { { - return 'Top Endpoints'; + return 'Key Operations'; }} tableLayout="fixed" dataSource={data} @@ -104,7 +104,7 @@ function TopEndpointsTable(props: TopEndpointsTableProps): JSX.Element { ); } -interface TopEndpointListItem { +interface TopOperationListItem { p50: number; p95: number; p99: number; @@ -112,10 +112,10 @@ interface TopEndpointListItem { name: string; } -type DataProps = TopEndpointListItem; +type DataProps = TopOperationListItem; -interface TopEndpointsTableProps { - data: TopEndpointListItem[]; +interface TopOperationsTableProps { + data: TopOperationListItem[]; } -export default TopEndpointsTable; +export default TopOperationsTable; diff --git a/frontend/src/container/MetricsTable/index.tsx b/frontend/src/container/MetricsTable/index.tsx index cc0778c80e..e81a7badfc 100644 --- a/frontend/src/container/MetricsTable/index.tsx +++ b/frontend/src/container/MetricsTable/index.tsx @@ -56,14 +56,14 @@ function Metrics(): JSX.Element { render: (value: number): string => (value / 1000000).toFixed(2), }, { - title: 'Error Rate (% of requests)', + title: 'Error Rate (% of total)', dataIndex: 'errorRate', key: 'errorRate', sorter: (a: DataProps, b: DataProps): number => a.errorRate - b.errorRate, render: (value: number): string => value.toFixed(2), }, { - title: 'Requests Per Second', + title: 'Operations Per Second', dataIndex: 'callRate', key: 'callRate', sorter: (a: DataProps, b: DataProps): number => a.callRate - b.callRate, diff --git a/frontend/src/container/TopNav/DateTimeSelection/config.ts b/frontend/src/container/TopNav/DateTimeSelection/config.ts index 69bdde40c7..59715d1f86 100644 --- a/frontend/src/container/TopNav/DateTimeSelection/config.ts +++ b/frontend/src/container/TopNav/DateTimeSelection/config.ts @@ -42,8 +42,9 @@ export interface Option { } export const ServiceMapOptions: Option[] = [ - { value: '1min', label: 'Last 1 min' }, { value: '5min', label: 'Last 5 min' }, + { value: '15min', label: 'Last 15 min' }, + { value: '30min', label: 'Last 30 min' }, ]; export const getDefaultOption = (route: string): Time => { diff --git a/frontend/src/container/TriggeredAlerts/Filter.tsx b/frontend/src/container/TriggeredAlerts/Filter.tsx index ae61fbc35a..601651cdff 100644 --- a/frontend/src/container/TriggeredAlerts/Filter.tsx +++ b/frontend/src/container/TriggeredAlerts/Filter.tsx @@ -2,7 +2,7 @@ import type { SelectProps } from 'antd'; import { Tag } from 'antd'; import React, { useCallback, useMemo } from 'react'; -import { Alerts } from 'types/api/alerts/getAll'; +import { Alerts } from 'types/api/alerts/getTriggered'; import { Container, Select } from './styles'; diff --git a/frontend/src/container/TriggeredAlerts/FilteredTable/ExapandableRow.tsx b/frontend/src/container/TriggeredAlerts/FilteredTable/ExapandableRow.tsx index fab66e242d..388e2d7499 100644 --- a/frontend/src/container/TriggeredAlerts/FilteredTable/ExapandableRow.tsx +++ b/frontend/src/container/TriggeredAlerts/FilteredTable/ExapandableRow.tsx @@ -2,7 +2,7 @@ import { Tag, Typography } from 'antd'; import convertDateToAmAndPm from 'lib/convertDateToAmAndPm'; import getFormattedDate from 'lib/getFormatedDate'; import React from 'react'; -import { Alerts } from 'types/api/alerts/getAll'; +import { Alerts } from 'types/api/alerts/getTriggered'; import Status from '../TableComponents/AlertStatus'; import { TableCell, TableRow } from './styles'; diff --git a/frontend/src/container/TriggeredAlerts/FilteredTable/TableRow.tsx b/frontend/src/container/TriggeredAlerts/FilteredTable/TableRow.tsx index 2f446adcf5..97619b5f12 100644 --- a/frontend/src/container/TriggeredAlerts/FilteredTable/TableRow.tsx +++ b/frontend/src/container/TriggeredAlerts/FilteredTable/TableRow.tsx @@ -1,7 +1,7 @@ import { MinusSquareOutlined, PlusSquareOutlined } from '@ant-design/icons'; import { Tag } from 'antd'; import React, { useState } from 'react'; -import { Alerts } from 'types/api/alerts/getAll'; +import { Alerts } from 'types/api/alerts/getTriggered'; import ExapandableRow from './ExapandableRow'; import { IconContainer, StatusContainer, TableCell, TableRow } from './styles'; diff --git a/frontend/src/container/TriggeredAlerts/FilteredTable/index.tsx b/frontend/src/container/TriggeredAlerts/FilteredTable/index.tsx index 8c8f47fdfd..a9e56d903d 100644 --- a/frontend/src/container/TriggeredAlerts/FilteredTable/index.tsx +++ b/frontend/src/container/TriggeredAlerts/FilteredTable/index.tsx @@ -1,6 +1,6 @@ import groupBy from 'lodash-es/groupBy'; import React, { useMemo } from 'react'; -import { Alerts } from 'types/api/alerts/getAll'; +import { Alerts } from 'types/api/alerts/getTriggered'; import { Value } from '../Filter'; import { FilterAlerts } from '../utils'; diff --git a/frontend/src/container/TriggeredAlerts/NoFilterTable.tsx b/frontend/src/container/TriggeredAlerts/NoFilterTable.tsx index a9c8064616..ac4e45131a 100644 --- a/frontend/src/container/TriggeredAlerts/NoFilterTable.tsx +++ b/frontend/src/container/TriggeredAlerts/NoFilterTable.tsx @@ -5,7 +5,7 @@ import AlertStatus from 'container/TriggeredAlerts/TableComponents/AlertStatus'; import convertDateToAmAndPm from 'lib/convertDateToAmAndPm'; import getFormattedDate from 'lib/getFormatedDate'; import React from 'react'; -import { Alerts } from 'types/api/alerts/getAll'; +import { Alerts } from 'types/api/alerts/getTriggered'; import { Value } from './Filter'; import { FilterAlerts } from './utils'; diff --git a/frontend/src/container/TriggeredAlerts/TriggeredAlert.tsx b/frontend/src/container/TriggeredAlerts/TriggeredAlert.tsx index 425334e7ba..b12a09d5e4 100644 --- a/frontend/src/container/TriggeredAlerts/TriggeredAlert.tsx +++ b/frontend/src/container/TriggeredAlerts/TriggeredAlert.tsx @@ -1,7 +1,7 @@ import getTriggeredApi from 'api/alerts/getTriggered'; import useInterval from 'hooks/useInterval'; import React, { useState } from 'react'; -import { Alerts } from 'types/api/alerts/getAll'; +import { Alerts } from 'types/api/alerts/getTriggered'; import Filter, { Value } from './Filter'; import FilteredTable from './FilteredTable'; diff --git a/frontend/src/container/TriggeredAlerts/utils.ts b/frontend/src/container/TriggeredAlerts/utils.ts index aab179e1cf..67d3024d6a 100644 --- a/frontend/src/container/TriggeredAlerts/utils.ts +++ b/frontend/src/container/TriggeredAlerts/utils.ts @@ -1,4 +1,4 @@ -import { Alerts } from 'types/api/alerts/getAll'; +import { Alerts } from 'types/api/alerts/getTriggered'; import { Value } from './Filter'; diff --git a/frontend/src/modules/Servicemap/ServiceMap.tsx b/frontend/src/modules/Servicemap/ServiceMap.tsx index 03256dde59..7bc44d0d5f 100644 --- a/frontend/src/modules/Servicemap/ServiceMap.tsx +++ b/frontend/src/modules/Servicemap/ServiceMap.tsx @@ -45,6 +45,9 @@ interface graphLink { source: string; target: string; value: number; + callRate: number; + errorRate: number; + p99: number; } export interface graphDataType { nodes: graphNode[]; @@ -96,16 +99,16 @@ function ServiceMap(props: ServiceMapProps): JSX.Element { const graphData = { nodes, links }; return ( - + /> */} d.target} linkDirectionalParticles="value" linkDirectionalParticleSpeed={(d) => d.value} @@ -124,7 +127,7 @@ function ServiceMap(props: ServiceMapProps): JSX.Element { ctx.fillStyle = isDarkMode ? '#ffffff' : '#000000'; ctx.fillText(label, node.x, node.y); }} - onNodeClick={(node) => { + onLinkHover={(node) => { const tooltip = document.querySelector('.graph-tooltip'); if (tooltip && node) { tooltip.innerHTML = getTooltip(node); diff --git a/frontend/src/modules/Servicemap/utils.ts b/frontend/src/modules/Servicemap/utils.ts index 6bec25f8a6..f1da9e3c3a 100644 --- a/frontend/src/modules/Servicemap/utils.ts +++ b/frontend/src/modules/Servicemap/utils.ts @@ -1,12 +1,13 @@ /*eslint-disable*/ //@ts-nocheck -import { cloneDeep, find, maxBy, uniq, uniqBy } from 'lodash-es'; +import { cloneDeep, find, maxBy, uniq, uniqBy, groupBy, sumBy } from 'lodash-es'; import { graphDataType } from './ServiceMap'; const MIN_WIDTH = 10; const MAX_WIDTH = 20; const DEFAULT_FONT_SIZE = 6; + export const getDimensions = (num, highest) => { const percentage = (num / highest) * 100; const width = (percentage * (MAX_WIDTH - MIN_WIDTH)) / 100 + MIN_WIDTH; @@ -18,19 +19,30 @@ export const getDimensions = (num, highest) => { }; export const getGraphData = (serviceMap, isDarkMode): graphDataType => { - const { items, services } = serviceMap; + const { items } = serviceMap; + const services = Object.values(groupBy(items, 'child')).map((e) => { + return { + serviceName: e[0].child, + errorRate: sumBy(e, 'errorRate'), + callRate: sumBy(e, 'callRate'), + } + }); const highestCallCount = maxBy(items, (e) => e?.callCount)?.callCount; const highestCallRate = maxBy(services, (e) => e?.callRate)?.callRate; + const divNum = Number( String(1).padEnd(highestCallCount.toString().length, '0'), ); const links = cloneDeep(items).map((node) => { - const { parent, child, callCount } = node; + const { parent, child, callCount, callRate, errorRate, p99 } = node; return { source: parent, target: child, value: (100 - callCount / divNum) * 0.03, + callRate, + errorRate, + p99, }; }); const uniqParent = uniqBy(cloneDeep(items), 'parent').map((e) => e.parent); @@ -47,15 +59,10 @@ export const getGraphData = (serviceMap, isDarkMode): graphDataType => { width: MIN_WIDTH, color, nodeVal: MIN_WIDTH, - callRate: 0, - errorRate: 0, - p99: 0, }; } if (service.errorRate > 0) { color = isDarkMode ? '#DB836E' : '#F98989'; - } else if (service.fourXXRate > 0) { - color = isDarkMode ? '#C79931' : '#F9DA7B'; } const { fontSize, width } = getDimensions(service.callRate, highestCallRate); return { @@ -65,9 +72,6 @@ export const getGraphData = (serviceMap, isDarkMode): graphDataType => { width, color, nodeVal: width, - callRate: service.callRate.toFixed(2), - errorRate: service.errorRate, - p99: service.p99, }; }); return { @@ -90,25 +94,31 @@ export const getZoomPx = (): number => { return 190; }; -export const getTooltip = (node: { +const getRound2DigitsAfterDecimal = (num: number) => { + if (num === 0) { + return 0; + } + return num.toFixed(20).match(/^-?\d*\.?0*\d{0,2}/)[0]; +} + +export const getTooltip = (link: { p99: number; errorRate: number; callRate: number; id: string; }) => { return `
-
${node.id}
P99 latency:
-
${node.p99 / 1000000}ms
+
${getRound2DigitsAfterDecimal(link.p99/ 1000000)}ms
Request:
-
${node.callRate}/sec
+
${getRound2DigitsAfterDecimal(link.callRate)}/sec
Error Rate:
-
${node.errorRate}%
+
${getRound2DigitsAfterDecimal(link.errorRate)}%
`; }; diff --git a/frontend/src/store/actions/metrics/getInitialData.ts b/frontend/src/store/actions/metrics/getInitialData.ts index f994a35c94..0f607f6ea5 100644 --- a/frontend/src/store/actions/metrics/getInitialData.ts +++ b/frontend/src/store/actions/metrics/getInitialData.ts @@ -3,7 +3,8 @@ // import getExternalError from 'api/metrics/getExternalError'; // import getExternalService from 'api/metrics/getExternalService'; import getServiceOverview from 'api/metrics/getServiceOverview'; -import getTopEndPoints from 'api/metrics/getTopEndPoints'; +import getTopLevelOperations from 'api/metrics/getTopLevelOperations'; +import getTopOperations from 'api/metrics/getTopOperations'; import { AxiosError } from 'axios'; import GetMinMax from 'lib/getMinMax'; import getStep from 'lib/getStep'; @@ -46,7 +47,8 @@ export const GetInitialData = ( // getExternalErrorResponse, // getExternalServiceResponse, getServiceOverviewResponse, - getTopEndPointsResponse, + getTopOperationsResponse, + getTopLevelOperationsResponse, ] = await Promise.all([ // getDBOverView({ // ...props, @@ -67,12 +69,15 @@ export const GetInitialData = ( step: getStep({ start: minTime, end: maxTime, inputFormat: 'ns' }), selectedTags: props.selectedTags, }), - getTopEndPoints({ + getTopOperations({ end: maxTime, service: props.serviceName, start: minTime, selectedTags: props.selectedTags, }), + getTopLevelOperations({ + service: props.serviceName, + }), ]); if ( @@ -81,7 +86,8 @@ export const GetInitialData = ( // getExternalErrorResponse.statusCode === 200 && // getExternalServiceResponse.statusCode === 200 && getServiceOverviewResponse.statusCode === 200 && - getTopEndPointsResponse.statusCode === 200 + getTopOperationsResponse.statusCode === 200 && + getTopLevelOperationsResponse.statusCode === 200 ) { dispatch({ type: 'GET_INTIAL_APPLICATION_DATA', @@ -91,7 +97,8 @@ export const GetInitialData = ( // externalError: getExternalErrorResponse.payload, // externalService: getExternalServiceResponse.payload, serviceOverview: getServiceOverviewResponse.payload, - topEndPoints: getTopEndPointsResponse.payload, + topOperations: getTopOperationsResponse.payload, + topLevelOperations: getTopLevelOperationsResponse.payload, }, }); } else { @@ -99,8 +106,9 @@ export const GetInitialData = ( type: 'GET_INITIAL_APPLICATION_ERROR', payload: { errorMessage: - getTopEndPointsResponse.error || + getTopOperationsResponse.error || getServiceOverviewResponse.error || + getTopLevelOperationsResponse.error || // getExternalServiceResponse.error || // getExternalErrorResponse.error || // getExternalAverageDurationResponse.error || diff --git a/frontend/src/store/actions/serviceMap.ts b/frontend/src/store/actions/serviceMap.ts index 36d8e5ba97..e3f527fc57 100644 --- a/frontend/src/store/actions/serviceMap.ts +++ b/frontend/src/store/actions/serviceMap.ts @@ -6,26 +6,16 @@ import { ActionTypes } from './types'; export interface ServiceMapStore { items: ServicesMapItem[]; - services: ServicesItem[]; loading: boolean; } -export interface ServicesItem { - serviceName: string; - p99: number; - avgDuration: number; - numCalls: number; - callRate: number; - numErrors: number; - errorRate: number; - num4XX: number; - fourXXRate: number; -} - export interface ServicesMapItem { parent: string; child: string; callCount: number; + callRate: number; + errorRate: number; + p99: number; } export interface ServiceMapItemAction { @@ -33,11 +23,6 @@ export interface ServiceMapItemAction { payload: ServicesMapItem[]; } -export interface ServicesAction { - type: ActionTypes.getServices; - payload: ServicesItem[]; -} - export interface ServiceMapLoading { type: ActionTypes.serviceMapLoading; payload: { @@ -55,19 +40,13 @@ export const getDetailedServiceMapItems = (globalTime: GlobalTime) => { end, tags: [], }; - const [serviceMapDependenciesResponse, response] = await Promise.all([ - api.post(`/serviceMapDependencies`, serviceMapPayload), - api.post(`/services`, serviceMapPayload), + const [dependencyGraphResponse] = await Promise.all([ + api.post(`/dependency_graph`, serviceMapPayload), ]); - dispatch({ - type: ActionTypes.getServices, - payload: response.data, - }); - dispatch({ type: ActionTypes.getServiceMapItems, - payload: serviceMapDependenciesResponse.data, + payload: dependencyGraphResponse.data, }); dispatch({ diff --git a/frontend/src/store/actions/types.ts b/frontend/src/store/actions/types.ts index 702997d49b..96d3f63538 100644 --- a/frontend/src/store/actions/types.ts +++ b/frontend/src/store/actions/types.ts @@ -1,8 +1,4 @@ -import { - ServiceMapItemAction, - ServiceMapLoading, - ServicesAction, -} from './serviceMap'; +import { ServiceMapItemAction, ServiceMapLoading } from './serviceMap'; import { GetUsageDataAction } from './usage'; export enum ActionTypes { @@ -17,6 +13,5 @@ export enum ActionTypes { export type Action = | GetUsageDataAction - | ServicesAction | ServiceMapItemAction | ServiceMapLoading; diff --git a/frontend/src/store/reducers/metric.ts b/frontend/src/store/reducers/metric.ts index 72b24a6b5b..2cb316d2c1 100644 --- a/frontend/src/store/reducers/metric.ts +++ b/frontend/src/store/reducers/metric.ts @@ -21,7 +21,7 @@ const InitialValue: InitialValueTypes = { services: [], dbOverView: [], externalService: [], - topEndPoints: [], + topOperations: [], externalAverageDuration: [], externalError: [], serviceOverview: [], @@ -29,6 +29,7 @@ const InitialValue: InitialValueTypes = { resourceAttributePromQLQuery: resourceAttributesQueryToPromQL( GetResourceAttributeQueriesFromURL() || [], ), + topLevelOperations: [], }; const metrics = ( @@ -88,22 +89,24 @@ const metrics = ( case GET_INTIAL_APPLICATION_DATA: { const { // dbOverView, - topEndPoints, + topOperations, serviceOverview, // externalService, // externalAverageDuration, // externalError, + topLevelOperations, } = action.payload; return { ...state, // dbOverView, - topEndPoints, + topOperations, serviceOverview, // externalService, // externalAverageDuration, // externalError, metricsApplicationLoading: false, + topLevelOperations, }; } diff --git a/frontend/src/store/reducers/serviceMap.ts b/frontend/src/store/reducers/serviceMap.ts index 18ec21a9ec..04b724615b 100644 --- a/frontend/src/store/reducers/serviceMap.ts +++ b/frontend/src/store/reducers/serviceMap.ts @@ -2,7 +2,6 @@ import { Action, ActionTypes, ServiceMapStore } from 'store/actions'; const initialState: ServiceMapStore = { items: [], - services: [], loading: true, }; @@ -16,11 +15,6 @@ export const ServiceMapReducer = ( ...state, items: action.payload, }; - case ActionTypes.getServices: - return { - ...state, - services: action.payload, - }; case ActionTypes.serviceMapLoading: { return { ...state, diff --git a/frontend/src/types/actions/metrics.ts b/frontend/src/types/actions/metrics.ts index 382e56b560..bc48f0929f 100644 --- a/frontend/src/types/actions/metrics.ts +++ b/frontend/src/types/actions/metrics.ts @@ -5,7 +5,7 @@ import { IResourceAttributeQuery } from 'container/MetricsApplication/ResourceAttributesFilter/types'; import { ServicesList } from 'types/api/metrics/getService'; import { ServiceOverview } from 'types/api/metrics/getServiceOverview'; -import { TopEndPoints } from 'types/api/metrics/getTopEndPoints'; +import { TopOperations } from 'types/api/metrics/getTopOperations'; export const GET_SERVICE_LIST_SUCCESS = 'GET_SERVICE_LIST_SUCCESS'; export const GET_SERVICE_LIST_LOADING_START = 'GET_SERVICE_LIST_LOADING_START'; @@ -38,12 +38,13 @@ export interface GetServiceListError { export interface GetInitialApplicationData { type: typeof GET_INTIAL_APPLICATION_DATA; payload: { - topEndPoints: TopEndPoints[]; + topOperations: TopOperations[]; // dbOverView: DBOverView[]; // externalService: ExternalService[]; // externalAverageDuration: ExternalAverageDuration[]; // externalError: ExternalError[]; serviceOverview: ServiceOverview[]; + topLevelOperations: string[]; }; } diff --git a/frontend/src/types/api/alerts/def.ts b/frontend/src/types/api/alerts/def.ts index 060bdc4d73..f417678ee1 100644 --- a/frontend/src/types/api/alerts/def.ts +++ b/frontend/src/types/api/alerts/def.ts @@ -18,6 +18,8 @@ export interface AlertDef { annotations?: Labels; evalWindow?: string; source?: string; + disabled?: boolean; + preferredChannels?: string[]; } export interface RuleCondition { diff --git a/frontend/src/types/api/alerts/delete.ts b/frontend/src/types/api/alerts/delete.ts index 24dbdc1d8a..5c842ea34c 100644 --- a/frontend/src/types/api/alerts/delete.ts +++ b/frontend/src/types/api/alerts/delete.ts @@ -1,7 +1,7 @@ -import { Alerts } from './getAll'; +import { AlertDef } from './def'; export interface Props { - id: Alerts['id']; + id: AlertDef['id']; } export interface PayloadProps { diff --git a/frontend/src/types/api/alerts/get.ts b/frontend/src/types/api/alerts/get.ts index 69eef474e1..78b637c140 100644 --- a/frontend/src/types/api/alerts/get.ts +++ b/frontend/src/types/api/alerts/get.ts @@ -4,6 +4,13 @@ export interface Props { id: AlertDef['id']; } +export interface GettableAlert extends AlertDef { + id: number; + alert: string; + state: string; + disabled: boolean; +} + export type PayloadProps = { - data: AlertDef; + data: GettableAlert; }; diff --git a/frontend/src/types/api/alerts/getAll.ts b/frontend/src/types/api/alerts/getAll.ts index 501c34a4cb..58351ed703 100644 --- a/frontend/src/types/api/alerts/getAll.ts +++ b/frontend/src/types/api/alerts/getAll.ts @@ -1,32 +1,3 @@ -export interface Alerts { - labels: AlertsLabel; - annotations: { - description: string; - summary: string; - [key: string]: string; - }; - state: string; - name: string; - id: number; - endsAt: string; - fingerprint: string; - generatorURL: string; - receivers: Receivers[]; - startsAt: string; - status: { - inhibitedBy: []; - silencedBy: []; - state: string; - }; - updatedAt: string; -} +import { GettableAlert } from './get'; -interface Receivers { - name: string; -} - -interface AlertsLabel { - [key: string]: string; -} - -export type PayloadProps = Alerts[]; +export type PayloadProps = GettableAlert[]; diff --git a/frontend/src/types/api/alerts/getGroups.ts b/frontend/src/types/api/alerts/getGroups.ts index f7dac48a14..71979d116d 100644 --- a/frontend/src/types/api/alerts/getGroups.ts +++ b/frontend/src/types/api/alerts/getGroups.ts @@ -1,4 +1,4 @@ -import { Alerts } from './getAll'; +import { AlertDef } from './def'; export interface Props { silenced: boolean; @@ -7,8 +7,8 @@ export interface Props { [key: string]: string | boolean; } export interface Group { - alerts: Alerts[]; - label: Alerts['labels']; + alerts: AlertDef[]; + label: AlertDef['labels']; receiver: { [key: string]: string; }; diff --git a/frontend/src/types/api/alerts/getTriggered.ts b/frontend/src/types/api/alerts/getTriggered.ts index 8b0e50a279..97d116b431 100644 --- a/frontend/src/types/api/alerts/getTriggered.ts +++ b/frontend/src/types/api/alerts/getTriggered.ts @@ -1,4 +1,33 @@ -import { Alerts } from './getAll'; +export interface Alerts { + labels: AlertsLabel; + annotations: { + description: string; + summary: string; + [key: string]: string; + }; + state: string; + name: string; + id: number; + endsAt: string; + fingerprint: string; + generatorURL: string; + receivers: Receivers[]; + startsAt: string; + status: { + inhibitedBy: []; + silencedBy: []; + state: string; + }; + updatedAt: string; +} + +interface Receivers { + name: string; +} + +interface AlertsLabel { + [key: string]: string; +} export interface Props { silenced: boolean; diff --git a/frontend/src/types/api/alerts/patch.ts b/frontend/src/types/api/alerts/patch.ts new file mode 100644 index 0000000000..fab1e67cfe --- /dev/null +++ b/frontend/src/types/api/alerts/patch.ts @@ -0,0 +1,12 @@ +import { GettableAlert } from './get'; + +export type PayloadProps = GettableAlert; + +export interface PatchProps { + disabled?: boolean; +} + +export interface Props { + id?: number; + data: PatchProps; +} diff --git a/frontend/src/types/api/alerts/testAlert.ts b/frontend/src/types/api/alerts/testAlert.ts new file mode 100644 index 0000000000..f0928275be --- /dev/null +++ b/frontend/src/types/api/alerts/testAlert.ts @@ -0,0 +1,10 @@ +import { AlertDef } from 'types/api/alerts/def'; + +export interface Props { + data: AlertDef; +} + +export interface PayloadProps { + alertCount: number; + message: string; +} diff --git a/frontend/src/types/api/metrics/getTopLevelOperations.ts b/frontend/src/types/api/metrics/getTopLevelOperations.ts new file mode 100644 index 0000000000..c4e88aed08 --- /dev/null +++ b/frontend/src/types/api/metrics/getTopLevelOperations.ts @@ -0,0 +1,7 @@ +export type TopLevelOperations = string[]; + +export interface Props { + service: string; +} + +export type PayloadProps = TopLevelOperations; diff --git a/frontend/src/types/api/metrics/getTopEndPoints.ts b/frontend/src/types/api/metrics/getTopOperations.ts similarity index 74% rename from frontend/src/types/api/metrics/getTopEndPoints.ts rename to frontend/src/types/api/metrics/getTopOperations.ts index c86d5fd115..f30c01251f 100644 --- a/frontend/src/types/api/metrics/getTopEndPoints.ts +++ b/frontend/src/types/api/metrics/getTopOperations.ts @@ -1,6 +1,6 @@ import { Tags } from 'types/reducer/trace'; -export interface TopEndPoints { +export interface TopOperations { name: string; numCalls: number; p50: number; @@ -15,4 +15,4 @@ export interface Props { selectedTags: Tags[]; } -export type PayloadProps = TopEndPoints[]; +export type PayloadProps = TopOperations[]; diff --git a/frontend/src/types/reducer/metrics.ts b/frontend/src/types/reducer/metrics.ts index d5b500f109..7903b2c21a 100644 --- a/frontend/src/types/reducer/metrics.ts +++ b/frontend/src/types/reducer/metrics.ts @@ -5,7 +5,7 @@ import { ExternalError } from 'types/api/metrics/getExternalError'; import { ExternalService } from 'types/api/metrics/getExternalService'; import { ServicesList } from 'types/api/metrics/getService'; import { ServiceOverview } from 'types/api/metrics/getServiceOverview'; -import { TopEndPoints } from 'types/api/metrics/getTopEndPoints'; +import { TopOperations } from 'types/api/metrics/getTopOperations'; interface MetricReducer { services: ServicesList[]; @@ -15,12 +15,13 @@ interface MetricReducer { errorMessage: string; dbOverView: DBOverView[]; externalService: ExternalService[]; - topEndPoints: TopEndPoints[]; + topOperations: TopOperations[]; externalAverageDuration: ExternalAverageDuration[]; externalError: ExternalError[]; serviceOverview: ServiceOverview[]; resourceAttributeQueries: IResourceAttributeQuery[]; resourceAttributePromQLQuery: string; + topLevelOperations: string[]; } export default MetricReducer; diff --git a/pkg/query-service/Dockerfile b/pkg/query-service/Dockerfile index 3689b2bf23..eec478aaef 100644 --- a/pkg/query-service/Dockerfile +++ b/pkg/query-service/Dockerfile @@ -20,7 +20,7 @@ RUN go mod download -x # Add the sources and proceed with build ADD . . -RUN go build -a -ldflags "-linkmode external -extldflags '-static' -s -w $LD_FLAGS" -o ./bin/query-service ./main.go +RUN go build -tags timetzdata -a -ldflags "-linkmode external -extldflags '-static' -s -w $LD_FLAGS" -o ./bin/query-service ./main.go RUN chmod +x ./bin/query-service diff --git a/pkg/query-service/app/clickhouseReader/options.go b/pkg/query-service/app/clickhouseReader/options.go index aba35490cc..12f277e4f3 100644 --- a/pkg/query-service/app/clickhouseReader/options.go +++ b/pkg/query-service/app/clickhouseReader/options.go @@ -18,21 +18,24 @@ const ( ) const ( - defaultDatasource string = "tcp://localhost:9000" - defaultTraceDB string = "signoz_traces" - defaultOperationsTable string = "signoz_operations" - defaultIndexTable string = "signoz_index_v2" - defaultErrorTable string = "signoz_error_index_v2" - defaulDurationTable string = "durationSortMV" - defaultSpansTable string = "signoz_spans" - defaultLogsDB string = "signoz_logs" - defaultLogsTable string = "logs" - defaultLogAttributeKeysTable string = "logs_atrribute_keys" - defaultLogResourceKeysTable string = "logs_resource_keys" - defaultLiveTailRefreshSeconds int = 10 - defaultWriteBatchDelay time.Duration = 5 * time.Second - defaultWriteBatchSize int = 10000 - defaultEncoding Encoding = EncodingJSON + defaultDatasource string = "tcp://localhost:9000" + defaultTraceDB string = "signoz_traces" + defaultOperationsTable string = "signoz_operations" + defaultIndexTable string = "signoz_index_v2" + defaultErrorTable string = "signoz_error_index_v2" + defaultDurationTable string = "durationSortMV" + defaultUsageExplorerTable string = "usage_explorer" + defaultSpansTable string = "signoz_spans" + defaultDependencyGraphTable string = "dependency_graph_minutes" + defaultTopLevelOperationsTable string = "top_level_operations" + defaultLogsDB string = "signoz_logs" + defaultLogsTable string = "logs" + defaultLogAttributeKeysTable string = "logs_atrribute_keys" + defaultLogResourceKeysTable string = "logs_resource_keys" + defaultLiveTailRefreshSeconds int = 10 + defaultWriteBatchDelay time.Duration = 5 * time.Second + defaultWriteBatchSize int = 10000 + defaultEncoding Encoding = EncodingJSON ) const ( @@ -48,24 +51,27 @@ const ( // NamespaceConfig is Clickhouse's internal configuration data type namespaceConfig struct { - namespace string - Enabled bool - Datasource string - TraceDB string - OperationsTable string - IndexTable string - DurationTable string - SpansTable string - ErrorTable string - LogsDB string - LogsTable string - LogsAttributeKeysTable string - LogsResourceKeysTable string - LiveTailRefreshSeconds int - WriteBatchDelay time.Duration - WriteBatchSize int - Encoding Encoding - Connector Connector + namespace string + Enabled bool + Datasource string + TraceDB string + OperationsTable string + IndexTable string + DurationTable string + UsageExplorerTable string + SpansTable string + ErrorTable string + DependencyGraphTable string + TopLevelOperationsTable string + LogsDB string + LogsTable string + LogsAttributeKeysTable string + LogsResourceKeysTable string + LiveTailRefreshSeconds int + WriteBatchDelay time.Duration + WriteBatchSize int + Encoding Encoding + Connector Connector } // Connecto defines how to connect to the database @@ -112,24 +118,27 @@ func NewOptions(datasource string, primaryNamespace string, otherNamespaces ...s options := &Options{ primary: &namespaceConfig{ - namespace: primaryNamespace, - Enabled: true, - Datasource: datasource, - TraceDB: defaultTraceDB, - OperationsTable: defaultOperationsTable, - IndexTable: defaultIndexTable, - ErrorTable: defaultErrorTable, - DurationTable: defaulDurationTable, - SpansTable: defaultSpansTable, - LogsDB: defaultLogsDB, - LogsTable: defaultLogsTable, - LogsAttributeKeysTable: defaultLogAttributeKeysTable, - LogsResourceKeysTable: defaultLogResourceKeysTable, - LiveTailRefreshSeconds: defaultLiveTailRefreshSeconds, - WriteBatchDelay: defaultWriteBatchDelay, - WriteBatchSize: defaultWriteBatchSize, - Encoding: defaultEncoding, - Connector: defaultConnector, + namespace: primaryNamespace, + Enabled: true, + Datasource: datasource, + TraceDB: defaultTraceDB, + OperationsTable: defaultOperationsTable, + IndexTable: defaultIndexTable, + ErrorTable: defaultErrorTable, + DurationTable: defaultDurationTable, + UsageExplorerTable: defaultUsageExplorerTable, + SpansTable: defaultSpansTable, + DependencyGraphTable: defaultDependencyGraphTable, + TopLevelOperationsTable: defaultTopLevelOperationsTable, + LogsDB: defaultLogsDB, + LogsTable: defaultLogsTable, + LogsAttributeKeysTable: defaultLogAttributeKeysTable, + LogsResourceKeysTable: defaultLogResourceKeysTable, + LiveTailRefreshSeconds: defaultLiveTailRefreshSeconds, + WriteBatchDelay: defaultWriteBatchDelay, + WriteBatchSize: defaultWriteBatchSize, + Encoding: defaultEncoding, + Connector: defaultConnector, }, others: make(map[string]*namespaceConfig, len(otherNamespaces)), } diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index 49fe724df5..dabdd3df20 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -48,16 +48,17 @@ import ( ) const ( - primaryNamespace = "clickhouse" - archiveNamespace = "clickhouse-archive" - signozTraceDBName = "signoz_traces" - signozDurationMVTable = "durationSort" - signozSpansTable = "signoz_spans" - signozErrorIndexTable = "signoz_error_index_v2" - signozTraceTableName = "signoz_index_v2" - signozMetricDBName = "signoz_metrics" - signozSampleTableName = "samples_v2" - signozTSTableName = "time_series_v2" + primaryNamespace = "clickhouse" + archiveNamespace = "clickhouse-archive" + signozTraceDBName = "signoz_traces" + signozDurationMVTable = "durationSort" + signozUsageExplorerTable = "usage_explorer" + signozSpansTable = "signoz_spans" + signozErrorIndexTable = "signoz_error_index_v2" + signozTraceTableName = "signoz_index_v2" + signozMetricDBName = "signoz_metrics" + signozSampleTableName = "samples_v2" + signozTSTableName = "time_series_v2" minTimespanForProgressiveSearch = time.Hour minTimespanForProgressiveSearchMargin = time.Minute @@ -76,20 +77,23 @@ var ( // SpanWriter for reading spans from ClickHouse type ClickHouseReader struct { - db clickhouse.Conn - localDB *sqlx.DB - traceDB string - operationsTable string - durationTable string - indexTable string - errorTable string - spansTable string - logsDB string - logsTable string - logsAttributeKeys string - logsResourceKeys string - queryEngine *promql.Engine - remoteStorage *remote.Storage + db clickhouse.Conn + localDB *sqlx.DB + traceDB string + operationsTable string + durationTable string + indexTable string + errorTable string + usageExplorerTable string + spansTable string + dependencyGraphTable string + topLevelOperationsTable string + logsDB string + logsTable string + logsAttributeKeys string + logsResourceKeys string + queryEngine *promql.Engine + remoteStorage *remote.Storage promConfigFile string promConfig *config.Config @@ -118,21 +122,24 @@ func NewReader(localDB *sqlx.DB, configFile string) *ClickHouseReader { } return &ClickHouseReader{ - db: db, - localDB: localDB, - traceDB: options.primary.TraceDB, - alertManager: alertManager, - operationsTable: options.primary.OperationsTable, - indexTable: options.primary.IndexTable, - errorTable: options.primary.ErrorTable, - durationTable: options.primary.DurationTable, - spansTable: options.primary.SpansTable, - logsDB: options.primary.LogsDB, - logsTable: options.primary.LogsTable, - logsAttributeKeys: options.primary.LogsAttributeKeysTable, - logsResourceKeys: options.primary.LogsResourceKeysTable, - liveTailRefreshSeconds: options.primary.LiveTailRefreshSeconds, - promConfigFile: configFile, + db: db, + localDB: localDB, + traceDB: options.primary.TraceDB, + alertManager: alertManager, + operationsTable: options.primary.OperationsTable, + indexTable: options.primary.IndexTable, + errorTable: options.primary.ErrorTable, + usageExplorerTable: options.primary.UsageExplorerTable, + durationTable: options.primary.DurationTable, + spansTable: options.primary.SpansTable, + dependencyGraphTable: options.primary.DependencyGraphTable, + topLevelOperationsTable: options.primary.TopLevelOperationsTable, + logsDB: options.primary.LogsDB, + logsTable: options.primary.LogsTable, + logsAttributeKeys: options.primary.LogsAttributeKeysTable, + logsResourceKeys: options.primary.LogsResourceKeysTable, + liveTailRefreshSeconds: options.primary.LiveTailRefreshSeconds, + promConfigFile: configFile, } } @@ -386,14 +393,21 @@ func (r *ClickHouseReader) GetChannel(id string) (*model.ChannelItem, *model.Api idInt, _ := strconv.Atoi(id) channel := model.ChannelItem{} - query := fmt.Sprintf("SELECT id, created_at, updated_at, name, type, data data FROM notification_channels WHERE id=%d", idInt) + query := "SELECT id, created_at, updated_at, name, type, data data FROM notification_channels WHERE id=? " - err := r.localDB.Get(&channel, query) + stmt, err := r.localDB.Preparex(query) - zap.S().Info(query) + zap.S().Info(query, idInt) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Debug("Error in preparing sql query for GetChannel : ", err) + return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} + } + + err = stmt.Get(&channel, idInt) + + if err != nil { + zap.S().Debug(fmt.Sprintf("Error in getting channel with id=%d : ", idInt), err) return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} } @@ -662,103 +676,153 @@ func (r *ClickHouseReader) GetServicesList(ctx context.Context) (*[]string, erro return &services, nil } +func (r *ClickHouseReader) GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError) { + + operations := map[string][]string{} + query := fmt.Sprintf(`SELECT DISTINCT name, serviceName FROM %s.%s`, r.traceDB, r.topLevelOperationsTable) + + rows, err := r.db.Query(ctx, query) + + if err != nil { + zap.S().Error("Error in processing sql query: ", err) + return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + + defer rows.Close() + for rows.Next() { + var name, serviceName string + if err := rows.Scan(&name, &serviceName); err != nil { + return nil, &model.ApiError{Typ: model.ErrorInternal, Err: fmt.Errorf("Error in reading data")} + } + if _, ok := operations[serviceName]; !ok { + operations[serviceName] = []string{} + } + operations[serviceName] = append(operations[serviceName], name) + } + return &operations, nil +} + func (r *ClickHouseReader) GetServices(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError) { if r.indexTable == "" { return nil, &model.ApiError{Typ: model.ErrorExec, Err: ErrNoIndexTable} } + topLevelOps, apiErr := r.GetTopLevelOperations(ctx) + if apiErr != nil { + return nil, apiErr + } + serviceItems := []model.ServiceItem{} + var wg sync.WaitGroup + // limit the number of concurrent queries to not overload the clickhouse server + sem := make(chan struct{}, 10) + var mtx sync.RWMutex - query := fmt.Sprintf("SELECT serviceName, quantile(0.99)(durationNano) as p99, avg(durationNano) as avgDuration, count(*) as numCalls FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) - args := []interface{}{} - args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) - if errStatus != nil { - return nil, errStatus + for svc, ops := range *topLevelOps { + sem <- struct{}{} + wg.Add(1) + go func(svc string, ops []string) { + defer wg.Done() + defer func() { <-sem }() + var serviceItem model.ServiceItem + var numErrors uint64 + query := fmt.Sprintf( + `SELECT + quantile(0.99)(durationNano) as p99, + avg(durationNano) as avgDuration, + count(*) as numCalls + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`, + r.traceDB, r.indexTable, + ) + errorQuery := fmt.Sprintf( + `SELECT + count(*) as numErrors + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`, + r.traceDB, r.indexTable, + ) + + args := []interface{}{} + args = append(args, + clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), + clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)), + clickhouse.Named("serviceName", svc), + clickhouse.Named("names", ops), + ) + args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) + if errStatus != nil { + zap.S().Error("Error in processing sql query: ", errStatus) + return + } + err := r.db.QueryRow( + ctx, + query, + args..., + ).ScanStruct(&serviceItem) + + if err != nil { + zap.S().Error("Error in processing sql query: ", err) + return + } + + err = r.db.QueryRow(ctx, errorQuery, args...).Scan(&numErrors) + if err != nil { + zap.S().Error("Error in processing sql query: ", err) + return + } + + serviceItem.ServiceName = svc + serviceItem.NumErrors = numErrors + mtx.Lock() + serviceItems = append(serviceItems, serviceItem) + mtx.Unlock() + }(svc, ops) } - query += " GROUP BY serviceName ORDER BY p99 DESC" - err := r.db.Select(ctx, &serviceItems, query, args...) + wg.Wait() - zap.S().Info(query) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + for idx := range serviceItems { + serviceItems[idx].CallRate = float64(serviceItems[idx].NumCalls) / float64(queryParams.Period) + serviceItems[idx].ErrorRate = float64(serviceItems[idx].NumErrors) * 100 / float64(serviceItems[idx].NumCalls) } - - ////////////////// Below block gets 5xx of services - serviceErrorItems := []model.ServiceItem{} - - query = fmt.Sprintf("SELECT serviceName, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND (statusCode>=500 OR statusCode=2)", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) - args = []interface{}{} - args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) - if errStatus != nil { - return nil, errStatus - } - query += " GROUP BY serviceName" - err = r.db.Select(ctx, &serviceErrorItems, query, args...) - - zap.S().Info(query) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} - } - - m5xx := make(map[string]uint64) - - for j := range serviceErrorItems { - m5xx[serviceErrorItems[j].ServiceName] = serviceErrorItems[j].NumErrors - } - /////////////////////////////////////////// - - ////////////////// Below block gets 4xx of services - - service4xxItems := []model.ServiceItem{} - - query = fmt.Sprintf("SELECT serviceName, count(*) as num4xx FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND statusCode>=400 AND statusCode<500", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) - args = []interface{}{} - args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) - if errStatus != nil { - return nil, errStatus - } - query += " GROUP BY serviceName" - err = r.db.Select(ctx, &service4xxItems, query, args...) - - zap.S().Info(query) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} - } - - m4xx := make(map[string]uint64) - - for j := range service4xxItems { - m4xx[service4xxItems[j].ServiceName] = service4xxItems[j].Num4XX - } - - for i := range serviceItems { - if val, ok := m5xx[serviceItems[i].ServiceName]; ok { - serviceItems[i].NumErrors = val - } - if val, ok := m4xx[serviceItems[i].ServiceName]; ok { - serviceItems[i].Num4XX = val - } - serviceItems[i].CallRate = float64(serviceItems[i].NumCalls) / float64(queryParams.Period) - serviceItems[i].FourXXRate = float64(serviceItems[i].Num4XX) * 100 / float64(serviceItems[i].NumCalls) - serviceItems[i].ErrorRate = float64(serviceItems[i].NumErrors) * 100 / float64(serviceItems[i].NumCalls) - } - return &serviceItems, nil } func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError) { + topLevelOps, apiErr := r.GetTopLevelOperations(ctx) + if apiErr != nil { + return nil, apiErr + } + ops, ok := (*topLevelOps)[queryParams.ServiceName] + if !ok { + return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Service not found")} + } + + namedArgs := []interface{}{ + clickhouse.Named("interval", strconv.Itoa(int(queryParams.StepSeconds/60))), + clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), + clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)), + clickhouse.Named("serviceName", queryParams.ServiceName), + clickhouse.Named("names", ops), + } + serviceOverviewItems := []model.ServiceOverviewItem{} - query := fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, quantile(0.99)(durationNano) as p99, quantile(0.95)(durationNano) as p95,quantile(0.50)(durationNano) as p50, count(*) as numCalls FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s'", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName) + query := fmt.Sprintf(` + SELECT + toStartOfInterval(timestamp, INTERVAL @interval minute) as time, + quantile(0.99)(durationNano) as p99, + quantile(0.95)(durationNano) as p95, + quantile(0.50)(durationNano) as p50, + count(*) as numCalls + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end`, + r.traceDB, r.indexTable, + ) args := []interface{}{} + args = append(args, namedArgs...) args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) if errStatus != nil { return nil, errStatus @@ -766,17 +830,25 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams * query += " GROUP BY time ORDER BY time DESC" err := r.db.Select(ctx, &serviceOverviewItems, query, args...) - zap.S().Info(query) + zap.S().Debug(query) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} } serviceErrorItems := []model.ServiceErrorItem{} - query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %s minute) as time, count(*) as numErrors FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' AND kind='2' AND serviceName='%s' AND hasError=true", strconv.Itoa(int(queryParams.StepSeconds/60)), r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName) + query = fmt.Sprintf(` + SELECT + toStartOfInterval(timestamp, INTERVAL @interval minute) as time, + count(*) as numErrors + FROM %s.%s + WHERE serviceName = @serviceName AND name In [@names] AND timestamp>= @start AND timestamp<= @end AND statusCode=2`, + r.traceDB, r.indexTable, + ) args = []interface{}{} + args = append(args, namedArgs...) args, errStatus = buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) if errStatus != nil { return nil, errStatus @@ -784,10 +856,10 @@ func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams * query += " GROUP BY time ORDER BY time DESC" err = r.db.Select(ctx, &serviceErrorItems, query, args...) - zap.S().Info(query) + zap.S().Debug(query) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} } @@ -1528,45 +1600,67 @@ func (r *ClickHouseReader) GetTagValues(ctx context.Context, queryParams *model. return &cleanedTagValues, nil } -func (r *ClickHouseReader) GetTopEndpoints(ctx context.Context, queryParams *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError) { +func (r *ClickHouseReader) GetTopOperations(ctx context.Context, queryParams *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) { - var topEndpointsItems []model.TopEndpointsItem + namedArgs := []interface{}{ + clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), + clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)), + clickhouse.Named("serviceName", queryParams.ServiceName), + } - query := fmt.Sprintf("SELECT quantile(0.5)(durationNano) as p50, quantile(0.95)(durationNano) as p95, quantile(0.99)(durationNano) as p99, COUNT(1) as numCalls, name FROM %s.%s WHERE timestamp >= '%s' AND timestamp <= '%s' AND kind='2' and serviceName='%s'", r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10), queryParams.ServiceName) + var topOperationsItems []model.TopOperationsItem + + query := fmt.Sprintf(` + SELECT + quantile(0.5)(durationNano) as p50, + quantile(0.95)(durationNano) as p95, + quantile(0.99)(durationNano) as p99, + COUNT(*) as numCalls, + name + FROM %s.%s + WHERE serviceName = @serviceName AND timestamp>= @start AND timestamp<= @end`, + r.traceDB, r.indexTable, + ) args := []interface{}{} + args = append(args, namedArgs...) args, errStatus := buildQueryWithTagParams(ctx, queryParams.Tags, &query, args) if errStatus != nil { return nil, errStatus } - query += " GROUP BY name" - err := r.db.Select(ctx, &topEndpointsItems, query, args...) + query += " GROUP BY name ORDER BY p99 DESC LIMIT 10" + err := r.db.Select(ctx, &topOperationsItems, query, args...) - zap.S().Info(query) + zap.S().Debug(query) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} } - if topEndpointsItems == nil { - topEndpointsItems = []model.TopEndpointsItem{} + if topOperationsItems == nil { + topOperationsItems = []model.TopOperationsItem{} } - return &topEndpointsItems, nil + return &topOperationsItems, nil } func (r *ClickHouseReader) GetUsage(ctx context.Context, queryParams *model.GetUsageParams) (*[]model.UsageItem, error) { var usageItems []model.UsageItem - + namedArgs := []interface{}{ + clickhouse.Named("interval", queryParams.StepHour), + clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), + clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)), + } var query string if len(queryParams.ServiceName) != 0 { - query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d HOUR) as time, count(1) as count FROM %s.%s WHERE serviceName='%s' AND timestamp>='%s' AND timestamp<='%s' GROUP BY time ORDER BY time ASC", queryParams.StepHour, r.traceDB, r.indexTable, queryParams.ServiceName, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) + namedArgs = append(namedArgs, clickhouse.Named("serviceName", queryParams.ServiceName)) + query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL @interval HOUR) as time, sum(count) as count FROM %s.%s WHERE service_name=@serviceName AND timestamp>=@start AND timestamp<=@end GROUP BY time ORDER BY time ASC", r.traceDB, r.usageExplorerTable) } else { - query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d HOUR) as time, count(1) as count FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s' GROUP BY time ORDER BY time ASC", queryParams.StepHour, r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) + query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL @interval HOUR) as time, sum(count) as count FROM %s.%s WHERE timestamp>=@start AND timestamp<=@end GROUP BY time ORDER BY time ASC", r.traceDB, r.usageExplorerTable) } - err := r.db.Select(ctx, &usageItems, query) + err := r.db.Select(ctx, &usageItems, query, namedArgs...) zap.S().Info(query) @@ -1626,48 +1720,50 @@ func interfaceArrayToStringArray(array []interface{}) []string { return strArray } -func (r *ClickHouseReader) GetServiceMapDependencies(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) { - serviceMapDependencyItems := []model.ServiceMapDependencyItem{} +func (r *ClickHouseReader) GetDependencyGraph(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) { - query := fmt.Sprintf(`SELECT spanID, parentSpanID, serviceName FROM %s.%s WHERE timestamp>='%s' AND timestamp<='%s'`, r.traceDB, r.indexTable, strconv.FormatInt(queryParams.Start.UnixNano(), 10), strconv.FormatInt(queryParams.End.UnixNano(), 10)) + response := []model.ServiceMapDependencyResponseItem{} - err := r.db.Select(ctx, &serviceMapDependencyItems, query) + args := []interface{}{} + args = append(args, + clickhouse.Named("start", uint64(queryParams.Start.Unix())), + clickhouse.Named("end", uint64(queryParams.End.Unix())), + clickhouse.Named("duration", uint64(queryParams.End.Unix()-queryParams.Start.Unix())), + ) - zap.S().Info(query) + query := fmt.Sprintf(` + WITH + quantilesMergeState(0.5, 0.75, 0.9, 0.95, 0.99)(duration_quantiles_state) AS duration_quantiles_state, + finalizeAggregation(duration_quantiles_state) AS result + SELECT + src as parent, + dest as child, + result[1] AS p50, + result[2] AS p75, + result[3] AS p90, + result[4] AS p95, + result[5] AS p99, + sum(total_count) as callCount, + sum(total_count)/ @duration AS callRate, + sum(error_count)/sum(total_count) as errorRate + FROM %s.%s + WHERE toUInt64(toDateTime(timestamp)) >= @start AND toUInt64(toDateTime(timestamp)) <= @end + GROUP BY + src, + dest`, + r.traceDB, r.dependencyGraphTable, + ) + + zap.S().Debug(query, args) + + err := r.db.Select(ctx, &response, query, args...) if err != nil { - zap.S().Debug("Error in processing sql query: ", err) + zap.S().Error("Error in processing sql query: ", err) return nil, fmt.Errorf("Error in processing sql query") } - serviceMap := make(map[string]*model.ServiceMapDependencyResponseItem) - - spanId2ServiceNameMap := make(map[string]string) - for i := range serviceMapDependencyItems { - spanId2ServiceNameMap[serviceMapDependencyItems[i].SpanId] = serviceMapDependencyItems[i].ServiceName - } - for i := range serviceMapDependencyItems { - parent2childServiceName := spanId2ServiceNameMap[serviceMapDependencyItems[i].ParentSpanId] + "-" + spanId2ServiceNameMap[serviceMapDependencyItems[i].SpanId] - if _, ok := serviceMap[parent2childServiceName]; !ok { - serviceMap[parent2childServiceName] = &model.ServiceMapDependencyResponseItem{ - Parent: spanId2ServiceNameMap[serviceMapDependencyItems[i].ParentSpanId], - Child: spanId2ServiceNameMap[serviceMapDependencyItems[i].SpanId], - CallCount: 1, - } - } else { - serviceMap[parent2childServiceName].CallCount++ - } - } - - retMe := make([]model.ServiceMapDependencyResponseItem, 0, len(serviceMap)) - for _, dependency := range serviceMap { - if dependency.Parent == "" { - continue - } - retMe = append(retMe, *dependency) - } - - return &retMe, nil + return &response, nil } func (r *ClickHouseReader) GetFilteredSpansAggregates(ctx context.Context, queryParams *model.GetFilteredSpanAggregatesParams) (*model.GetFilteredSpansAggregatesResponse, *model.ApiError) { @@ -1907,7 +2003,7 @@ func (r *ClickHouseReader) SetTTL(ctx context.Context, switch params.Type { case constants.TraceTTL: - tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable} + tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable, signozTraceDBName + "." + signozUsageExplorerTable, signozTraceDBName + "." + defaultDependencyGraphTable} for _, tableName = range tableNameArray { statusItem, err := r.checkTTLStatusItem(ctx, tableName) if err != nil { @@ -2253,7 +2349,7 @@ func (r *ClickHouseReader) GetTTL(ctx context.Context, ttlParams *model.GetTTLPa switch ttlParams.Type { case constants.TraceTTL: - tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable} + tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable, signozTraceDBName + "." + signozUsageExplorerTable, signozTraceDBName + "." + defaultDependencyGraphTable} status, err := r.setTTLQueryStatus(ctx, tableNameArray) if err != nil { return nil, err diff --git a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go index c90893a55d..ae90926421 100644 --- a/pkg/query-service/app/http_handler.go +++ b/pkg/query-service/app/http_handler.go @@ -311,6 +311,7 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) { router.HandleFunc("/api/v1/rules", EditAccess(aH.createRule)).Methods(http.MethodPost) router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.editRule)).Methods(http.MethodPut) router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.deleteRule)).Methods(http.MethodDelete) + router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.patchRule)).Methods(http.MethodPatch) router.HandleFunc("/api/v1/dashboards", ViewAccess(aH.getDashboards)).Methods(http.MethodGet) router.HandleFunc("/api/v1/dashboards", EditAccess(aH.createDashboards)).Methods(http.MethodPost) @@ -323,10 +324,11 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) { router.HandleFunc("/api/v1/services", ViewAccess(aH.getServices)).Methods(http.MethodPost) router.HandleFunc("/api/v1/services/list", aH.getServicesList).Methods(http.MethodGet) router.HandleFunc("/api/v1/service/overview", ViewAccess(aH.getServiceOverview)).Methods(http.MethodPost) - router.HandleFunc("/api/v1/service/top_endpoints", ViewAccess(aH.getTopEndpoints)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/service/top_operations", ViewAccess(aH.getTopOperations)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/service/top_level_operations", ViewAccess(aH.getServicesTopLevelOps)).Methods(http.MethodPost) router.HandleFunc("/api/v1/traces/{traceId}", ViewAccess(aH.searchTraces)).Methods(http.MethodGet) router.HandleFunc("/api/v1/usage", ViewAccess(aH.getUsage)).Methods(http.MethodGet) - router.HandleFunc("/api/v1/serviceMapDependencies", ViewAccess(aH.serviceMapDependencies)).Methods(http.MethodPost) + router.HandleFunc("/api/v1/dependency_graph", ViewAccess(aH.dependencyGraph)).Methods(http.MethodPost) router.HandleFunc("/api/v1/settings/ttl", AdminAccess(aH.setTTL)).Methods(http.MethodPost) router.HandleFunc("/api/v1/settings/ttl", ViewAccess(aH.getTTL)).Methods(http.MethodGet) @@ -786,6 +788,28 @@ func (aH *APIHandler) deleteRule(w http.ResponseWriter, r *http.Request) { } +// patchRule updates only requested changes in the rule +func (aH *APIHandler) patchRule(w http.ResponseWriter, r *http.Request) { + id := mux.Vars(r)["id"] + + defer r.Body.Close() + body, err := ioutil.ReadAll(r.Body) + if err != nil { + zap.S().Errorf("msg: error in getting req body of patch rule API\n", "\t error:", err) + respondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) + return + } + + gettableRule, err := aH.ruleManager.PatchRule(string(body), id) + + if err != nil { + respondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil) + return + } + + aH.respond(w, gettableRule) +} + func (aH *APIHandler) editRule(w http.ResponseWriter, r *http.Request) { id := mux.Vars(r)["id"] @@ -1082,14 +1106,14 @@ func (aH *APIHandler) submitFeedback(w http.ResponseWriter, r *http.Request) { } -func (aH *APIHandler) getTopEndpoints(w http.ResponseWriter, r *http.Request) { +func (aH *APIHandler) getTopOperations(w http.ResponseWriter, r *http.Request) { - query, err := parseGetTopEndpointsRequest(r) + query, err := parseGetTopOperationsRequest(r) if aH.handleError(w, err, http.StatusBadRequest) { return } - result, apiErr := (*aH.reader).GetTopEndpoints(r.Context(), query) + result, apiErr := (*aH.reader).GetTopOperations(r.Context(), query) if apiErr != nil && aH.handleError(w, apiErr.Err, http.StatusInternalServerError) { return @@ -1131,6 +1155,17 @@ func (aH *APIHandler) getServiceOverview(w http.ResponseWriter, r *http.Request) } +func (aH *APIHandler) getServicesTopLevelOps(w http.ResponseWriter, r *http.Request) { + + result, apiErr := (*aH.reader).GetTopLevelOperations(r.Context()) + if apiErr != nil { + respondError(w, apiErr, nil) + return + } + + aH.writeJSON(w, r, result) +} + func (aH *APIHandler) getServices(w http.ResponseWriter, r *http.Request) { query, err := parseGetServicesRequest(r) @@ -1152,14 +1187,14 @@ func (aH *APIHandler) getServices(w http.ResponseWriter, r *http.Request) { aH.writeJSON(w, r, result) } -func (aH *APIHandler) serviceMapDependencies(w http.ResponseWriter, r *http.Request) { +func (aH *APIHandler) dependencyGraph(w http.ResponseWriter, r *http.Request) { query, err := parseGetServicesRequest(r) if aH.handleError(w, err, http.StatusBadRequest) { return } - result, err := (*aH.reader).GetServiceMapDependencies(r.Context(), query) + result, err := (*aH.reader).GetDependencyGraph(r.Context(), query) if aH.handleError(w, err, http.StatusBadRequest) { return } diff --git a/pkg/query-service/app/parser.go b/pkg/query-service/app/parser.go index ddd0ebd42d..1e2935c06f 100644 --- a/pkg/query-service/app/parser.go +++ b/pkg/query-service/app/parser.go @@ -32,8 +32,8 @@ func parseUser(r *http.Request) (*model.User, error) { return &user, nil } -func parseGetTopEndpointsRequest(r *http.Request) (*model.GetTopEndpointsParams, error) { - var postData *model.GetTopEndpointsParams +func parseGetTopOperationsRequest(r *http.Request) (*model.GetTopOperationsParams, error) { + var postData *model.GetTopOperationsParams err := json.NewDecoder(r.Body).Decode(&postData) if err != nil { diff --git a/pkg/query-service/app/server.go b/pkg/query-service/app/server.go index b2fbe2df5a..dd00aea804 100644 --- a/pkg/query-service/app/server.go +++ b/pkg/query-service/app/server.go @@ -140,7 +140,7 @@ func (s *Server) createPrivateServer(api *APIHandler) (*http.Server, error) { //todo(amol): find out a way to add exact domain or // ip here for alert manager AllowedOrigins: []string{"*"}, - AllowedMethods: []string{"GET", "DELETE", "POST", "PUT"}, + AllowedMethods: []string{"GET", "DELETE", "POST", "PUT", "PATCH"}, AllowedHeaders: []string{"Accept", "Authorization", "Content-Type"}, }) @@ -166,7 +166,7 @@ func (s *Server) createPublicServer(api *APIHandler) (*http.Server, error) { c := cors.New(cors.Options{ AllowedOrigins: []string{"*"}, - AllowedMethods: []string{"GET", "DELETE", "POST", "PUT", "OPTIONS"}, + AllowedMethods: []string{"GET", "DELETE", "POST", "PUT", "PATCH", "OPTIONS"}, AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "cache-control"}, }) diff --git a/pkg/query-service/config/prometheus.yml b/pkg/query-service/config/prometheus.yml index c515a46662..d7c0ce6911 100644 --- a/pkg/query-service/config/prometheus.yml +++ b/pkg/query-service/config/prometheus.yml @@ -23,4 +23,4 @@ scrape_configs: remote_read: - - url: tcp://localhost:9001/?database=signoz_metrics + - url: tcp://localhost:9000/?database=signoz_metrics diff --git a/pkg/query-service/go.mod b/pkg/query-service/go.mod index 1d5dfa8e8d..34ddceace9 100644 --- a/pkg/query-service/go.mod +++ b/pkg/query-service/go.mod @@ -11,7 +11,7 @@ require ( github.com/gorilla/mux v1.8.0 github.com/gosimple/slug v1.10.0 github.com/jmoiron/sqlx v1.3.4 - github.com/json-iterator/go v1.1.10 + github.com/json-iterator/go v1.1.12 github.com/mattn/go-sqlite3 v1.14.8 github.com/minio/minio-go/v6 v6.0.57 github.com/oklog/oklog v0.3.2 @@ -92,7 +92,7 @@ require ( github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/go-testing-interface v1.14.1 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223 // indirect github.com/oklog/run v1.1.0 // indirect github.com/oklog/ulid v0.3.1-0.20170117200651-66bb6560562f // indirect diff --git a/pkg/query-service/go.sum b/pkg/query-service/go.sum index d69fb82481..4fbb2b1c25 100644 --- a/pkg/query-service/go.sum +++ b/pkg/query-service/go.sum @@ -295,6 +295,8 @@ github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBv github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= @@ -343,6 +345,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223 h1:F9x/1yl3T2AeKLr2AMdilSD8+f9bvMnNN8VS5iDtovc= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/oklog/oklog v0.2.3-0.20170918173356-f857583a70c3/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= diff --git a/pkg/query-service/integrations/alertManager/model.go b/pkg/query-service/integrations/alertManager/model.go index bb709e430f..19371a9bfd 100644 --- a/pkg/query-service/integrations/alertManager/model.go +++ b/pkg/query-service/integrations/alertManager/model.go @@ -40,6 +40,8 @@ type Alert struct { StartsAt time.Time `json:"startsAt,omitempty"` EndsAt time.Time `json:"endsAt,omitempty"` GeneratorURL string `json:"generatorURL,omitempty"` + + Receivers []string `json:"receivers,omitempty"` } // Name returns the name of the alert. It is equivalent to the "alertname" label. @@ -53,7 +55,7 @@ func (a *Alert) Hash() uint64 { } func (a *Alert) String() string { - s := fmt.Sprintf("%s[%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7]) + s := fmt.Sprintf("%s[%s][%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7], a.Receivers) if a.Resolved() { return s + "[resolved]" } diff --git a/pkg/query-service/interfaces/interface.go b/pkg/query-service/interfaces/interface.go index c96a1af470..a2dae6df01 100644 --- a/pkg/query-service/interfaces/interface.go +++ b/pkg/query-service/interfaces/interface.go @@ -20,11 +20,13 @@ type Reader interface { GetInstantQueryMetricsResult(ctx context.Context, query *model.InstantQueryMetricsParams) (*promql.Result, *stats.QueryStats, *model.ApiError) GetQueryRangeResult(ctx context.Context, query *model.QueryRangeParams) (*promql.Result, *stats.QueryStats, *model.ApiError) GetServiceOverview(ctx context.Context, query *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError) + GetTopLevelOperations(ctx context.Context) (*map[string][]string, *model.ApiError) GetServices(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceItem, *model.ApiError) - GetTopEndpoints(ctx context.Context, query *model.GetTopEndpointsParams) (*[]model.TopEndpointsItem, *model.ApiError) + GetTopOperations(ctx context.Context, query *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) GetUsage(ctx context.Context, query *model.GetUsageParams) (*[]model.UsageItem, error) GetServicesList(ctx context.Context) (*[]string, error) - GetServiceMapDependencies(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) + GetDependencyGraph(ctx context.Context, query *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) + GetTTL(ctx context.Context, ttlParams *model.GetTTLParams) (*model.GetTTLResponseItem, *model.ApiError) // GetDisks returns a list of disks configured in the underlying DB. It is supported by diff --git a/pkg/query-service/model/queryParams.go b/pkg/query-service/model/queryParams.go index f020a429ea..c8cfa2dcee 100644 --- a/pkg/query-service/model/queryParams.go +++ b/pkg/query-service/model/queryParams.go @@ -135,7 +135,7 @@ type MetricAutocompleteTagParams struct { TagKey string } -type GetTopEndpointsParams struct { +type GetTopOperationsParams struct { StartTime string `json:"start"` EndTime string `json:"end"` ServiceName string `json:"service"` diff --git a/pkg/query-service/model/response.go b/pkg/query-service/model/response.go index feca7de5aa..54ddbde620 100644 --- a/pkg/query-service/model/response.go +++ b/pkg/query-service/model/response.go @@ -3,6 +3,7 @@ package model import ( "encoding/json" "fmt" + "math" "strconv" "time" @@ -206,19 +207,13 @@ func (item *SearchSpanReponseItem) GetValues() []interface{} { return returnArray } -type ServiceMapDependencyItem struct { - SpanId string `json:"spanId,omitempty" ch:"spanID"` - ParentSpanId string `json:"parentSpanId,omitempty" ch:"parentSpanID"` - ServiceName string `json:"serviceName,omitempty" ch:"serviceName"` -} - type UsageItem struct { Time time.Time `json:"time,omitempty" ch:"time"` Timestamp uint64 `json:"timestamp" ch:"timestamp"` Count uint64 `json:"count" ch:"count"` } -type TopEndpointsItem struct { +type TopOperationsItem struct { Percentile50 float64 `json:"p50" ch:"p50"` Percentile95 float64 `json:"p95" ch:"p95"` Percentile99 float64 `json:"p99" ch:"p99"` @@ -233,10 +228,18 @@ type TagFilters struct { type TagValues struct { TagValues string `json:"tagValues" ch:"tagValues"` } + type ServiceMapDependencyResponseItem struct { - Parent string `json:"parent,omitempty" ch:"parent"` - Child string `json:"child,omitempty" ch:"child"` - CallCount int `json:"callCount,omitempty" ch:"callCount"` + Parent string `json:"parent" ch:"parent"` + Child string `json:"child" ch:"child"` + CallCount uint64 `json:"callCount" ch:"callCount"` + CallRate float64 `json:"callRate" ch:"callRate"` + ErrorRate float64 `json:"errorRate" ch:"errorRate"` + P99 float64 `json:"p99" ch:"p99"` + P95 float64 `json:"p95" ch:"p95"` + P90 float64 `json:"p90" ch:"p90"` + P75 float64 `json:"p75" ch:"p75"` + P50 float64 `json:"p50" ch:"p50"` } type GetFilteredSpansAggregatesResponse struct { @@ -462,3 +465,30 @@ type LogsAggregatesDBResponseItem struct { Value float64 `ch:"value"` GroupBy string `ch:"groupBy"` } + +// MarshalJSON implements json.Marshaler. +func (s *ServiceItem) MarshalJSON() ([]byte, error) { + // If a service didn't not send any data in the last interval duration + // it's values such as 99th percentile will return as NaN and + // json encoding doesn't support NaN + // We still want to show it in the UI, so we'll replace NaN with 0 + type Alias ServiceItem + if math.IsInf(s.AvgDuration, 0) || math.IsNaN(s.AvgDuration) { + s.AvgDuration = 0 + } + if math.IsInf(s.CallRate, 0) || math.IsNaN(s.CallRate) { + s.CallRate = 0 + } + if math.IsInf(s.ErrorRate, 0) || math.IsNaN(s.ErrorRate) { + s.ErrorRate = 0 + } + if math.IsInf(s.Percentile99, 0) || math.IsNaN(s.Percentile99) { + s.Percentile99 = 0 + } + + return json.Marshal(&struct { + *Alias + }{ + Alias: (*Alias)(s), + }) +} diff --git a/pkg/query-service/rules/alerting.go b/pkg/query-service/rules/alerting.go index a4768b4036..ecd7205557 100644 --- a/pkg/query-service/rules/alerting.go +++ b/pkg/query-service/rules/alerting.go @@ -2,12 +2,18 @@ package rules import ( "encoding/json" + "fmt" "github.com/pkg/errors" "go.signoz.io/query-service/model" "go.signoz.io/query-service/utils/labels" + "net/url" + "strings" "time" ) +// this file contains common structs and methods used by +// rule engine + // how long before re-sending the alert const resolvedRetention = 15 * time.Minute @@ -41,6 +47,7 @@ const ( StateInactive AlertState = iota StatePending StateFiring + StateDisabled ) func (s AlertState) String() string { @@ -51,6 +58,8 @@ func (s AlertState) String() string { return "pending" case StateFiring: return "firing" + case StateDisabled: + return "disabled" } panic(errors.Errorf("unknown alert state: %d", s)) } @@ -63,6 +72,9 @@ type Alert struct { GeneratorURL string + // list of preferred receivers, e.g. slack + Receivers []string + Value float64 ActiveAt time.Time FiredAt time.Time @@ -71,7 +83,6 @@ type Alert struct { ValidUntil time.Time } -// todo(amol): need to review this with ankit func (a *Alert) needsSending(ts time.Time, resendDelay time.Duration) bool { if a.State == StatePending { return false @@ -198,3 +209,30 @@ func (d *Duration) UnmarshalJSON(b []byte) error { return errors.New("invalid duration") } } + +// prepareRuleGeneratorURL creates an appropriate url +// for the rule. the URL is sent in slack messages as well as +// to other systems and allows backtracking to the rule definition +// from the third party systems. +func prepareRuleGeneratorURL(ruleId string, source string) string { + if source == "" { + return source + } + + // check if source is a valid url + _, err := url.Parse(source) + if err != nil { + return "" + } + // since we capture window.location when a new rule is created + // we end up with rulesource host:port/alerts/new. in this case + // we want to replace new with rule id parameter + + hasNew := strings.LastIndex(source, "new") + if hasNew > -1 { + ruleURL := fmt.Sprintf("%sedit?ruleId=%s", source[0:hasNew], ruleId) + return ruleURL + } + + return source +} diff --git a/pkg/query-service/rules/apiParams.go b/pkg/query-service/rules/apiParams.go index 6f3b466d11..b88fa98fb6 100644 --- a/pkg/query-service/rules/apiParams.go +++ b/pkg/query-service/rules/apiParams.go @@ -30,9 +30,13 @@ type PostableRule struct { Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"` Annotations map[string]string `yaml:"annotations,omitempty" json:"annotations,omitempty"` + Disabled bool `json:"disabled"` + // Source captures the source url where rule has been created Source string `json:"source,omitempty"` + PreferredChannels []string `json:"preferredChannels,omitempty"` + // legacy Expr string `yaml:"expr,omitempty" json:"expr,omitempty"` OldYaml string `json:"yaml,omitempty"` @@ -43,16 +47,23 @@ func ParsePostableRule(content []byte) (*PostableRule, []error) { } func parsePostableRule(content []byte, kind string) (*PostableRule, []error) { - rule := PostableRule{} + return parseIntoRule(PostableRule{}, content, kind) +} + +// parseIntoRule loads the content (data) into PostableRule and also +// validates the end result +func parseIntoRule(initRule PostableRule, content []byte, kind string) (*PostableRule, []error) { + + rule := &initRule var err error if kind == "json" { - if err = json.Unmarshal(content, &rule); err != nil { + if err = json.Unmarshal(content, rule); err != nil { zap.S().Debugf("postable rule content", string(content), "\t kind:", kind) return nil, []error{fmt.Errorf("failed to load json")} } } else if kind == "yaml" { - if err = yaml.Unmarshal(content, &rule); err != nil { + if err = yaml.Unmarshal(content, rule); err != nil { zap.S().Debugf("postable rule content", string(content), "\t kind:", kind) return nil, []error{fmt.Errorf("failed to load yaml")} } @@ -105,7 +116,8 @@ func parsePostableRule(content []byte, kind string) (*PostableRule, []error) { if errs := rule.Validate(); len(errs) > 0 { return nil, errs } - return &rule, []error{} + + return rule, []error{} } func isValidLabelName(ln string) bool { @@ -213,18 +225,7 @@ type GettableRules struct { // GettableRule has info for an alerting rules. type GettableRule struct { - Labels map[string]string `json:"labels"` - Annotations map[string]string `json:"annotations"` - State string `json:"state"` - Alert string `json:"alert"` - // Description string `yaml:"description,omitempty" json:"description,omitempty"` - - Id string `json:"id"` - RuleType RuleType `yaml:"ruleType,omitempty" json:"ruleType,omitempty"` - EvalWindow Duration `yaml:"evalWindow,omitempty" json:"evalWindow,omitempty"` - Frequency Duration `yaml:"frequency,omitempty" json:"frequency,omitempty"` - RuleCondition RuleCondition `yaml:"condition,omitempty" json:"condition,omitempty"` - - // ActiveAt *time.Time `json:"activeAt,omitempty"` - // Value float64 `json:"value"` + Id string `json:"id"` + State string `json:"state"` + PostableRule } diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go index 9a040fdf74..6494320043 100644 --- a/pkg/query-service/rules/manager.go +++ b/pkg/query-service/rules/manager.go @@ -29,8 +29,16 @@ func ruleIdFromTaskName(n string) string { return strings.Split(n, "-groupname")[0] } -func prepareTaskName(ruleId int64) string { - return fmt.Sprintf("%d-groupname", ruleId) +func prepareTaskName(ruleId interface{}) string { + switch ruleId.(type) { + case int, int64: + return fmt.Sprintf("%d-groupname", ruleId) + case string: + return fmt.Sprintf("%s-groupname", ruleId) + default: + return fmt.Sprintf("%v-groupname", ruleId) + } + } // ManagerOptions bundles options for the Manager. @@ -170,10 +178,11 @@ func (m *Manager) initiate() error { continue } } - - err := m.addTask(parsedRule, taskName) - if err != nil { - zap.S().Errorf("failed to load the rule definition (%s): %v", taskName, err) + if !parsedRule.Disabled { + err := m.addTask(parsedRule, taskName) + if err != nil { + zap.S().Errorf("failed to load the rule definition (%s): %v", taskName, err) + } } } @@ -206,7 +215,7 @@ func (m *Manager) Stop() { // EditRuleDefinition writes the rule definition to the // datastore and also updates the rule executor func (m *Manager) EditRule(ruleStr string, id string) error { - // todo(amol): fetch recent rule from db first + parsedRule, errs := ParsePostableRule([]byte(ruleStr)) if len(errs) > 0 { @@ -221,16 +230,9 @@ func (m *Manager) EditRule(ruleStr string, id string) error { } if !m.opts.DisableRules { - err = m.editTask(parsedRule, taskName) - if err != nil { - // todo(amol): using tx with sqllite3 is gets - // database locked. need to research and resolve this - //tx.Rollback() - return err - } + return m.syncRuleStateWithTask(taskName, parsedRule) } - // return tx.Commit() return nil } @@ -249,8 +251,7 @@ func (m *Manager) editTask(rule *PostableRule, taskName string) error { // it to finish the current iteration. Then copy it into the new group. oldTask, ok := m.tasks[taskName] if !ok { - zap.S().Errorf("msg:", "rule task not found, edit task failed", "\t task name:", taskName) - return errors.New("rule task not found, edit task failed") + zap.S().Warnf("msg:", "rule task not found, a new task will be created ", "\t task name:", taskName) } delete(m.tasks, taskName) @@ -281,10 +282,7 @@ func (m *Manager) DeleteRule(id string) error { taskName := prepareTaskName(int64(idInt)) if !m.opts.DisableRules { - if err := m.deleteTask(taskName); err != nil { - zap.S().Errorf("msg: ", "failed to unload the rule task from memory, please retry", "\t ruleid: ", id) - return err - } + m.deleteTask(taskName) } if _, _, err := m.ruleDB.DeleteRuleTx(id); err != nil { @@ -295,7 +293,7 @@ func (m *Manager) DeleteRule(id string) error { return nil } -func (m *Manager) deleteTask(taskName string) error { +func (m *Manager) deleteTask(taskName string) { m.mtx.Lock() defer m.mtx.Unlock() @@ -305,11 +303,8 @@ func (m *Manager) deleteTask(taskName string) error { delete(m.tasks, taskName) delete(m.rules, ruleIdFromTaskName(taskName)) } else { - zap.S().Errorf("msg:", "rule not found for deletion", "\t name:", taskName) - return fmt.Errorf("rule not found") + zap.S().Info("msg: ", "rule not found for deletion", "\t name:", taskName) } - - return nil } // CreateRule stores rule def into db and also @@ -386,12 +381,7 @@ func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string // create a threshold rule tr, err := NewThresholdRule( ruleId, - r.Alert, - r.RuleCondition, - time.Duration(r.EvalWindow), - r.Labels, - r.Annotations, - r.Source, + r, ) if err != nil { @@ -411,14 +401,8 @@ func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string // create promql rule pr, err := NewPromRule( ruleId, - r.Alert, - r.RuleCondition, - time.Duration(r.EvalWindow), - r.Labels, - r.Annotations, - // required as promql engine works with logger and not zap + r, log.With(m.logger, "alert", r.Alert), - r.Source, ) if err != nil { @@ -526,6 +510,7 @@ func (m *Manager) prepareNotifyFunc() NotifyFunc { Labels: alert.Labels, Annotations: alert.Annotations, GeneratorURL: generatorURL, + Receivers: alert.Receivers, } if !alert.ResolvedAt.IsZero() { a.EndsAt = alert.ResolvedAt @@ -555,6 +540,9 @@ func (m *Manager) ListRuleStates() (*GettableRules, error) { // fetch rules from DB storedRules, err := m.ruleDB.GetStoredRules() + if err != nil { + return nil, err + } // initiate response object resp := make([]*GettableRule, 0) @@ -571,7 +559,8 @@ func (m *Manager) ListRuleStates() (*GettableRules, error) { // fetch state of rule from memory if rm, ok := m.rules[ruleResponse.Id]; !ok { - zap.S().Warnf("msg:", "invalid rule id found while fetching list of rules", "\t err:", err, "\t rule_id:", ruleResponse.Id) + ruleResponse.State = StateDisabled.String() + ruleResponse.Disabled = true } else { ruleResponse.State = rm.State().String() } @@ -593,3 +582,104 @@ func (m *Manager) GetRule(id string) (*GettableRule, error) { r.Id = fmt.Sprintf("%d", s.Id) return r, nil } + +// syncRuleStateWithTask ensures that the state of a stored rule matches +// the task state. For example - if a stored rule is disabled, then +// there is no task running against it. +func (m *Manager) syncRuleStateWithTask(taskName string, rule *PostableRule) error { + + if rule.Disabled { + // check if rule has any task running + if _, ok := m.tasks[taskName]; ok { + // delete task from memory + m.deleteTask(taskName) + } + } else { + // check if rule has a task running + if _, ok := m.tasks[taskName]; !ok { + // rule has not task, start one + if err := m.addTask(rule, taskName); err != nil { + return err + } + } + } + return nil +} + +// PatchRule supports attribute level changes to the rule definition unlike +// EditRule, which updates entire rule definition in the DB. +// the process: +// - get the latest rule from db +// - over write the patch attributes received in input (ruleStr) +// - re-deploy or undeploy task as necessary +// - update the patched rule in the DB +func (m *Manager) PatchRule(ruleStr string, ruleId string) (*GettableRule, error) { + + if ruleId == "" { + return nil, fmt.Errorf("id is mandatory for patching rule") + } + + taskName := prepareTaskName(ruleId) + + // retrieve rule from DB + storedJSON, err := m.ruleDB.GetStoredRule(ruleId) + if err != nil { + zap.S().Errorf("msg:", "failed to get stored rule with given id", "\t error:", err) + return nil, err + } + + // storedRule holds the current stored rule from DB + storedRule := PostableRule{} + if err := json.Unmarshal([]byte(storedJSON.Data), &storedRule); err != nil { + zap.S().Errorf("msg:", "failed to get unmarshal stored rule with given id", "\t error:", err) + return nil, err + } + + // patchedRule is combo of stored rule and patch received in the request + patchedRule, errs := parseIntoRule(storedRule, []byte(ruleStr), "json") + if len(errs) > 0 { + zap.S().Errorf("failed to parse rules:", errs) + // just one rule is being parsed so expect just one error + return nil, errs[0] + } + + // deploy or un-deploy task according to patched (new) rule state + if err := m.syncRuleStateWithTask(taskName, patchedRule); err != nil { + zap.S().Errorf("failed to sync stored rule state with the task") + return nil, err + } + + // prepare rule json to write to update db + patchedRuleBytes, err := json.Marshal(patchedRule) + if err != nil { + return nil, err + } + + // write updated rule to db + if _, _, err = m.ruleDB.EditRuleTx(string(patchedRuleBytes), ruleId); err != nil { + // write failed, rollback task state + + // restore task state from the stored rule + if err := m.syncRuleStateWithTask(taskName, &storedRule); err != nil { + zap.S().Errorf("msg: ", "failed to restore rule after patch failure", "\t error:", err) + } + + return nil, err + } + + // prepare http response + response := GettableRule{ + Id: ruleId, + PostableRule: *patchedRule, + } + + // fetch state of rule from memory + if rm, ok := m.rules[ruleId]; !ok { + response.State = StateDisabled.String() + response.Disabled = true + } else { + response.State = rm.State().String() + } + + return &response, nil +} diff --git a/pkg/query-service/rules/promRule.go b/pkg/query-service/rules/promRule.go index 669d6e3845..bb995de73a 100644 --- a/pkg/query-service/rules/promRule.go +++ b/pkg/query-service/rules/promRule.go @@ -29,6 +29,8 @@ type PromRule struct { labels plabels.Labels annotations plabels.Labels + preferredChannels []string + mtx sync.Mutex evaluationDuration time.Duration evaluationTimestamp time.Time @@ -45,38 +47,37 @@ type PromRule struct { func NewPromRule( id string, - name string, - ruleCondition *RuleCondition, - evalWindow time.Duration, - labels, annotations map[string]string, + postableRule *PostableRule, logger log.Logger, - source string, ) (*PromRule, error) { - if int64(evalWindow) == 0 { - evalWindow = 5 * time.Minute - } - - if ruleCondition == nil { + if postableRule.RuleCondition == nil { return nil, fmt.Errorf("no rule condition") - } else if !ruleCondition.IsValid() { + } else if !postableRule.RuleCondition.IsValid() { return nil, fmt.Errorf("invalid rule condition") } - zap.S().Info("msg:", "creating new alerting rule", "\t name:", name, "\t condition:", ruleCondition.String()) + p := PromRule{ + id: id, + name: postableRule.Alert, + source: postableRule.Source, + ruleCondition: postableRule.RuleCondition, + evalWindow: time.Duration(postableRule.EvalWindow), + labels: plabels.FromMap(postableRule.Labels), + annotations: plabels.FromMap(postableRule.Annotations), + preferredChannels: postableRule.PreferredChannels, + health: HealthUnknown, + active: map[uint64]*Alert{}, + logger: logger, + } - return &PromRule{ - id: id, - name: name, - source: source, - ruleCondition: ruleCondition, - evalWindow: evalWindow, - labels: plabels.FromMap(labels), - annotations: plabels.FromMap(annotations), - health: HealthUnknown, - active: map[uint64]*Alert{}, - logger: logger, - }, nil + if int64(p.evalWindow) == 0 { + p.evalWindow = 5 * time.Minute + } + + zap.S().Info("msg:", "creating new alerting rule", "\t name:", p.name, "\t condition:", p.ruleCondition.String()) + + return &p, nil } func (r *PromRule) Name() string { @@ -96,7 +97,11 @@ func (r *PromRule) Type() RuleType { } func (r *PromRule) GeneratorURL() string { - return r.source + return prepareRuleGeneratorURL(r.ID(), r.source) +} + +func (r *PromRule) PreferredChannels() []string { + return r.preferredChannels } func (r *PromRule) SetLastError(err error) { @@ -382,6 +387,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) ( State: StatePending, Value: smpl.V, GeneratorURL: r.GeneratorURL(), + Receivers: r.preferredChannels, } } @@ -392,6 +398,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) ( if alert, ok := r.active[h]; ok && alert.State != StateInactive { alert.Value = a.Value alert.Annotations = a.Annotations + alert.Receivers = r.preferredChannels continue } @@ -429,11 +436,12 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) ( func (r *PromRule) String() string { ar := PostableRule{ - Alert: r.name, - RuleCondition: r.ruleCondition, - EvalWindow: Duration(r.evalWindow), - Labels: r.labels.Map(), - Annotations: r.annotations.Map(), + Alert: r.name, + RuleCondition: r.ruleCondition, + EvalWindow: Duration(r.evalWindow), + Labels: r.labels.Map(), + Annotations: r.annotations.Map(), + PreferredChannels: r.preferredChannels, } byt, err := yaml.Marshal(ar) diff --git a/pkg/query-service/rules/rule.go b/pkg/query-service/rules/rule.go index ba5c934172..9a2ac1bad0 100644 --- a/pkg/query-service/rules/rule.go +++ b/pkg/query-service/rules/rule.go @@ -19,6 +19,8 @@ type Rule interface { State() AlertState ActiveAlerts() []*Alert + PreferredChannels() []string + Eval(context.Context, time.Time, *Queriers) (interface{}, error) String() string // Query() string diff --git a/pkg/query-service/rules/templates.go b/pkg/query-service/rules/templates.go index 4789780ffc..3d9aa518d8 100644 --- a/pkg/query-service/rules/templates.go +++ b/pkg/query-service/rules/templates.go @@ -17,6 +17,9 @@ import ( "go.signoz.io/query-service/utils/times" ) +// this file contains all the methods and structs +// related to go templating in rule labels and annotations + type tmplQueryRecord struct { Labels map[string]string Value float64 diff --git a/pkg/query-service/rules/thresholdRule.go b/pkg/query-service/rules/thresholdRule.go index 8f734c113d..dc0c29be97 100644 --- a/pkg/query-service/rules/thresholdRule.go +++ b/pkg/query-service/rules/thresholdRule.go @@ -32,6 +32,7 @@ type ThresholdRule struct { labels labels.Labels annotations labels.Labels + preferredChannels []string mtx sync.Mutex evaluationDuration time.Duration evaluationTimestamp time.Time @@ -46,37 +47,35 @@ type ThresholdRule struct { func NewThresholdRule( id string, - name string, - ruleCondition *RuleCondition, - evalWindow time.Duration, - l, a map[string]string, - source string, + p *PostableRule, ) (*ThresholdRule, error) { - if int64(evalWindow) == 0 { - evalWindow = 5 * time.Minute - } - - if ruleCondition == nil { + if p.RuleCondition == nil { return nil, fmt.Errorf("no rule condition") - } else if !ruleCondition.IsValid() { + } else if !p.RuleCondition.IsValid() { return nil, fmt.Errorf("invalid rule condition") } - zap.S().Info("msg:", "creating new alerting rule", "\t name:", name, "\t condition:", ruleCondition.String()) + t := ThresholdRule{ + id: id, + name: p.Alert, + source: p.Source, + ruleCondition: p.RuleCondition, + evalWindow: time.Duration(p.EvalWindow), + labels: labels.FromMap(p.Labels), + annotations: labels.FromMap(p.Annotations), + preferredChannels: p.PreferredChannels, + health: HealthUnknown, + active: map[uint64]*Alert{}, + } - return &ThresholdRule{ - id: id, - name: name, - source: source, - ruleCondition: ruleCondition, - evalWindow: evalWindow, - labels: labels.FromMap(l), - annotations: labels.FromMap(a), + if int64(t.evalWindow) == 0 { + t.evalWindow = 5 * time.Minute + } - health: HealthUnknown, - active: map[uint64]*Alert{}, - }, nil + zap.S().Info("msg:", "creating new alerting rule", "\t name:", t.name, "\t condition:", t.ruleCondition.String(), "\t generatorURL:", t.GeneratorURL()) + + return &t, nil } func (r *ThresholdRule) Name() string { @@ -92,7 +91,11 @@ func (r *ThresholdRule) Condition() *RuleCondition { } func (r *ThresholdRule) GeneratorURL() string { - return r.source + return prepareRuleGeneratorURL(r.ID(), r.source) +} + +func (r *ThresholdRule) PreferredChannels() []string { + return r.preferredChannels } func (r *ThresholdRule) target() *float64 { @@ -231,9 +234,9 @@ func (r *ThresholdRule) GetEvaluationTimestamp() time.Time { // State returns the maximum state of alert instances for this rule. // StateFiring > StatePending > StateInactive func (r *ThresholdRule) State() AlertState { + r.mtx.Lock() defer r.mtx.Unlock() - maxState := StateInactive for _, a := range r.active { if a.State > maxState { @@ -477,6 +480,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer } } } + zap.S().Debugf("ruleid:", r.ID(), "\t resultmap(potential alerts):", len(resultMap)) for _, sample := range resultMap { // check alert rule condition before dumping results @@ -484,7 +488,7 @@ func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, quer result = append(result, sample) } } - + zap.S().Debugf("ruleid:", r.ID(), "\t result (found alerts):", len(result)) return result, nil } @@ -613,6 +617,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie State: StatePending, Value: smpl.V, GeneratorURL: r.GeneratorURL(), + Receivers: r.preferredChannels, } } @@ -626,6 +631,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie alert.Value = a.Value alert.Annotations = a.Annotations + alert.Receivers = r.preferredChannels continue } @@ -663,11 +669,12 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Querie func (r *ThresholdRule) String() string { ar := PostableRule{ - Alert: r.name, - RuleCondition: r.ruleCondition, - EvalWindow: Duration(r.evalWindow), - Labels: r.labels.Map(), - Annotations: r.annotations.Map(), + Alert: r.name, + RuleCondition: r.ruleCondition, + EvalWindow: Duration(r.evalWindow), + Labels: r.labels.Map(), + Annotations: r.annotations.Map(), + PreferredChannels: r.preferredChannels, } byt, err := yaml.Marshal(ar)