diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
new file mode 100644
index 0000000000..053a8733dc
--- /dev/null
+++ b/.github/workflows/dependency-review.yml
@@ -0,0 +1,22 @@
+# Dependency Review Action
+#
+# This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging.
+#
+# Source repository: https://github.com/actions/dependency-review-action
+# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
+name: 'Dependency Review'
+on: [pull_request]
+
+permissions:
+  contents: read
+
+jobs:
+  dependency-review:
+    runs-on: ubuntu-latest
+    steps:
+      - name: 'Checkout Repository'
+        uses: actions/checkout@v3
+      - name: 'Dependency Review'
+        uses: actions/dependency-review-action@v2
+        with:
+          fail-on-severity: high
diff --git a/Makefile b/Makefile
index ac93167fa7..7aaa3a41d6 100644
--- a/Makefile
+++ b/Makefile
@@ -82,15 +82,9 @@ dev-setup:
 run-x86:
 	@docker-compose -f $(STANDALONE_DIRECTORY)/docker-compose.yaml up -d
 
-run-arm:
-	@docker-compose -f $(STANDALONE_DIRECTORY)/docker-compose.arm.yaml up -d
-
 down-x86:
 	@docker-compose -f $(STANDALONE_DIRECTORY)/docker-compose.yaml down -v
 
-down-arm:
-	@docker-compose -f $(STANDALONE_DIRECTORY)/docker-compose.arm.yaml down -v
-
 clear-standalone-data:
 	@docker run --rm -v "$(PWD)/$(STANDALONE_DIRECTORY)/data:/pwd" busybox \
 		sh -c "cd /pwd && rm -rf alertmanager/* clickhouse/* signoz/*"
diff --git a/deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml b/deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml
index 3bb26a3a36..4a6a82b8af 100644
--- a/deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml
+++ b/deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml
@@ -22,7 +22,7 @@
       [1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114
     -->
-        <level>trace</level>
+        <level>information</level>
         <log>/var/log/clickhouse-server/clickhouse-server.log</log>
         <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
[What follows in the extracted text is the matching change for the docker (non-swarm) clickhouse-config.xml — the same <level>trace</level> to <level>information</level> switch, with identical log paths — plus the remainder of both configuration files, whose XML tags were lost in extraction. The stripped hunks carry the stock ClickHouse server configuration: log rotation (1000M, 10 files), ports 8123/9000/9004/9005/9009, TLS/OpenSSL settings, concurrency, memory and cache limits, sample remote_servers clusters, ZooKeeper and distributed_ddl placeholders, the system log tables (query_log, trace_log, query_thread_log, query_views_log, part_log, metric_log, asynchronous_metric_log, opentelemetry_span_log, crash_log, processors_profile_log), *_dictionary.xml and *_function.xml includes, graphite rollup examples, the format_schemas path, query-masking rules for encrypt/decrypt arguments, a crash-report sentry endpoint, and installation notes for clickhouse-jdbc-bridge.]
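[Example — after the level switch above, ClickHouse stops writing per-query trace output but still logs startup information and errors; Poco orders levels fatal < critical < error < warning < notice < information < debug < trace. A sketch of the logger section these hunks touch, with the rotation values recovered from the mangled hunks:]

    <logger>
        <level>information</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>1000M</size> <!-- rotate the log after ~1000 MB -->
        <count>10</count>  <!-- keep at most 10 rotated files -->
    </logger>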
diff --git a/deploy/docker/clickhouse-setup/docker-compose.yaml b/deploy/docker/clickhouse-setup/docker-compose.yaml index f8c2954446..3b3403a480 100644 --- a/deploy/docker/clickhouse-setup/docker-compose.yaml +++ b/deploy/docker/clickhouse-setup/docker-compose.yaml @@ -39,7 +39,7 @@ services: # Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md` query-service: - image: signoz/query-service:0.9.2 + image: signoz/query-service:0.10.0 container_name: query-service command: ["-config=/root/config/prometheus.yml"] # ports: @@ -66,7 +66,7 @@ services: condition: service_healthy frontend: - image: signoz/frontend:0.9.2 + image: signoz/frontend:0.10.0 container_name: frontend restart: on-failure depends_on: @@ -78,20 +78,24 @@ services: - ../common/nginx-config.conf:/etc/nginx/conf.d/default.conf otel-collector: - image: signoz/otelcontribcol:0.45.1-1.0 + image: signoz/otelcontribcol:0.45.1-1.1 command: ["--config=/etc/otel-collector-config.yaml"] volumes: - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + environment: + - OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux ports: + # - "1777:1777" # pprof extension - "4317:4317" # OTLP gRPC receiver - "4318:4318" # OTLP HTTP receiver - # - "8889:8889" # Prometheus metrics exposed by the agent - # - "13133:13133" # health_check - # - "14268:14268" # Jaeger receiver + # - "8888:8888" # OtelCollector internal metrics + # - "8889:8889" # signoz spanmetrics exposed by the agent + # - "9411:9411" # Zipkin port + # - "13133:13133" # health check extension + # - "14250:14250" # Jaeger gRPC + # - "14268:14268" # Jaeger thrift HTTP # - "55678:55678" # OpenCensus receiver - # - "55679:55679" # zpages extension - # - "55680:55680" # OTLP gRPC legacy receiver - # - "55681:55681" # OTLP HTTP legacy receiver + # - "55679:55679" # zPages extension mem_limit: 2000m restart: on-failure depends_on: @@ -99,10 +103,15 @@ services: condition: service_healthy otel-collector-metrics: - image: signoz/otelcontribcol:0.45.1-1.0 + image: signoz/otelcontribcol:0.45.1-1.1 command: ["--config=/etc/otel-collector-metrics-config.yaml"] volumes: - ./otel-collector-metrics-config.yaml:/etc/otel-collector-metrics-config.yaml + # ports: + # - "1777:1777" # pprof extension + # - "8888:8888" # OtelCollector internal metrics + # - "13133:13133" # Health check extension + # - "55679:55679" # zPages extension restart: on-failure depends_on: clickhouse: diff --git a/deploy/docker/clickhouse-setup/otel-collector-config.yaml b/deploy/docker/clickhouse-setup/otel-collector-config.yaml index e363f015df..0717cf4c45 100644 --- a/deploy/docker/clickhouse-setup/otel-collector-config.yaml +++ b/deploy/docker/clickhouse-setup/otel-collector-config.yaml @@ -1,25 +1,36 @@ receivers: + opencensus: + endpoint: 0.0.0.0:55678 otlp/spanmetrics: protocols: grpc: - endpoint: "localhost:12345" + endpoint: localhost:12345 otlp: protocols: grpc: + endpoint: 0.0.0.0:4317 http: + endpoint: 0.0.0.0:4318 jaeger: protocols: grpc: + endpoint: 0.0.0.0:14250 thrift_http: + endpoint: 0.0.0.0:14268 + # thrift_compact: + # endpoint: 0.0.0.0:6831 + # thrift_binary: + # endpoint: 0.0.0.0:6832 hostmetrics: collection_interval: 60s scrapers: - cpu: - load: - memory: - disk: - filesystem: - network: + cpu: {} + load: {} + memory: {} + disk: {} + filesystem: {} + network: {} + processors: batch: send_batch_size: 10000 @@ -49,9 +60,20 @@ processors: # num_workers: 4 # 
queue_size: 100
#      retry_on_failure: true
+  resourcedetection:
+    # Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
+    detectors: [env, system] # include ec2 for AWS, gce for GCP and azure for Azure.
+    timeout: 2s
+    override: false
+
 extensions:
-  health_check: {}
-  zpages: {}
+  health_check:
+    endpoint: 0.0.0.0:13133
+  zpages:
+    endpoint: 0.0.0.0:55679
+  pprof:
+    endpoint: 0.0.0.0:1777
+
 exporters:
   clickhousetraces:
     datasource: tcp://clickhouse:9000/?database=signoz_traces
@@ -60,18 +82,30 @@
     resource_to_telemetry_conversion:
       enabled: true
   prometheus:
-    endpoint: "0.0.0.0:8889"
+    endpoint: 0.0.0.0:8889
+  # logging: {}
+
 service:
-  extensions: [health_check, zpages]
+  telemetry:
+    metrics:
+      address: 0.0.0.0:8888
+  extensions:
+    - health_check
+    - zpages
+    - pprof
   pipelines:
     traces:
       receivers: [jaeger, otlp]
      processors: [signozspanmetrics/prometheus, batch]
      exporters: [clickhousetraces]
    metrics:
-      receivers: [otlp, hostmetrics]
+      receivers: [otlp]
      processors: [batch]
      exporters: [clickhousemetricswrite]
+    metrics/hostmetrics:
+      receivers: [hostmetrics]
+      processors: [resourcedetection, batch]
+      exporters: [clickhousemetricswrite]
    metrics/spanmetrics:
      receivers: [otlp/spanmetrics]
      exporters: [prometheus]
diff --git a/deploy/docker/clickhouse-setup/otel-collector-metrics-config.yaml b/deploy/docker/clickhouse-setup/otel-collector-metrics-config.yaml
index 26c629ba60..fdc5830f57 100644
--- a/deploy/docker/clickhouse-setup/otel-collector-metrics-config.yaml
+++ b/deploy/docker/clickhouse-setup/otel-collector-metrics-config.yaml
@@ -3,15 +3,28 @@ receivers:
     protocols:
       grpc:
       http:
-
-  # Data sources: metrics
   prometheus:
     config:
       scrape_configs:
+        # otel-collector internal metrics
         - job_name: "otel-collector"
           scrape_interval: 60s
           static_configs:
-            - targets: ["otel-collector:8889"]
+            - targets:
+                - otel-collector:8888
+        # otel-collector-metrics internal metrics
+        - job_name: "otel-collector-metrics"
+          scrape_interval: 60s
+          static_configs:
+            - targets:
+                - localhost:8888
+        # SigNoz span metrics
+        - job_name: "signozspanmetrics-collector"
+          scrape_interval: 60s
+          static_configs:
+            - targets:
+                - otel-collector:8889
+
 processors:
   batch:
     send_batch_size: 10000
@@ -32,17 +45,29 @@
 #      num_workers: 4
 #      queue_size: 100
 #      retry_on_failure: true
+
 extensions:
-  health_check: {}
-  zpages: {}
+  health_check:
+    endpoint: 0.0.0.0:13133
+  zpages:
+    endpoint: 0.0.0.0:55679
+  pprof:
+    endpoint: 0.0.0.0:1777
+
 exporters:
   clickhousemetricswrite:
     endpoint: tcp://clickhouse:9000/?database=signoz_metrics
 
 service:
-  extensions: [health_check, zpages]
+  telemetry:
+    metrics:
+      address: 0.0.0.0:8888
+  extensions:
+    - health_check
+    - zpages
+    - pprof
   pipelines:
     metrics:
-      receivers: [otlp, prometheus]
+      receivers: [prometheus]
       processors: [batch]
       exporters: [clickhousemetricswrite]
diff --git a/deploy/docker/clickhouse-setup/users.xml b/deploy/docker/clickhouse-setup/users.xml
deleted file mode 100644
index f18562071d..0000000000
--- a/deploy/docker/clickhouse-setup/users.xml
+++ /dev/null
@@ -1,123 +0,0 @@
[All 123 removed lines are the stock ClickHouse users.xml that the server now ships with by default; its XML tags were lost in extraction. The recoverable values: a default profile with max_memory_usage 10000000000 and load_balancing random, a default user allowed from ::/0 with the default profile and quota, and a default quota with one 3600-second interval whose limits for queries, errors, result rows, read rows, and execution time are all 0 (unlimited).]
diff --git a/frontend/package.json b/frontend/package.json
index f93bc9684c..868e95dce7 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -13,8 +13,9 @@
 		"jest:coverage": "jest --coverage",
 		"jest:watch": "jest --watch",
 		"postinstall": "is-ci || yarn husky:configure",
-		"playwright": "playwright test --config=./playwright.config.ts",
+		"playwright": "NODE_ENV=testing playwright test --config=./playwright.config.ts",
 		"playwright:local:debug": "PWDEBUG=console yarn playwright --headed --browser=chromium",
+		"playwright:codegen:local": "playwright codegen http://localhost:3301",
 		"husky:configure": "cd .. && husky install frontend/.husky && cd frontend && chmod ug+x .husky/*",
 		"commitlint": "commitlint --edit $1"
 	},
@@ -43,6 +44,7 @@
 		"babel-preset-react-app": "^10.0.0",
 		"chart.js": "^3.4.0",
 		"chartjs-adapter-date-fns": "^2.0.0",
+		"chartjs-plugin-annotation": "^1.4.0",
 		"color": "^4.2.1",
 		"cross-env": "^7.0.3",
 		"css-loader": "4.3.0",
@@ -81,6 +83,7 @@
 		"style-loader": "1.3.0",
 		"styled-components": "^5.2.1",
 		"terser-webpack-plugin": "^5.2.5",
+		"timestamp-nano": "^1.0.0",
 		"ts-node": "^10.2.1",
 		"tsconfig-paths-webpack-plugin": "^3.5.1",
 		"typescript": "^4.0.5",
diff --git a/frontend/playwright.config.ts b/frontend/playwright.config.ts
index 98fccbcb7f..6733c67536 100644
--- a/frontend/playwright.config.ts
+++ b/frontend/playwright.config.ts
@@ -16,6 +16,8 @@ const config: PlaywrightTestConfig = {
 	updateSnapshots: 'all',
 	fullyParallel: false,
 	quiet: true,
+	testMatch: ['**/*.spec.ts'],
+	reporter: process.env.CI ? 'github' : 'list',
 };
 
 export default config;
diff --git a/frontend/public/locales/en-GB/alerts.json b/frontend/public/locales/en-GB/alerts.json
new file mode 100644
index 0000000000..e67bd35273
--- /dev/null
+++ b/frontend/public/locales/en-GB/alerts.json
@@ -0,0 +1,85 @@
+{
+	"preview_chart_unexpected_error": "An unexpected error occurred updating the chart, please check your query.",
+	"preview_chart_threshold_label": "Threshold",
+	"placeholder_label_key_pair": "Click here to enter a label (key value pairs)",
+	"button_yes": "Yes",
+	"button_no": "No",
+	"remove_label_confirm": "This action will remove all the labels. Do you want to proceed?",
+	"remove_label_success": "Labels cleared",
+	"alert_form_step1": "Step 1 - Define the metric",
+	"alert_form_step2": "Step 2 - Define Alert Conditions",
+	"alert_form_step3": "Step 3 - Alert Configuration",
+	"metric_query_max_limit": "Cannot create query. You can create a maximum of 5 queries",
+	"confirm_save_title": "Save Changes",
+	"confirm_save_content_part1": "Your alert built with",
+	"confirm_save_content_part2": "query will be saved. Press OK to confirm.",
+	"unexpected_error": "Sorry, an unexpected error occurred. Please contact your admin",
+	"rule_created": "Rule created successfully",
+	"rule_edited": "Rule edited successfully",
+	"expression_missing": "expression is missing in {{where}}",
+	"metricname_missing": "metric name is missing in {{where}}",
+	"condition_required": "at least one metric condition is required",
+	"alertname_required": "alert name is required",
+	"promql_required": "promql expression is required when query format is set to PromQL",
+	"button_savechanges": "Save Rule",
+	"button_createrule": "Create Rule",
+	"button_returntorules": "Return to rules",
+	"button_cancelchanges": "Cancel",
+	"button_discard": "Discard",
+	"text_condition1": "Send a notification when the metric is",
+	"text_condition2": "the threshold",
+	"text_condition3": "during the last",
+	"option_5min": "5 mins",
+	"option_10min": "10 mins",
+	"option_15min": "15 mins",
+	"option_60min": "60 mins",
+	"option_4hours": "4 hours",
+	"option_24hours": "24 hours",
+	"field_threshold": "Alert Threshold",
+	"option_allthetimes": "all the times",
+	"option_atleastonce": "at least once",
+	"option_onaverage": "on average",
+	"option_intotal": "in total",
+	"option_above": "above",
+	"option_below": "below",
+	"option_equal": "is equal to",
+	"option_notequal": "not equal to",
+	"button_query": "Query",
+	"button_formula": "Formula",
+	"tab_qb": "Query Builder",
+	"tab_promql": "PromQL",
+	"title_confirm": "Confirm",
+	"button_ok": "Yes",
+	"button_cancel": "No",
+	"field_promql_expr": "PromQL Expression",
+	"field_alert_name": "Alert Name",
+	"field_alert_desc": "Alert Description",
+	"field_labels": "Labels",
+	"field_severity": "Severity",
+	"option_critical": "Critical",
+	"option_error": "Error",
+	"option_warning": "Warning",
+	"option_info": "Info",
+	"user_guide_headline": "Steps to create an Alert",
+	"user_guide_qb_step1": "Step 1 - Define the metric",
+	"user_guide_qb_step1a": "Choose a metric which you want to create an alert on",
+	"user_guide_qb_step1b": "Filter it based on WHERE field or GROUPBY if needed",
+	"user_guide_qb_step1c": "Apply an aggregation function like COUNT, SUM, etc. or choose NOOP to plot the raw metric",
+	"user_guide_qb_step1d": "Create a formula based on Queries if needed",
+	"user_guide_qb_step2": "Step 2 - Define Alert Conditions",
+	"user_guide_qb_step2a": "Select the evaluation interval, threshold type and whether you want to alert above/below a value",
+	"user_guide_qb_step2b": "Enter the Alert threshold",
+	"user_guide_qb_step3": "Step 3 - Alert Configuration",
+	"user_guide_qb_step3a": "Set alert severity, name and descriptions",
+	"user_guide_qb_step3b": "Add tags to the alert in the Label field if needed",
+	"user_guide_pql_step1": "Step 1 - Define the metric",
+	"user_guide_pql_step1a": "Write a PromQL query for the metric",
+	"user_guide_pql_step1b": "Format the legends based on labels you want to highlight",
+	"user_guide_pql_step2": "Step 2 - Define Alert Conditions",
+	"user_guide_pql_step2a": "Select the threshold type and whether you want to alert above/below a value",
+	"user_guide_pql_step2b": "Enter the Alert threshold",
+	"user_guide_pql_step3": "Step 3 - Alert Configuration",
+	"user_guide_pql_step3a": "Set alert severity, name and descriptions",
+	"user_guide_pql_step3b": "Add tags to the alert in the Label field if needed",
+	"user_tooltip_more_help": "More details on how to create alerts"
+}
\ No newline at end of file
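[Example — these bundles are consumed through react-i18next; FormAlertRules later in this diff initialises the alerts namespace with useTranslation('alerts'). Keys containing {{placeholders}} are interpolated at call time. A minimal usage sketch:]

    import { useTranslation } from 'react-i18next';

    const { t } = useTranslation('alerts');
    t('alert_form_step1');                         // "Step 1 - Define the metric"
    t('expression_missing', { where: 'query A' }); // "expression is missing in query A"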
diff --git a/frontend/public/locales/en-GB/rules.json b/frontend/public/locales/en-GB/rules.json
new file mode 100644
index 0000000000..e67bd35273
--- /dev/null
+++ b/frontend/public/locales/en-GB/rules.json
@@ -0,0 +1,85 @@
[85 lines identical to en-GB/alerts.json above — the four new locale files share git blob e67bd35273.]
diff --git a/frontend/public/locales/en/alerts.json b/frontend/public/locales/en/alerts.json
new file mode 100644
index 0000000000..e67bd35273
--- /dev/null
+++ b/frontend/public/locales/en/alerts.json
@@ -0,0 +1,85 @@
[85 lines identical to en-GB/alerts.json above.]
diff --git a/frontend/public/locales/en/rules.json b/frontend/public/locales/en/rules.json
new file mode 100644
index 0000000000..e67bd35273
--- /dev/null
+++ b/frontend/public/locales/en/rules.json
@@ -0,0 +1,85 @@
[85 lines identical to en-GB/alerts.json above.]
diff --git a/frontend/src/api/alerts/create.ts b/frontend/src/api/alerts/create.ts
index 10dbff99b6..cad7917815 100644
--- a/frontend/src/api/alerts/create.ts
+++ b/frontend/src/api/alerts/create.ts
@@ -9,7 +9,7 @@ const create = async (
 ): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
 	try {
 		const response = await axios.post('/rules', {
-			data: props.query,
+			...props.data,
 		});
 
 		return {
diff --git a/frontend/src/api/alerts/get.ts b/frontend/src/api/alerts/get.ts
index aeddf67fd0..0437f8d1d8 100644
--- a/frontend/src/api/alerts/get.ts
+++ b/frontend/src/api/alerts/get.ts
@@ -14,7 +14,7 @@ const get = async (
 			statusCode: 200,
 			error: null,
 			message: response.data.status,
-			payload: response.data.data,
+			payload: response.data,
 		};
 	} catch (error) {
 		return ErrorResponseHandler(error as AxiosError);
diff --git a/frontend/src/api/alerts/put.ts b/frontend/src/api/alerts/put.ts
index 15d4c7c698..b8c34e96bd 100644
--- a/frontend/src/api/alerts/put.ts
+++ b/frontend/src/api/alerts/put.ts
@@ -2,14 +2,14 @@ import axios from 'api';
 import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
 import { AxiosError } from 'axios';
 import { ErrorResponse, SuccessResponse } from 'types/api';
-import { PayloadProps, Props } from 'types/api/alerts/put';
+import { PayloadProps, Props } from 'types/api/alerts/save';
 
 const put = async (
 	props: Props,
 ): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
 	try {
 		const response = await axios.put(`/rules/${props.id}`, {
-			data: props.data,
+			...props.data,
 		});
 
 		return {
diff --git a/frontend/src/api/alerts/save.ts b/frontend/src/api/alerts/save.ts
new file mode 100644
index 0000000000..229f0ae126
--- /dev/null
+++ b/frontend/src/api/alerts/save.ts
@@ -0,0 +1,17 @@
+import { ErrorResponse, SuccessResponse } from 'types/api';
+import { PayloadProps, Props } from 'types/api/alerts/save';
+
+import create from './create';
+import put from './put';
+
+const save = async (
+	props: Props,
+): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
+	if (props.id && props.id > 0) {
+		return put({ ...props });
+	}
+
+	return create({ ...props });
+};
+
+export default save;
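[Example — save.ts above routes to put() when props.id is a positive number and to create() otherwise, so both the new-rule and edit-rule screens can call a single function. A minimal usage sketch; the exact Props shape lives in types/api/alerts/save, which this diff does not show:]

    import save from 'api/alerts/save';

    // id > 0 -> PUT /rules/3 (edit); id absent/0 -> POST /rules (create)
    const response = await save({ id: 3, data: alertDef });
    if (response.statusCode === 200) {
        console.log('rule saved', response.payload);
    }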
diff --git a/frontend/src/api/errors/getAll.ts b/frontend/src/api/errors/getAll.ts
index dcd8aa8e73..7014e52a56 100644
--- a/frontend/src/api/errors/getAll.ts
+++ b/frontend/src/api/errors/getAll.ts
@@ -10,9 +10,8 @@ const getAll = async (
 ): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
 	try {
 		const response = await axios.get(
-			`/errors?${createQueryParams({
-				start: props.start.toString(),
-				end: props.end.toString(),
+			`/listErrors?${createQueryParams({
+				...props,
 			})}`,
 		);
diff --git a/frontend/src/api/errors/getByErrorTypeAndService.ts b/frontend/src/api/errors/getByErrorTypeAndService.ts
index 6a2c6964d9..c9a710fd72 100644
--- a/frontend/src/api/errors/getByErrorTypeAndService.ts
+++ b/frontend/src/api/errors/getByErrorTypeAndService.ts
@@ -10,11 +10,8 @@ const getByErrorType = async (
 ): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
 	try {
 		const response = await axios.get(
-			`/errorWithType?${createQueryParams({
-				start: props.start.toString(),
-				end: props.end.toString(),
-				serviceName: props.serviceName,
-				errorType: props.errorType,
+			`/errorFromGroupID?${createQueryParams({
+				...props,
 			})}`,
 		);
diff --git a/frontend/src/api/errors/getById.ts b/frontend/src/api/errors/getById.ts
index 3ab7c4aa60..ab0bae3f8a 100644
--- a/frontend/src/api/errors/getById.ts
+++ b/frontend/src/api/errors/getById.ts
@@ -3,17 +3,15 @@ import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
 import { AxiosError } from 'axios';
 import createQueryParams from 'lib/createQueryParams';
 import { ErrorResponse, SuccessResponse } from 'types/api';
-import { PayloadProps, Props } from 'types/api/errors/getById';
+import { PayloadProps, Props } from 'types/api/errors/getByErrorId';
 
 const getById = async (
 	props: Props,
 ): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
 	try {
 		const response = await axios.get(
-			`/errorWithId?${createQueryParams({
-				start: props.start.toString(),
-				end: props.end.toString(),
-				errorId: props.errorId,
+			`/errorFromErrorID?${createQueryParams({
+				...props,
 			})}`,
 		);
diff --git a/frontend/src/api/errors/getErrorCounts.ts b/frontend/src/api/errors/getErrorCounts.ts
new file mode 100644
index 0000000000..4992a6d391
--- /dev/null
+++ b/frontend/src/api/errors/getErrorCounts.ts
@@ -0,0 +1,29 @@
+import axios from 'api';
+import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
+import { AxiosError } from 'axios';
+import createQueryParams from 'lib/createQueryParams';
+import { ErrorResponse, SuccessResponse } from 'types/api';
+import { PayloadProps, Props } from 'types/api/errors/getErrorCounts';
+
+const getErrorCounts = async (
+	props: Props,
+): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
+	try {
+		const response = await axios.get(
+			`/countErrors?${createQueryParams({
+				...props,
+			})}`,
+		);
+
+		return {
+			statusCode: 200,
+			error: null,
+			message: response.data.message,
+			payload: response.data,
+		};
+	} catch (error) {
+		return ErrorResponseHandler(error as AxiosError);
+	}
+};
+
+export default getErrorCounts;
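[Example — the error APIs now spread their props object straight into the query string instead of naming each parameter. Assuming createQueryParams (lib/createQueryParams, not shown in this diff) joins URL-encoded key=value pairs with &, a call like this issues GET /countErrors?start=...&end=...:]

    import getErrorCounts from 'api/errors/getErrorCounts';

    // start/end come from the global time reducer (minTime/maxTime)
    const counts = await getErrorCounts({ start: minTime, end: maxTime });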
diff --git a/frontend/src/api/errors/getNextPrevId.ts b/frontend/src/api/errors/getNextPrevId.ts
new file mode 100644
index 0000000000..07798c548e
--- /dev/null
+++ b/frontend/src/api/errors/getNextPrevId.ts
@@ -0,0 +1,29 @@
+import axios from 'api';
+import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
+import { AxiosError } from 'axios';
+import createQueryParams from 'lib/createQueryParams';
+import { ErrorResponse, SuccessResponse } from 'types/api';
+import { PayloadProps, Props } from 'types/api/errors/getNextPrevId';
+
+const getNextPrevId = async (
+	props: Props,
+): Promise<SuccessResponse<PayloadProps> | ErrorResponse> => {
+	try {
+		const response = await axios.get(
+			`/nextPrevErrorIDs?${createQueryParams({
+				...props,
+			})}`,
+		);
+
+		return {
+			statusCode: 200,
+			error: null,
+			message: response.data.message,
+			payload: response.data,
+		};
+	} catch (error) {
+		return ErrorResponseHandler(error as AxiosError);
+	}
+};
+
+export default getNextPrevId;
diff --git a/frontend/src/api/user/getVersion.ts b/frontend/src/api/user/getVersion.ts
index a65ede2f0d..0f3e7f8e83 100644
--- a/frontend/src/api/user/getVersion.ts
+++ b/frontend/src/api/user/getVersion.ts
@@ -1,14 +1,15 @@ import axios from 'api';
 import { ErrorResponseHandler } from 'api/ErrorResponseHandler';
 import { AxiosError } from 'axios';
+import { getVersion } from 'constants/api';
 import { ErrorResponse, SuccessResponse } from 'types/api';
 import { PayloadProps } from 'types/api/user/getVersion';
 
-const getVersion = async (): Promise<
+const getVersionApi = async (): Promise<
 	SuccessResponse<PayloadProps> | ErrorResponse
 > => {
 	try {
-		const response = await axios.get(`/version`);
+		const response = await axios.get(`/${getVersion}`);
 
 		return {
 			statusCode: 200,
@@ -21,4 +22,4 @@ const getVersion = async (): Promise<
 	}
 };
 
-export default getVersion;
+export default getVersionApi;
diff --git a/frontend/src/components/Graph/index.tsx b/frontend/src/components/Graph/index.tsx
index 4bb76276c0..3df4de3caa 100644
--- a/frontend/src/components/Graph/index.tsx
+++ b/frontend/src/components/Graph/index.tsx
@@ -22,6 +22,7 @@ import {
 	Tooltip,
 } from 'chart.js';
 import * as chartjsAdapter from 'chartjs-adapter-date-fns';
+import annotationPlugin from 'chartjs-plugin-annotation';
 import React, { useCallback, useEffect, useRef } from 'react';
 import { useSelector } from 'react-redux';
 import { AppState } from 'store/reducers';
@@ -50,6 +51,7 @@ Chart.register(
 	SubTitle,
 	BarController,
 	BarElement,
+	annotationPlugin,
 );
 
 function Graph({
@@ -62,6 +64,7 @@ function Graph({
 	name,
 	yAxisUnit = 'short',
 	forceReRender,
+	staticLine,
 }: GraphProps): JSX.Element {
 	const { isDarkMode } = useSelector((state) => state.app);
 	const chartRef = useRef(null);
@@ -99,6 +102,30 @@ function Graph({
 			intersect: false,
 		},
 		plugins: {
+			annotation: staticLine
+				?
{ + annotations: [ + { + type: 'line', + yMin: staticLine.yMin, + yMax: staticLine.yMax, + borderColor: staticLine.borderColor, + borderWidth: staticLine.borderWidth, + label: { + content: staticLine.lineText, + enabled: true, + font: { + size: 10, + }, + borderWidth: 0, + position: 'start', + backgroundColor: 'transparent', + color: staticLine.textColor, + }, + }, + ], + } + : undefined, title: { display: title !== undefined, text: title, @@ -180,6 +207,7 @@ function Graph({ } }, }; + const chartHasData = hasData(data); const chartPlugins = []; @@ -205,6 +233,7 @@ function Graph({ name, yAxisUnit, onClickHandler, + staticLine, ]); useEffect(() => { @@ -229,6 +258,16 @@ interface GraphProps { name: string; yAxisUnit?: string; forceReRender?: boolean | null | number; + staticLine?: StaticLineProps | undefined; +} + +export interface StaticLineProps { + yMin: number | undefined; + yMax: number | undefined; + borderColor: string; + borderWidth: number; + lineText: string; + textColor: string; } export type GraphOnClickHandler = ( @@ -245,5 +284,6 @@ Graph.defaultProps = { onClickHandler: undefined, yAxisUnit: undefined, forceReRender: undefined, + staticLine: undefined, }; export default Graph; diff --git a/frontend/src/constants/api.ts b/frontend/src/constants/api.ts new file mode 100644 index 0000000000..8ebfe3b73c --- /dev/null +++ b/frontend/src/constants/api.ts @@ -0,0 +1,3 @@ +const getVersion = 'version'; + +export { getVersion }; diff --git a/frontend/src/container/AllError/index.tsx b/frontend/src/container/AllError/index.tsx index 51f47c1104..253af7dfe1 100644 --- a/frontend/src/container/AllError/index.tsx +++ b/frontend/src/container/AllError/index.tsx @@ -1,31 +1,85 @@ -import { notification, Table, Tooltip, Typography } from 'antd'; +import { notification, Table, TableProps, Tooltip, Typography } from 'antd'; import { ColumnsType } from 'antd/lib/table'; import getAll from 'api/errors/getAll'; +import getErrorCounts from 'api/errors/getErrorCounts'; import ROUTES from 'constants/routes'; import dayjs from 'dayjs'; -import React, { useEffect } from 'react'; +import createQueryParams from 'lib/createQueryParams'; +import history from 'lib/history'; +import React, { useEffect, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; -import { useQuery } from 'react-query'; +import { useQueries } from 'react-query'; import { useSelector } from 'react-redux'; -import { Link } from 'react-router-dom'; +import { Link, useLocation } from 'react-router-dom'; import { AppState } from 'store/reducers'; -import { Exception } from 'types/api/errors/getAll'; +import { ErrorResponse, SuccessResponse } from 'types/api'; +import { Exception, PayloadProps } from 'types/api/errors/getAll'; import { GlobalReducer } from 'types/reducer/globalTime'; +import { + getDefaultOrder, + getNanoSeconds, + getOffSet, + getOrder, + getOrderParams, + getUpdatePageSize, + urlKey, +} from './utils'; + function AllErrors(): JSX.Element { - const { maxTime, minTime } = useSelector( + const { maxTime, minTime, loading } = useSelector( (state) => state.globalTime, ); + const { search, pathname } = useLocation(); + const params = useMemo(() => new URLSearchParams(search), [search]); const { t } = useTranslation(['common']); - const { isLoading, data } = useQuery(['getAllError', [maxTime, minTime]], { - queryFn: () => - getAll({ - end: maxTime, - start: minTime, - }), - }); + const updatedOrder = getOrder(params.get(urlKey.order)); + const getUpdatedOffset = getOffSet(params.get(urlKey.offset)); + const 
getUpdatedParams = getOrderParams(params.get(urlKey.orderParam)); + const getUpdatedPageSize = getUpdatePageSize(params.get(urlKey.pageSize)); + + const updatedPath = useMemo( + () => + `${pathname}?${createQueryParams({ + order: updatedOrder, + offset: getUpdatedOffset, + orderParam: getUpdatedParams, + pageSize: getUpdatedPageSize, + })}`, + [ + pathname, + updatedOrder, + getUpdatedOffset, + getUpdatedParams, + getUpdatedPageSize, + ], + ); + + const [{ isLoading, data }, errorCountResponse] = useQueries([ + { + queryKey: ['getAllErrors', updatedPath, maxTime, minTime], + queryFn: (): Promise | ErrorResponse> => + getAll({ + end: maxTime, + start: minTime, + order: updatedOrder, + limit: getUpdatedPageSize, + offset: getUpdatedOffset, + orderParam: getUpdatedParams, + }), + enabled: !loading, + }, + { + queryKey: ['getErrorCounts', maxTime, minTime], + queryFn: (): Promise> => + getErrorCounts({ + end: maxTime, + start: minTime, + }), + }, + ]); useEffect(() => { if (data?.error) { @@ -35,11 +89,9 @@ function AllErrors(): JSX.Element { } }, [data?.error, data?.payload, t]); - const getDateValue = (value: string): JSX.Element => { - return ( - {dayjs(value).format('DD/MM/YYYY HH:mm:ss A')} - ); - }; + const getDateValue = (value: string): JSX.Element => ( + {dayjs(value).format('DD/MM/YYYY HH:mm:ss A')} + ); const columns: ColumnsType = [ { @@ -49,14 +101,20 @@ function AllErrors(): JSX.Element { render: (value, record): JSX.Element => ( value}> {value} ), - sorter: (a, b): number => - a.exceptionType.charCodeAt(0) - b.exceptionType.charCodeAt(0), + sorter: true, + defaultSortOrder: getDefaultOrder( + getUpdatedParams, + updatedOrder, + 'exceptionType', + ), }, { title: 'Error Message', @@ -78,39 +136,86 @@ function AllErrors(): JSX.Element { title: 'Count', dataIndex: 'exceptionCount', key: 'exceptionCount', - sorter: (a, b): number => a.exceptionCount - b.exceptionCount, + sorter: true, + defaultSortOrder: getDefaultOrder( + getUpdatedParams, + updatedOrder, + 'exceptionCount', + ), }, { title: 'Last Seen', dataIndex: 'lastSeen', key: 'lastSeen', render: getDateValue, - sorter: (a, b): number => - dayjs(b.lastSeen).isBefore(dayjs(a.lastSeen)) === true ? 1 : 0, + sorter: true, + defaultSortOrder: getDefaultOrder( + getUpdatedParams, + updatedOrder, + 'lastSeen', + ), }, { title: 'First Seen', dataIndex: 'firstSeen', key: 'firstSeen', render: getDateValue, - sorter: (a, b): number => - dayjs(b.firstSeen).isBefore(dayjs(a.firstSeen)) === true ? 1 : 0, + sorter: true, + defaultSortOrder: getDefaultOrder( + getUpdatedParams, + updatedOrder, + 'firstSeen', + ), }, { title: 'Application', dataIndex: 'serviceName', key: 'serviceName', - sorter: (a, b): number => - a.serviceName.charCodeAt(0) - b.serviceName.charCodeAt(0), + sorter: true, + defaultSortOrder: getDefaultOrder( + getUpdatedParams, + updatedOrder, + 'serviceName', + ), }, ]; + const onChangeHandler: TableProps['onChange'] = ( + paginations, + _, + sorter, + ) => { + if (!Array.isArray(sorter)) { + const { pageSize = 0, current = 0 } = paginations; + const { columnKey = '', order } = sorter; + const updatedOrder = order === 'ascend' ? 
'ascending' : 'descending'; + + history.replace( + `${pathname}?${createQueryParams({ + order: updatedOrder, + offset: (current - 1) * pageSize, + orderParam: columnKey, + pageSize, + })}`, + ); + } + }; + return ( ); } diff --git a/frontend/src/container/AllError/utils.test.ts b/frontend/src/container/AllError/utils.test.ts new file mode 100644 index 0000000000..344d318ebf --- /dev/null +++ b/frontend/src/container/AllError/utils.test.ts @@ -0,0 +1,109 @@ +import { Order, OrderBy } from 'types/api/errors/getAll'; + +import { + getDefaultOrder, + getLimit, + getOffSet, + getOrder, + getOrderParams, + getUpdatePageSize, + isOrder, + isOrderParams, +} from './utils'; + +describe('Error utils', () => { + test('Valid OrderBy Params', () => { + expect(isOrderParams('serviceName')).toBe(true); + expect(isOrderParams('exceptionCount')).toBe(true); + expect(isOrderParams('lastSeen')).toBe(true); + expect(isOrderParams('firstSeen')).toBe(true); + expect(isOrderParams('exceptionType')).toBe(true); + }); + + test('Invalid OrderBy Params', () => { + expect(isOrderParams('invalid')).toBe(false); + expect(isOrderParams(null)).toBe(false); + expect(isOrderParams('')).toBe(false); + }); + + test('Valid Order', () => { + expect(isOrder('ascending')).toBe(true); + expect(isOrder('descending')).toBe(true); + }); + + test('Invalid Order', () => { + expect(isOrder('invalid')).toBe(false); + expect(isOrder(null)).toBe(false); + expect(isOrder('')).toBe(false); + }); + + test('Default Order', () => { + const OrderBy: OrderBy[] = [ + 'exceptionCount', + 'exceptionType', + 'firstSeen', + 'lastSeen', + 'serviceName', + ]; + + const order: Order[] = ['ascending', 'descending']; + + const ascOrd = order[0]; + const desOrd = order[1]; + + OrderBy.forEach((order) => { + expect(getDefaultOrder(order, ascOrd, order)).toBe('ascend'); + expect(getDefaultOrder(order, desOrd, order)).toBe('descend'); + }); + }); + + test('Limit', () => { + expect(getLimit(null)).toBe(10); + expect(getLimit('')).toBe(10); + expect(getLimit('0')).toBe(0); + expect(getLimit('1')).toBe(1); + expect(getLimit('10')).toBe(10); + expect(getLimit('11')).toBe(11); + expect(getLimit('100')).toBe(100); + expect(getLimit('101')).toBe(101); + }); + + test('Update Page Size', () => { + expect(getUpdatePageSize(null)).toBe(10); + expect(getUpdatePageSize('')).toBe(10); + expect(getUpdatePageSize('0')).toBe(0); + expect(getUpdatePageSize('1')).toBe(1); + expect(getUpdatePageSize('10')).toBe(10); + expect(getUpdatePageSize('11')).toBe(11); + expect(getUpdatePageSize('100')).toBe(100); + expect(getUpdatePageSize('101')).toBe(101); + }); + + test('Order Params', () => { + expect(getOrderParams(null)).toBe('serviceName'); + expect(getOrderParams('')).toBe('serviceName'); + expect(getOrderParams('serviceName')).toBe('serviceName'); + expect(getOrderParams('exceptionCount')).toBe('exceptionCount'); + expect(getOrderParams('lastSeen')).toBe('lastSeen'); + expect(getOrderParams('firstSeen')).toBe('firstSeen'); + expect(getOrderParams('exceptionType')).toBe('exceptionType'); + }); + + test('OffSet', () => { + expect(getOffSet(null)).toBe(0); + expect(getOffSet('')).toBe(0); + expect(getOffSet('0')).toBe(0); + expect(getOffSet('1')).toBe(1); + expect(getOffSet('10')).toBe(10); + expect(getOffSet('11')).toBe(11); + expect(getOffSet('100')).toBe(100); + expect(getOffSet('101')).toBe(101); + }); + + test('Order', () => { + expect(getOrder(null)).toBe('ascending'); + expect(getOrder('')).toBe('ascending'); + expect(getOrder('ascending')).toBe('ascending'); + 
expect(getOrder('descending')).toBe('descending'); + }); +}); diff --git a/frontend/src/container/AllError/utils.ts b/frontend/src/container/AllError/utils.ts new file mode 100644 index 0000000000..239d404b1c --- /dev/null +++ b/frontend/src/container/AllError/utils.ts @@ -0,0 +1,89 @@ +import { SortOrder } from 'antd/lib/table/interface'; +import Timestamp from 'timestamp-nano'; +import { Order, OrderBy } from 'types/api/errors/getAll'; + +export const isOrder = (order: string | null): order is Order => + !!(order === 'ascending' || order === 'descending'); + +export const urlKey = { + order: 'order', + offset: 'offset', + orderParam: 'orderParam', + pageSize: 'pageSize', +}; + +export const isOrderParams = (orderBy: string | null): orderBy is OrderBy => { + return !!( + orderBy === 'serviceName' || + orderBy === 'exceptionCount' || + orderBy === 'lastSeen' || + orderBy === 'firstSeen' || + orderBy === 'exceptionType' + ); +}; + +export const getOrder = (order: string | null): Order => { + if (isOrder(order)) { + return order; + } + return 'ascending'; +}; + +export const getLimit = (limit: string | null): number => { + if (limit) { + return parseInt(limit, 10); + } + return 10; +}; + +export const getOffSet = (offset: string | null): number => { + if (offset && typeof offset === 'string') { + return parseInt(offset, 10); + } + return 0; +}; + +export const getOrderParams = (order: string | null): OrderBy => { + if (isOrderParams(order)) { + return order; + } + return 'serviceName'; +}; + +export const getDefaultOrder = ( + orderBy: OrderBy, + order: Order, + data: OrderBy, + // eslint-disable-next-line sonarjs/cognitive-complexity +): SortOrder | undefined => { + if (orderBy === 'exceptionType' && data === 'exceptionType') { + return order === 'ascending' ? 'ascend' : 'descend'; + } + if (orderBy === 'serviceName' && data === 'serviceName') { + return order === 'ascending' ? 'ascend' : 'descend'; + } + if (orderBy === 'exceptionCount' && data === 'exceptionCount') { + return order === 'ascending' ? 'ascend' : 'descend'; + } + if (orderBy === 'lastSeen' && data === 'lastSeen') { + return order === 'ascending' ? 'ascend' : 'descend'; + } + if (orderBy === 'firstSeen' && data === 'firstSeen') { + return order === 'ascending' ? 
'ascend' : 'descend'; + } + return undefined; +}; + +export const getNanoSeconds = (date: string): string => { + return ( + Math.floor(new Date(date).getTime() / 1e3).toString() + + Timestamp.fromString(date).getNano().toString() + ); +}; + +export const getUpdatePageSize = (pageSize: string | null): number => { + if (pageSize) { + return parseInt(pageSize, 10); + } + return 10; +}; diff --git a/frontend/src/container/CreateAlertRule/index.tsx b/frontend/src/container/CreateAlertRule/index.tsx new file mode 100644 index 0000000000..f527fbbdf1 --- /dev/null +++ b/frontend/src/container/CreateAlertRule/index.tsx @@ -0,0 +1,22 @@ +import { Form } from 'antd'; +import FormAlertRules from 'container/FormAlertRules'; +import React from 'react'; +import { AlertDef } from 'types/api/alerts/def'; + +function CreateRules({ initialValue }: CreateRulesProps): JSX.Element { + const [formInstance] = Form.useForm(); + + return ( + + ); +} + +interface CreateRulesProps { + initialValue: AlertDef; +} + +export default CreateRules; diff --git a/frontend/src/container/EditRules/index.tsx b/frontend/src/container/EditRules/index.tsx index e228af0a10..cf4a02e717 100644 --- a/frontend/src/container/EditRules/index.tsx +++ b/frontend/src/container/EditRules/index.tsx @@ -1,102 +1,23 @@ -import { SaveFilled } from '@ant-design/icons'; -import { Button, notification } from 'antd'; -import put from 'api/alerts/put'; -import Editor from 'components/Editor'; -import ROUTES from 'constants/routes'; -import { State } from 'hooks/useFetch'; -import history from 'lib/history'; -import React, { useCallback, useState } from 'react'; -import { PayloadProps } from 'types/api/alerts/get'; -import { PayloadProps as PutPayloadProps } from 'types/api/alerts/put'; +import { Form } from 'antd'; +import FormAlertRules from 'container/FormAlertRules'; +import React from 'react'; +import { AlertDef } from 'types/api/alerts/def'; -import { ButtonContainer } from './styles'; - -function EditRules({ initialData, ruleId }: EditRulesProps): JSX.Element { - const [value, setEditorValue] = useState(initialData); - const [notifications, Element] = notification.useNotification(); - const [editButtonState, setEditButtonState] = useState>( - { - error: false, - errorMessage: '', - loading: false, - success: false, - payload: undefined, - }, - ); - - const onClickHandler = useCallback(async () => { - try { - setEditButtonState((state) => ({ - ...state, - loading: true, - })); - const response = await put({ - data: value, - id: parseInt(ruleId, 10), - }); - - if (response.statusCode === 200) { - setEditButtonState((state) => ({ - ...state, - loading: false, - payload: response.payload, - })); - - notifications.success({ - message: 'Success', - description: 'Congrats. The alert was Edited correctly.', - }); - - setTimeout(() => { - history.push(ROUTES.LIST_ALL_ALERT); - }, 2000); - } else { - setEditButtonState((state) => ({ - ...state, - loading: false, - errorMessage: response.error || 'Something went wrong', - error: true, - })); - - notifications.error({ - message: 'Error', - description: - response.error || - 'Oops! Some issue occured in editing the alert please try again or contact support@signoz.io', - }); - } - } catch (error) { - notifications.error({ - message: 'Error', - description: - 'Oops! 
Some issue occured in editing the alert please try again or contact support@signoz.io', - }); - } - }, [value, ruleId, notifications]); +function EditRules({ initialValue, ruleId }: EditRulesProps): JSX.Element { + const [formInstance] = Form.useForm(); return ( - <> - {Element} - - setEditorValue(value)} value={value} /> - - - - - + ); } interface EditRulesProps { - initialData: PayloadProps['data']; - ruleId: string; + initialValue: AlertDef; + ruleId: number; } export default EditRules; diff --git a/frontend/src/container/ErrorDetails/index.tsx b/frontend/src/container/ErrorDetails/index.tsx index a5f8efe756..d42d2e4a3e 100644 --- a/frontend/src/container/ErrorDetails/index.tsx +++ b/frontend/src/container/ErrorDetails/index.tsx @@ -1,25 +1,49 @@ import { Button, Divider, notification, Space, Table, Typography } from 'antd'; +import getNextPrevId from 'api/errors/getNextPrevId'; import Editor from 'components/Editor'; +import { getNanoSeconds } from 'container/AllError/utils'; import dayjs from 'dayjs'; import history from 'lib/history'; +import { urlKey } from 'pages/ErrorDetails/utils'; import React, { useMemo, useState } from 'react'; import { useTranslation } from 'react-i18next'; +import { useQuery } from 'react-query'; import { useLocation } from 'react-router-dom'; import { PayloadProps as GetByErrorTypeAndServicePayload } from 'types/api/errors/getByErrorTypeAndService'; -import { PayloadProps } from 'types/api/errors/getById'; import { DashedContainer, EditorContainer, EventContainer } from './styles'; function ErrorDetails(props: ErrorDetailsProps): JSX.Element { const { idPayload } = props; - const [isLoading, setLoading] = useState(false); const { t } = useTranslation(['errorDetails', 'common']); - const { search } = useLocation(); - const params = new URLSearchParams(search); - const queryErrorId = params.get('errorId'); - const serviceName = params.get('serviceName'); - const errorType = params.get('errorType'); + + const params = useMemo(() => new URLSearchParams(search), [search]); + + const errorId = params.get(urlKey.errorId); + const serviceName = params.get(urlKey.serviceName); + const errorType = params.get(urlKey.exceptionType); + const timestamp = params.get(urlKey.timestamp); + + const { data: nextPrevData, status: nextPrevStatus } = useQuery( + [ + idPayload.errorId, + idPayload.groupID, + idPayload.timestamp, + errorId, + serviceName, + errorType, + timestamp, + ], + { + queryFn: () => + getNextPrevId({ + errorID: errorId || idPayload.errorId, + groupID: idPayload.groupID, + timestamp: timestamp || getNanoSeconds(idPayload.timestamp), + }), + }, + ); const errorDetail = idPayload; @@ -48,34 +72,32 @@ function ErrorDetails(props: ErrorDetailsProps): JSX.Element { 'errorId', 'timestamp', 'exceptionMessage', - 'newerErrorId', - 'olderErrorId', + 'exceptionEscaped', ], [], ); - const onClickErrorIdHandler = async (id: string): Promise => { + const onClickErrorIdHandler = async ( + id: string, + timestamp: string, + ): Promise => { try { - setLoading(true); - if (id.length === 0) { notification.error({ message: 'Error Id cannot be empty', }); - setLoading(false); return; } - setLoading(false); - - history.push( - `${history.location.pathname}?errorId=${id}&serviceName=${serviceName}&errorType=${errorType}`, + history.replace( + `${history.location.pathname}?&groupId=${ + idPayload.groupID + }×tamp=${getNanoSeconds(timestamp)}&errorId=${id}`, ); } catch (error) { notification.error({ message: t('something_went_wrong'), }); - setLoading(false); } }; @@ -106,25 +128,25 
@@ function ErrorDetails(props: ErrorDetailsProps): JSX.Element {
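For context, the hunk above switches error navigation to groupId + nanosecond timestamp + errorId query params fetched through react-query. A minimal sketch of the helper it imports, assuming getNanoSeconds (container/AllError/utils) simply widens an ISO date string to a nanosecond string — illustrative only, not the repository's exact code:

// Hypothetical sketch of the imported helper; the real implementation
// lives in container/AllError/utils and may differ in detail.
export const getNanoSeconds = (date: string): string =>
	// getTime() is in milliseconds; appending six zeros keeps full precision,
	// since multiplying by 1e6 would exceed Number.MAX_SAFE_INTEGER.
	`${new Date(date).getTime()}000000`;

Note also that the handler now calls history.replace rather than history.push, so stepping through older/newer errors rewrites the query string in place instead of flooding the browser history.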
+ {t('user_guide_headline')} + + + + + + {queryType === EQueryType.QUERY_BUILDER && renderGuideForQB()} + {queryType === EQueryType.PROM && renderGuideForPQL()} + + ); +} + +interface UserGuideProps { + queryType: EQueryType; +} + +export default UserGuide; diff --git a/frontend/src/container/FormAlertRules/UserGuide/styles.ts b/frontend/src/container/FormAlertRules/UserGuide/styles.ts new file mode 100644 index 0000000000..d4292f32d8 --- /dev/null +++ b/frontend/src/container/FormAlertRules/UserGuide/styles.ts @@ -0,0 +1,17 @@ +import { Card, Typography } from 'antd'; +import styled from 'styled-components'; + +export const StyledMainContainer = styled(Card)``; + +export const StyledTopic = styled(Typography.Paragraph)` + font-weight: 600; +`; + +export const StyledList = styled.ul` + padding-left: 18px; +`; + +export const StyledListItem = styled.li` + font-style: italic; + padding-bottom: 0.5rem; +`; diff --git a/frontend/src/container/FormAlertRules/index.tsx b/frontend/src/container/FormAlertRules/index.tsx new file mode 100644 index 0000000000..38fcaad04d --- /dev/null +++ b/frontend/src/container/FormAlertRules/index.tsx @@ -0,0 +1,381 @@ +import { ExclamationCircleOutlined, SaveOutlined } from '@ant-design/icons'; +import { FormInstance, Modal, notification, Typography } from 'antd'; +import saveAlertApi from 'api/alerts/save'; +import ROUTES from 'constants/routes'; +import QueryTypeTag from 'container/NewWidget/LeftContainer/QueryTypeTag'; +import PlotTag from 'container/NewWidget/LeftContainer/WidgetGraph/PlotTag'; +import history from 'lib/history'; +import React, { useCallback, useEffect, useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { useQueryClient } from 'react-query'; +import { + IFormulaQueries, + IMetricQueries, + IPromQueries, +} from 'types/api/alerts/compositeQuery'; +import { + AlertDef, + defaultEvalWindow, + defaultMatchType, +} from 'types/api/alerts/def'; +import { Query as StagedQuery } from 'types/api/dashboard/getAll'; +import { EQueryType } from 'types/common/dashboard'; + +import BasicInfo from './BasicInfo'; +import ChartPreview from './ChartPreview'; +import QuerySection from './QuerySection'; +import RuleOptions from './RuleOptions'; +import { + ActionButton, + ButtonContainer, + MainFormContainer, + PanelContainer, + StyledLeftContainer, + StyledRightContainer, +} from './styles'; +import useDebounce from './useDebounce'; +import UserGuide from './UserGuide'; +import { + prepareBuilderQueries, + prepareStagedQuery, + toChartInterval, + toFormulaQueries, + toMetricQueries, +} from './utils'; + +function FormAlertRules({ + formInstance, + initialValue, + ruleId, +}: FormAlertRuleProps): JSX.Element { + // init namespace for translations + const { t } = useTranslation('alerts'); + + // use query client + const ruleCache = useQueryClient(); + + const [loading, setLoading] = useState(false); + + // alertDef holds the form values to be posted + const [alertDef, setAlertDef] = useState(initialValue); + + // initQuery contains initial query when component was mounted + const initQuery = initialValue?.condition?.compositeMetricQuery; + + const [queryCategory, setQueryCategory] = useState( + initQuery?.queryType, + ); + + // local state to handle metric queries + const [metricQueries, setMetricQueries] = useState( + toMetricQueries(initQuery?.builderQueries), + ); + + // local state to handle formula queries + const [formulaQueries, setFormulaQueries] = useState( + toFormulaQueries(initQuery?.builderQueries), + ); + + // local 
state to handle promql queries + const [promQueries, setPromQueries] = useState({ + ...initQuery?.promQueries, + }); + + // staged query is used to display chart preview + const [stagedQuery, setStagedQuery] = useState(); + const debouncedStagedQuery = useDebounce(stagedQuery, 500); + + // this useEffect initiates the staged query and + // other queries based on server data. + // useful when fetching of initial values (from api) + // is delayed + useEffect(() => { + const initQuery = initialValue?.condition?.compositeMetricQuery; + const typ = initQuery?.queryType; + + // extract metric query from builderQueries + const mq = toMetricQueries(initQuery?.builderQueries); + + // extract formula query from builderQueries + const fq = toFormulaQueries(initQuery?.builderQueries); + + // prepare staged query + const sq = prepareStagedQuery(typ, mq, fq, initQuery?.promQueries); + const pq = initQuery?.promQueries; + + setQueryCategory(typ); + setMetricQueries(mq); + setFormulaQueries(fq); + setPromQueries(pq); + setStagedQuery(sq); + setAlertDef(initialValue); + }, [initialValue]); + + // this useEffect updates staging query when + // any of its sub-parameters changes + useEffect(() => { + // prepare staged query + const sq: StagedQuery = prepareStagedQuery( + queryCategory, + metricQueries, + formulaQueries, + promQueries, + ); + setStagedQuery(sq); + }, [queryCategory, metricQueries, formulaQueries, promQueries]); + + const onCancelHandler = useCallback(() => { + history.replace(ROUTES.LIST_ALL_ALERT); + }, []); + + // onQueryCategoryChange handles changes to query category + // in state as well as sets additional defaults + const onQueryCategoryChange = (val: EQueryType): void => { + setQueryCategory(val); + if (val === EQueryType.PROM) { + setAlertDef({ + ...alertDef, + condition: { + ...alertDef.condition, + matchType: defaultMatchType, + }, + evalWindow: defaultEvalWindow, + }); + } + }; + + const isFormValid = useCallback((): boolean => { + let retval = true; + + if (!alertDef.alert || alertDef.alert === '') { + notification.error({ + message: 'Error', + description: t('alertname_required'), + }); + return false; + } + + if ( + queryCategory === EQueryType.PROM && + (!promQueries || Object.keys(promQueries).length === 0) + ) { + notification.error({ + message: 'Error', + description: t('promql_required'), + }); + return false; + } + + if ( + queryCategory === EQueryType.QUERY_BUILDER && + (!metricQueries || Object.keys(metricQueries).length === 0) + ) { + notification.error({ + message: 'Error', + description: t('condition_required'), + }); + return false; + } + + Object.keys(metricQueries).forEach((key) => { + if (metricQueries[key].metricName === '') { + retval = false; + notification.error({ + message: 'Error', + description: t('metricname_missing', { where: metricQueries[key].name }), + }); + } + }); + + Object.keys(formulaQueries).forEach((key) => { + if (formulaQueries[key].expression === '') { + retval = false; + notification.error({ + message: 'Error', + description: t('expression_missing', { where: formulaQueries[key].name }), + }); + } + }); + + return retval; + }, [t, alertDef, queryCategory, metricQueries, formulaQueries, promQueries]); + + const saveRule = useCallback(async () => { + if (!isFormValid()) { + return; + } + + const postableAlert: AlertDef = { + ...alertDef, + source: window?.location.toString(), + ruleType: + queryCategory === EQueryType.PROM ?
'promql_rule' : 'threshold_rule', + condition: { + ...alertDef.condition, + compositeMetricQuery: { + builderQueries: prepareBuilderQueries(metricQueries, formulaQueries), + promQueries, + queryType: queryCategory, + }, + }, + }; + + setLoading(true); + try { + const apiReq = + ruleId && ruleId > 0 + ? { data: postableAlert, id: ruleId } + : { data: postableAlert }; + + const response = await saveAlertApi(apiReq); + + if (response.statusCode === 200) { + notification.success({ + message: 'Success', + description: + !ruleId || ruleId === 0 ? t('rule_created') : t('rule_edited'), + }); + console.log('invalidating cache'); + // invalidate rule in cache + ruleCache.invalidateQueries(['ruleId', ruleId]); + + setTimeout(() => { + history.replace(ROUTES.LIST_ALL_ALERT); + }, 2000); + } else { + notification.error({ + message: 'Error', + description: response.error || t('unexpected_error'), + }); + } + } catch (e) { + console.log('save alert api failed:', e); + notification.error({ + message: 'Error', + description: t('unexpected_error'), + }); + } + setLoading(false); + }, [ + t, + isFormValid, + queryCategory, + ruleId, + alertDef, + metricQueries, + formulaQueries, + promQueries, + ruleCache, + ]); + + const onSaveHandler = useCallback(async () => { + const content = ( + + {' '} + {t('confirm_save_content_part1')} {' '} + {t('confirm_save_content_part2')} + + ); + Modal.confirm({ + icon: , + title: t('confirm_save_title'), + centered: true, + content, + onOk() { + saveRule(); + }, + }); + }, [t, saveRule, queryCategory]); + + const renderBasicInfo = (): JSX.Element => ( + + ); + + const renderQBChartPreview = (): JSX.Element => { + return ( + } + name="" + threshold={alertDef.condition?.target} + query={debouncedStagedQuery} + selectedInterval={toChartInterval(alertDef.evalWindow)} + /> + ); + }; + + const renderPromChartPreview = (): JSX.Element => { + return ( + } + name="Chart Preview" + threshold={alertDef.condition?.target} + query={debouncedStagedQuery} + /> + ); + }; + + return ( + <> + {Element} + + + + {queryCategory === EQueryType.QUERY_BUILDER && renderQBChartPreview()} + {queryCategory === EQueryType.PROM && renderPromChartPreview()} + + + + + {renderBasicInfo()} + + } + > + {ruleId > 0 ?
t('button_savechanges') : t('button_createrule')} + + + {ruleId === 0 && t('button_cancelchanges')} + {ruleId > 0 && t('button_discard')} + + + + + + + + + + ); +} + +interface FormAlertRuleProps { + formInstance: FormInstance; + initialValue: AlertDef; + ruleId: number; +} + +export default FormAlertRules; diff --git a/frontend/src/container/FormAlertRules/labels/Labels.machine.ts b/frontend/src/container/FormAlertRules/labels/Labels.machine.ts new file mode 100644 index 0000000000..812a498c65 --- /dev/null +++ b/frontend/src/container/FormAlertRules/labels/Labels.machine.ts @@ -0,0 +1,49 @@ +import { createMachine } from 'xstate'; + +export const ResourceAttributesFilterMachine = + /** @xstate-layout N4IgpgJg5mDOIC5QBECGsAWAjA9qgThAAQDKYBAxhkQIIB2xAYgJYA2ALmPgHQAqqUANJgAngGIAcgFEAGr0SgADjljN2zHHQUgAHogAcAFgAM3AOz6ATAEYAzJdsA2Y4cOWAnABoQIxAFpDR2tuQ319AFYTcKdbFycAX3jvNExcAmIySmp6JjZOHn4hUTFNACFWAFd8bWVVdU1tPQQzY1MXY2tDdzNHM3dHd0NvXwR7biMTa313S0i+63DE5PRsPEJScnwqWgYiFg4uPgFhcQAlKRIpeSQQWrUNLRumx3Czbg8TR0sbS31jfUcw38fW47gBHmm4XCVms3SWIBSq3SGyyO1yBx4AHlFFxUOwcPhJLJrkoVPcGk9ENYFuF3i5YR0wtEHECEAEgiEmV8zH1DLYzHZ4Yi0utMltsrt9vluNjcfjCWVKtUbnd6o9QE1rMYBtxbGFvsZ3NrZj1WdYOfotUZLX0XEFHEKViKMpttjk9nlDrL8HiCWJzpcSbcyWrGoh3NCQj0zK53P1ph1WeFLLqnJZ2s5vmZLA6kginWsXaj3VLDoUAGqoSpgEp0cpVGohh5hhDWDy0sz8zruakzamWVm-Qyg362V5-AZOayO1KFlHitEejFHKCV6v+i5XRt1ZuU1s52zjNOOaZfdOWIY+RDZ0Hc6ZmKEXqyLPPCudit2Sz08ACSEFYNbSHI27kuquiIOEjiONwjJgrM3RWJYZisgEIJgnYPTmuEdi2OaiR5nQOAQHA2hvsiH4Sui0qFCcIGhnuLSmP0YJuJ2xjJsmKELG8XZTK0tjdHG06vgW5GupRS7St6vrKqSO4UhqVL8TBWp8o4eqdl0A5Xmy3G6gK56-B4uERDOSKiuJi6lgUAhrhUYB0buimtrEKZBDYrxaS0OZca8+ltheybOI4hivGZzrzp+VGHH+AGOQp4EIHy+ghNYnawtG4TsbYvk8QKfHGAJfQ9uF76WSW37xWBTSGJ0qXpd0vRZdEKGPqC2YeO2-zfO4+HxEAA */ + createMachine({ + tsTypes: {} as import('./Labels.machine.typegen').Typegen0, + initial: 'Idle', + states: { + LabelKey: { + on: { + NEXT: { + actions: 'onSelectLabelValue', + target: 'LabelValue', + }, + onBlur: { + actions: 'onSelectLabelValue', + target: 'LabelValue', + }, + RESET: { + target: 'Idle', + }, + }, + }, + LabelValue: { + on: { + NEXT: { + actions: ['onValidateQuery'], + }, + onBlur: { + actions: ['onValidateQuery'], + // target: 'Idle', + }, + RESET: { + target: 'Idle', + }, + }, + }, + Idle: { + on: { + NEXT: { + actions: 'onSelectLabelKey', + description: 'Enter a label key', + target: 'LabelKey', + }, + }, + }, + }, + id: 'Label Key Values', + }); diff --git a/frontend/src/container/FormAlertRules/labels/Labels.machine.typegen.ts b/frontend/src/container/FormAlertRules/labels/Labels.machine.typegen.ts new file mode 100644 index 0000000000..f31469f659 --- /dev/null +++ b/frontend/src/container/FormAlertRules/labels/Labels.machine.typegen.ts @@ -0,0 +1,25 @@ +// This file was automatically generated. 
Edits will be overwritten + +export interface Typegen0 { + '@@xstate/typegen': true; + eventsCausingActions: { + onSelectLabelValue: 'NEXT' | 'onBlur'; + onValidateQuery: 'NEXT' | 'onBlur'; + onSelectLabelKey: 'NEXT'; + }; + internalEvents: { + 'xstate.init': { type: 'xstate.init' }; + }; + invokeSrcNameMap: {}; + missingImplementations: { + actions: 'onSelectLabelValue' | 'onValidateQuery' | 'onSelectLabelKey'; + services: never; + guards: never; + delays: never; + }; + eventsCausingServices: {}; + eventsCausingGuards: {}; + eventsCausingDelays: {}; + matchesStates: 'LabelKey' | 'LabelValue' | 'Idle'; + tags: never; +} diff --git a/frontend/src/container/FormAlertRules/labels/QueryChip.tsx b/frontend/src/container/FormAlertRules/labels/QueryChip.tsx new file mode 100644 index 0000000000..47e4c956ff --- /dev/null +++ b/frontend/src/container/FormAlertRules/labels/QueryChip.tsx @@ -0,0 +1,26 @@ +import React from 'react'; + +import { QueryChipContainer, QueryChipItem } from './styles'; +import { ILabelRecord } from './types'; + +interface QueryChipProps { + queryData: ILabelRecord; + onRemove: (id: string) => void; +} + +export default function QueryChip({ + queryData, + onRemove, +}: QueryChipProps): JSX.Element { + const { key, value } = queryData; + return ( + + onRemove(key)} + > + {key}: {value} + + + ); +} diff --git a/frontend/src/container/FormAlertRules/labels/index.tsx b/frontend/src/container/FormAlertRules/labels/index.tsx new file mode 100644 index 0000000000..98fd564cbd --- /dev/null +++ b/frontend/src/container/FormAlertRules/labels/index.tsx @@ -0,0 +1,164 @@ +import { + CloseCircleFilled, + ExclamationCircleOutlined, +} from '@ant-design/icons'; +import { useMachine } from '@xstate/react'; +import { Button, Input, message, Modal } from 'antd'; +import { map } from 'lodash-es'; +import React, { useCallback, useEffect, useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { useSelector } from 'react-redux'; +import { AppState } from 'store/reducers'; +import { Labels } from 'types/api/alerts/def'; +import AppReducer from 'types/reducer/app'; +import { v4 as uuid } from 'uuid'; + +import { ResourceAttributesFilterMachine } from './Labels.machine'; +import QueryChip from './QueryChip'; +import { QueryChipItem, SearchContainer } from './styles'; +import { ILabelRecord } from './types'; +import { createQuery, flattenLabels, prepareLabels } from './utils'; + +interface LabelSelectProps { + onSetLabels: (q: Labels) => void; + initialValues: Labels | undefined; +} + +function LabelSelect({ + onSetLabels, + initialValues, +}: LabelSelectProps): JSX.Element | null { + const { t } = useTranslation('alerts'); + const { isDarkMode } = useSelector((state) => state.app); + const [currentVal, setCurrentVal] = useState(''); + const [staging, setStaging] = useState([]); + const [queries, setQueries] = useState( + initialValues ? 
flattenLabels(initialValues) : [], + ); + + const dispatchChanges = (updatedRecs: ILabelRecord[]): void => { + onSetLabels(prepareLabels(updatedRecs, initialValues)); + setQueries(updatedRecs); + }; + + const [state, send] = useMachine(ResourceAttributesFilterMachine, { + actions: { + onSelectLabelKey: () => {}, + onSelectLabelValue: () => { + if (currentVal !== '') { + setStaging((prevState) => [...prevState, currentVal]); + } else { + return; + } + setCurrentVal(''); + }, + onValidateQuery: (): void => { + if (currentVal === '') { + return; + } + + const generatedQuery = createQuery([...staging, currentVal]); + + if (generatedQuery) { + dispatchChanges([...queries, generatedQuery]); + setStaging([]); + setCurrentVal(''); + send('RESET'); + } + }, + }, + }); + + const handleFocus = (): void => { + if (state.value === 'Idle') { + send('NEXT'); + } + }; + + const handleBlur = useCallback((): void => { + if (staging.length === 1 && staging[0] !== undefined) { + send('onBlur'); + } + }, [send, staging]); + + useEffect(() => { + handleBlur(); + }, [handleBlur]); + + const handleChange = (e: React.ChangeEvent): void => { + setCurrentVal(e.target?.value); + }; + + const handleClose = (key: string): void => { + dispatchChanges(queries.filter((queryData) => queryData.key !== key)); + }; + + const handleClearAll = (): void => { + Modal.confirm({ + title: 'Confirm', + icon: , + content: t('remove_label_confirm'), + onOk() { + send('RESET'); + dispatchChanges([]); + setStaging([]); + message.success(t('remove_label_success')); + }, + okText: t('button_yes'), + cancelText: t('button_no'), + }); + }; + const renderPlaceholder = useCallback((): string => { + if (state.value === 'LabelKey') return 'Enter a label key then press ENTER.'; + if (state.value === 'LabelValue') + return `Enter a value for label key(${staging[0]}) then press ENTER.`; + return t('placeholder_label_key_pair'); + }, [t, state, staging]); + return ( + +
+ {queries.length > 0 && + map( + queries, + (query): JSX.Element => { + return ( + + ); + }, + )} +
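{/* Illustrative note, not part of this diff: `queries` rendered above are the
    committed key/value records (removable chips via QueryChip); `staging`
    rendered below holds a typed label key still awaiting its value. */}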
+
+ {map(staging, (item) => { + return {item}; + })} +
+ +
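{/* Keystrokes in the input below drive the xstate machine defined in
    Labels.machine.ts: focus sends NEXT (Idle -> LabelKey), ENTER stages the
    key (LabelKey -> LabelValue), and a second ENTER commits the pair through
    onValidateQuery before RESET returns the machine to Idle. */}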
+ { + if (e.key === 'Enter' || e.code === 'Enter') { + send('NEXT'); + } + }} + bordered={false} + value={currentVal as never} + style={{ flex: 1 }} + onFocus={handleFocus} + onBlur={handleBlur} + /> + + {queries.length || staging.length || currentVal ? ( +
+
+ ); } + +export default LabelSelect; diff --git a/frontend/src/container/FormAlertRules/labels/styles.ts b/frontend/src/container/FormAlertRules/labels/styles.ts new file mode 100644 index 0000000000..04d6871315 --- /dev/null +++ b/frontend/src/container/FormAlertRules/labels/styles.ts @@ -0,0 +1,35 @@ +import { grey } from '@ant-design/colors'; +import { Tag } from 'antd'; +import styled from 'styled-components'; + +interface SearchContainerProps { + isDarkMode: boolean; + disabled: boolean; +} + +export const SearchContainer = styled.div` + width: 70%; + border-radius: 4px; + background: ${({ isDarkMode }): string => (isDarkMode ? '#000' : '#fff')}; + flex: 1; + display: flex; + flex-direction: column; + padding: 0.2rem; + border: 1px solid #ccc5; + ${({ disabled }): string => (disabled ? `cursor: not-allowed;` : '')} +`; + +export const QueryChipContainer = styled.span` + display: flex; + align-items: center; + margin-right: 0.5rem; + &:hover { + & > * { + background: ${grey.primary}44; + } + } +`; + +export const QueryChipItem = styled(Tag)` + margin-right: 0.1rem; +`; diff --git a/frontend/src/container/FormAlertRules/labels/types.ts b/frontend/src/container/FormAlertRules/labels/types.ts new file mode 100644 index 0000000000..b10fc3fded --- /dev/null +++ b/frontend/src/container/FormAlertRules/labels/types.ts @@ -0,0 +1,9 @@ +export interface ILabelRecord { + key: string; + value: string; +} + +export interface IOption { + label: string; + value: string; +} diff --git a/frontend/src/container/FormAlertRules/labels/utils.ts b/frontend/src/container/FormAlertRules/labels/utils.ts new file mode 100644 index 0000000000..1a2943f3ee --- /dev/null +++ b/frontend/src/container/FormAlertRules/labels/utils.ts @@ -0,0 +1,54 @@ +import { Labels } from 'types/api/alerts/def'; + +import { ILabelRecord } from './types'; + +const hiddenLabels = ['severity', 'description']; + +export const createQuery = ( + selectedItems: Array = [], +): ILabelRecord | null => { + if (selectedItems.length === 2) { + return { + key: selectedItems[0] as string, + value: selectedItems[1] as string, + }; + } + return null; +}; + +export const flattenLabels = (labels: Labels): ILabelRecord[] => { + const recs: ILabelRecord[] = []; + + Object.keys(labels).forEach((key) => { + if (!hiddenLabels.includes(key)) { + recs.push({ + key, + value: labels[key], + }); + } + }); + + return recs; +}; + +export const prepareLabels = ( + recs: ILabelRecord[], + alertLabels: Labels | undefined, +): Labels => { + const labels: Labels = {}; + + recs.forEach((rec) => { + if (!hiddenLabels.includes(rec.key)) { + labels[rec.key] = rec.value; + } + }); + if (alertLabels) { + Object.keys(alertLabels).forEach((key) => { + if (hiddenLabels.includes(key)) { + labels[key] = alertLabels[key]; + } + }); + } + + return labels; +}; diff --git a/frontend/src/container/FormAlertRules/styles.ts b/frontend/src/container/FormAlertRules/styles.ts new file mode 100644 index 0000000000..3c64414cbe --- /dev/null +++ b/frontend/src/container/FormAlertRules/styles.ts @@ -0,0 +1,103 @@ +import { Button, Card, Col, Form, Input, InputNumber, Row, Select } from 'antd'; +import TextArea from 'antd/lib/input/TextArea'; +import styled from 'styled-components'; + +export const PanelContainer = styled(Row)` + flex-wrap: nowrap; +`; + +export const StyledRightContainer = styled(Col)` + &&& { + } +`; + +export const StyledLeftContainer = styled(Col)` + &&& { + margin-right: 1rem; + } +`; + +export const MainFormContainer = styled(Form)``; + +export const ButtonContainer =
styled.div` + &&& { + display: flex; + justify-content: flex-start; + align-items: center; + margin-top: 1rem; + margin-bottom: 3rem; + } +`; + +export const ActionButton = styled(Button)` + margin-right: 1rem; +`; + +export const QueryButton = styled(Button)` + &&& { + display: flex; + align-items: center; + margin-right: 1rem; + } +`; + +export const QueryContainer = styled(Card)` + &&& { + margin-top: 1rem; + min-height: 23.5%; + } +`; + +export const Container = styled.div` + margin-top: 1rem; + display: flex; + flex-direction: column; +`; + +export const StepHeading = styled.p` + margin-top: 1rem; + font-weight: bold; +`; + +export const InlineSelect = styled(Select)` + display: inline-block; + width: 10% !important; + margin-left: 0.2em; + margin-right: 0.2em; +`; + +export const SeveritySelect = styled(Select)` + width: 15% !important; +`; + +export const InputSmall = styled(Input)` + width: 40% !important; +`; + +export const FormContainer = styled.div` + padding: 2em; + margin-top: 1rem; + display: flex; + flex-direction: column; + background: #141414; + border-radius: 4px; + border: 1px solid #303030; +`; + +export const ThresholdInput = styled(InputNumber)` + & > div { + display: flex; + align-items: center; + & > .ant-input-number-group-addon { + width: 130px; + } + & > .ant-input-number { + width: 50%; + margin-left: 1em; + } + } +`; + +export const TextareaMedium = styled(TextArea)` + width: 70%; +`; diff --git a/frontend/src/container/FormAlertRules/useDebounce.js b/frontend/src/container/FormAlertRules/useDebounce.js new file mode 100644 index 0000000000..e430f55d63 --- /dev/null +++ b/frontend/src/container/FormAlertRules/useDebounce.js @@ -0,0 +1,31 @@ +/* eslint-disable */ +// @ts-ignore +// @ts-nocheck + +import { useEffect, useState } from 'react'; + +// see https://github.com/tannerlinsley/react-query/issues/293 +// see https://usehooks.com/useDebounce/ +export default function useDebounce(value, delay) { + // State and setters for debounced value + const [debouncedValue, setDebouncedValue] = useState(value); + + useEffect( + () => { + // Update debounced value after delay + const handler = setTimeout(() => { + setDebouncedValue(value); + }, delay); + + // Cancel the timeout if value changes (also on delay change or unmount) + // This is how we prevent debounced value from updating if value is changed ... + // .. within the delay period. Timeout gets cleared and restarted. 
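		// Usage sketch (taken from FormAlertRules/index.tsx in this same diff,
		// not part of the hook itself):
		//   const debouncedStagedQuery = useDebounce(stagedQuery, 500);
		// i.e. the chart preview re-queries at most once per 500 ms burst of edits.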
+ return () => { + clearTimeout(handler); + }; + }, + [value, delay] // Only re-call effect if value or delay changes + ); + + return debouncedValue; +} diff --git a/frontend/src/container/FormAlertRules/utils.ts b/frontend/src/container/FormAlertRules/utils.ts new file mode 100644 index 0000000000..ea40ce692b --- /dev/null +++ b/frontend/src/container/FormAlertRules/utils.ts @@ -0,0 +1,136 @@ +import { Time } from 'container/TopNav/DateTimeSelection/config'; +import { + IBuilderQueries, + IFormulaQueries, + IFormulaQuery, + IMetricQueries, + IMetricQuery, + IPromQueries, + IPromQuery, +} from 'types/api/alerts/compositeQuery'; +import { + IMetricsBuilderQuery, + Query as IStagedQuery, +} from 'types/api/dashboard/getAll'; +import { EQueryType } from 'types/common/dashboard'; + +export const toFormulaQueries = (b: IBuilderQueries): IFormulaQueries => { + const f: IFormulaQueries = {}; + if (!b) return f; + Object.keys(b).forEach((key) => { + if (key === 'F1') { + f[key] = b[key] as IFormulaQuery; + } + }); + + return f; +}; + +export const toMetricQueries = (b: IBuilderQueries): IMetricQueries => { + const m: IMetricQueries = {}; + if (!b) return m; + Object.keys(b).forEach((key) => { + if (key !== 'F1') { + m[key] = b[key] as IMetricQuery; + } + }); + + return m; +}; + +export const toIMetricsBuilderQuery = ( + q: IMetricQuery, +): IMetricsBuilderQuery => { + return { + name: q.name, + metricName: q.metricName, + tagFilters: q.tagFilters, + groupBy: q.groupBy, + aggregateOperator: q.aggregateOperator, + disabled: q.disabled, + legend: q.legend, + }; +}; + +export const prepareBuilderQueries = ( + m: IMetricQueries, + f: IFormulaQueries, +): IBuilderQueries => { + if (!m) return {}; + const b: IBuilderQueries = { + ...m, + }; + + Object.keys(f).forEach((key) => { + b[key] = { + ...f[key], + aggregateOperator: undefined, + metricName: '', + }; + }); + return b; +}; + +export const prepareStagedQuery = ( + t: EQueryType, + m: IMetricQueries, + f: IFormulaQueries, + p: IPromQueries, +): IStagedQuery => { + const qbList: IMetricQuery[] = []; + const formulaList: IFormulaQuery[] = []; + const promList: IPromQuery[] = []; + + // convert map[string]IMetricQuery to IMetricQuery[] + if (m) { + Object.keys(m).forEach((key) => { + qbList.push(m[key]); + }); + } + + // convert map[string]IFormulaQuery to IFormulaQuery[] + if (f) { + Object.keys(f).forEach((key) => { + formulaList.push(f[key]); + }); + } + + // convert map[string]IPromQuery to IPromQuery[] + if (p) { + Object.keys(p).forEach((key) => { + promList.push({ ...p[key], name: key }); + }); + } + + return { + queryType: t, + promQL: promList, + metricsBuilder: { + formulas: formulaList, + queryBuilder: qbList, + }, + clickHouse: [], + }; +}; + +// toChartInterval converts eval window to chart selection time interval +export const toChartInterval = (evalWindow: string | undefined): Time => { + switch (evalWindow) { + case '5m0s': + return '5min'; + case '10m0s': + return '10min'; + case '15m0s': + return '15min'; + case '30m0s': + return '30min'; + case '60m0s': + return '30min'; + case '4h0m0s': + return '4hr'; + case '24h0m0s': + return '1day'; + default: + return '5min'; + } +}; diff --git a/frontend/src/container/GantChart/SpanName/index.tsx b/frontend/src/container/GantChart/SpanName/index.tsx index 47d58c3e5c..7f536624b9 100644 --- a/frontend/src/container/GantChart/SpanName/index.tsx +++ b/frontend/src/container/GantChart/SpanName/index.tsx @@ -10,7 +10,7 @@ function SpanNameComponent({ {name} - {serviceName} + {serviceName} ); diff 
--git a/frontend/src/container/GantChart/SpanName/styles.ts b/frontend/src/container/GantChart/SpanName/styles.ts index 642e28f639..abd41dc54e 100644 --- a/frontend/src/container/GantChart/SpanName/styles.ts +++ b/frontend/src/container/GantChart/SpanName/styles.ts @@ -9,7 +9,7 @@ export const Span = styled(Typography.Paragraph)` } `; -export const Service = styled(Typography)` +export const Service = styled(Typography.Paragraph)` &&& { color: #acacac; font-size: 0.75rem; diff --git a/frontend/src/container/GantChart/Trace/index.tsx b/frontend/src/container/GantChart/Trace/index.tsx index db607092eb..d6982e04e6 100644 --- a/frontend/src/container/GantChart/Trace/index.tsx +++ b/frontend/src/container/GantChart/Trace/index.tsx @@ -39,6 +39,7 @@ function Trace(props: TraceProps): JSX.Element { isExpandAll, intervalUnit, children, + isMissing, } = props; const { isDarkMode } = useThemeMode(); @@ -125,7 +126,7 @@ function Trace(props: TraceProps): JSX.Element { isDarkMode={isDarkMode} /> - +
@@ -174,6 +175,7 @@ function Trace(props: TraceProps): JSX.Element { activeSpanPath={activeSpanPath} isExpandAll={isExpandAll} intervalUnit={intervalUnit} + isMissing={child.isMissing} /> ))} @@ -182,6 +184,10 @@ function Trace(props: TraceProps): JSX.Element { ); } +Trace.defaultProps = { + isMissing: false, +}; + interface ITraceGlobal { globalSpread: ITraceMetaData['spread']; globalStart: ITraceMetaData['globalStart']; @@ -196,6 +202,7 @@ interface TraceProps extends ITraceTree, ITraceGlobal { activeSpanPath: string[]; isExpandAll: boolean; intervalUnit: IIntervalUnit; + isMissing?: boolean; } export default Trace; diff --git a/frontend/src/container/GantChart/Trace/styles.ts b/frontend/src/container/GantChart/Trace/styles.ts index ccf139d6c2..a85eec454c 100644 --- a/frontend/src/container/GantChart/Trace/styles.ts +++ b/frontend/src/container/GantChart/Trace/styles.ts @@ -1,3 +1,4 @@ +import { volcano } from '@ant-design/colors'; import styled, { css, DefaultTheme, @@ -15,7 +16,6 @@ export const Wrapper = styled.ul` padding-top: 0.5rem; position: relative; z-index: 1; - ul { border-left: ${({ isOnlyChild }): StyledCSS => isOnlyChild && 'none'} !important; @@ -36,10 +36,14 @@ export const Wrapper = styled.ul` } `; -export const CardContainer = styled.li` +export const CardContainer = styled.li<{ isMissing?: boolean }>` display: flex; width: 100%; cursor: pointer; + border-radius: 0.25rem; + z-index: 2; + ${({ isMissing }): string => + isMissing ? `border: 1px dashed ${volcano[6]} !important;` : ''} `; interface Props { diff --git a/frontend/src/container/GantChart/index.tsx b/frontend/src/container/GantChart/index.tsx index a25f4af228..dbe707c2d7 100644 --- a/frontend/src/container/GantChart/index.tsx +++ b/frontend/src/container/GantChart/index.tsx @@ -3,7 +3,7 @@ import { IIntervalUnit } from 'container/TraceDetail/utils'; import React, { useEffect, useState } from 'react'; import { ITraceTree } from 'types/api/trace/getTraceItem'; -import { CardContainer, CardWrapper, CollapseButton, Wrapper } from './styles'; +import { CardContainer, CardWrapper, CollapseButton } from './styles'; import Trace from './Trace'; import { getSpanPath } from './utils'; @@ -36,35 +36,33 @@ function GanttChart(props: GanttChartProps): JSX.Element { setIsExpandAll((prev) => !prev); }; return ( - - - - {isExpandAll ? : } - - - - - - + + + {isExpandAll ? 
: } + + + + + ); } diff --git a/frontend/src/container/GantChart/styles.ts b/frontend/src/container/GantChart/styles.ts index 4d523c4998..6f05611599 100644 --- a/frontend/src/container/GantChart/styles.ts +++ b/frontend/src/container/GantChart/styles.ts @@ -38,6 +38,7 @@ export const CardWrapper = styled.div` export const CardContainer = styled.li` display: flex; width: 100%; + position: relative; `; export const CollapseButton = styled.div` diff --git a/frontend/src/container/GantChart/utils.ts b/frontend/src/container/GantChart/utils.ts index d229af9839..c91564b3e2 100644 --- a/frontend/src/container/GantChart/utils.ts +++ b/frontend/src/container/GantChart/utils.ts @@ -1,4 +1,5 @@ -import { ITraceTree } from 'types/api/trace/getTraceItem'; +import { set } from 'lodash-es'; +import { ITraceForest, ITraceTree } from 'types/api/trace/getTraceItem'; interface GetTraceMetaData { globalStart: number; @@ -65,25 +66,48 @@ export function getTopLeftFromBody( export const getNodeById = ( searchingId: string, - treeData: ITraceTree, -): ITraceTree | undefined => { - let foundNode: ITraceTree | undefined; - const traverse = (treeNode: ITraceTree, level = 0): void => { + treesData: ITraceForest | undefined, +): ITraceForest => { + const newtreeData: ITraceForest = {} as ITraceForest; + + const traverse = ( + treeNode: ITraceTree, + setCallBack: (arg0: ITraceTree) => void, + level = 0, + ): void => { if (!treeNode) { return; } if (searchingId === treeNode.id) { - foundNode = treeNode; + setCallBack(treeNode); } treeNode.children.forEach((childNode) => { - traverse(childNode, level + 1); + traverse(childNode, setCallBack, level + 1); }); }; - traverse(treeData, 1); - return foundNode; + const spanTreeSetCallback = ( + path: (keyof ITraceForest)[], + value: ITraceTree, + ): ITraceForest => set(newtreeData, path, [value]); + + if (treesData?.spanTree) + treesData.spanTree.forEach((tree) => { + traverse(tree, (value) => spanTreeSetCallback(['spanTree'], value), 1); + }); + + if (treesData?.missingSpanTree) + treesData.missingSpanTree.forEach((tree) => { + traverse( + tree, + (value) => spanTreeSetCallback(['missingSpanTree'], value), + 1, + ); + }); + + return newtreeData; }; const getSpanWithoutChildren = ( diff --git a/frontend/src/container/GridGraphComponent/index.tsx b/frontend/src/container/GridGraphComponent/index.tsx index d2139b1a08..3a1b84e963 100644 --- a/frontend/src/container/GridGraphComponent/index.tsx +++ b/frontend/src/container/GridGraphComponent/index.tsx @@ -1,6 +1,6 @@ import { Typography } from 'antd'; import { ChartData } from 'chart.js'; -import Graph, { GraphOnClickHandler } from 'components/Graph'; +import Graph, { GraphOnClickHandler, StaticLineProps } from 'components/Graph'; import { getYAxisFormattedValue } from 'components/Graph/yAxisConfig'; import ValueGraph from 'components/ValueGraph'; import { GRAPH_TYPES } from 'container/NewDashboard/ComponentsSlider'; @@ -18,6 +18,7 @@ function GridGraphComponent({ onClickHandler, name, yAxisUnit, + staticLine, }: GridGraphComponentProps): JSX.Element | null { const location = history.location.pathname; @@ -36,6 +37,7 @@ function GridGraphComponent({ onClickHandler, name, yAxisUnit, + staticLine, }} /> ); @@ -82,6 +84,7 @@ export interface GridGraphComponentProps { onClickHandler?: GraphOnClickHandler; name: string; yAxisUnit?: string; + staticLine?: StaticLineProps; } GridGraphComponent.defaultProps = { @@ -90,6 +93,7 @@ GridGraphComponent.defaultProps = { isStacked: undefined, onClickHandler: undefined, yAxisUnit: undefined, + 
staticLine: undefined, }; export default GridGraphComponent; diff --git a/frontend/src/container/ListAlertRules/ListAlert.tsx b/frontend/src/container/ListAlertRules/ListAlert.tsx index b851b0829a..4df6290725 100644 --- a/frontend/src/container/ListAlertRules/ListAlert.tsx +++ b/frontend/src/container/ListAlertRules/ListAlert.tsx @@ -64,9 +64,14 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { }, { title: 'Alert Name', - dataIndex: 'name', + dataIndex: 'alert', key: 'name', sorter: (a, b): number => a.name.charCodeAt(0) - b.name.charCodeAt(0), + render: (value, record): JSX.Element => ( + onEditHandler(record.id.toString())}> + {value} + + ), }, { title: 'Severity', @@ -83,7 +88,7 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { }, }, { - title: 'Tags', + title: 'Labels', dataIndex: 'labels', key: 'tags', align: 'center', @@ -100,7 +105,7 @@ function ListAlert({ allAlertRules, refetch }: ListAlertProps): JSX.Element { {withOutSeverityKeys.map((e) => { return ( - {e} + {e}: {value[e]} ); })} diff --git a/frontend/src/container/MetricsApplication/Tabs/DBCall.tsx b/frontend/src/container/MetricsApplication/Tabs/DBCall.tsx index 60441b7876..2c14c099f7 100644 --- a/frontend/src/container/MetricsApplication/Tabs/DBCall.tsx +++ b/frontend/src/container/MetricsApplication/Tabs/DBCall.tsx @@ -25,7 +25,7 @@ function DBCall({ getWidget }: DBCallProps): JSX.Element { fullViewOptions={false} widget={getWidget([ { - query: `sum(rate(signoz_db_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[1m])) by (db_system)`, + query: `sum(rate(signoz_db_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (db_system)`, legend: '{{db_system}}', }, ])} diff --git a/frontend/src/container/MetricsApplication/Tabs/External.tsx b/frontend/src/container/MetricsApplication/Tabs/External.tsx index 9811e2f269..4fd039e979 100644 --- a/frontend/src/container/MetricsApplication/Tabs/External.tsx +++ b/frontend/src/container/MetricsApplication/Tabs/External.tsx @@ -14,7 +14,7 @@ function External({ getWidget }: ExternalProps): JSX.Element { const { resourceAttributePromQLQuery } = useSelector( (state) => state.metrics, ); - const legend = '{{http_url}}'; + const legend = '{{address}}'; return ( <> @@ -28,7 +28,7 @@ function External({ getWidget }: ExternalProps): JSX.Element { fullViewOptions={false} widget={getWidget([ { - query: `max((sum(rate(signoz_external_call_latency_count{service_name="${servicename}", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[1m]) OR rate(signoz_external_call_latency_count{service_name="${servicename}", http_status_code=~"5.."${resourceAttributePromQLQuery}}[1m]) OR vector(0)) by (http_url))*100/sum(rate(signoz_external_call_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[1m])) by (http_url)) < 1000 OR vector(0)`, + query: `max((sum(rate(signoz_external_call_latency_count{service_name="${servicename}", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR vector(0)) by (address))*100/sum(rate(signoz_external_call_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (address)) < 1000 OR vector(0)`, legend: 'External Call Error Percentage', }, ])} @@ -68,7 +68,7 @@ function External({ getWidget }: ExternalProps): JSX.Element { fullViewOptions={false} widget={getWidget([ { - query: 
`sum(rate(signoz_external_call_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (http_url)`, + query: `sum(rate(signoz_external_call_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (address)`, legend, }, ])} @@ -87,7 +87,7 @@ function External({ getWidget }: ExternalProps): JSX.Element { fullViewOptions={false} widget={getWidget([ { - query: `(sum(rate(signoz_external_call_latency_sum{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (http_url))/(sum(rate(signoz_external_call_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (http_url))`, + query: `(sum(rate(signoz_external_call_latency_sum{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (address))/(sum(rate(signoz_external_call_latency_count{service_name="${servicename}"${resourceAttributePromQLQuery}}[5m])) by (address))`, legend, }, ])} diff --git a/frontend/src/container/MetricsApplication/Tabs/Overview.tsx b/frontend/src/container/MetricsApplication/Tabs/Overview.tsx index a53714d05d..803ed91bcc 100644 --- a/frontend/src/container/MetricsApplication/Tabs/Overview.tsx +++ b/frontend/src/container/MetricsApplication/Tabs/Overview.tsx @@ -193,7 +193,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { }} widget={getWidget([ { - query: `sum(rate(signoz_latency_count{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[2m]))`, + query: `sum(rate(signoz_latency_count{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))`, legend: 'Requests', }, ])} @@ -227,7 +227,7 @@ function Application({ getWidget }: DashboardProps): JSX.Element { }} widget={getWidget([ { - query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[1m]) OR rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", http_status_code=~"5.."${resourceAttributePromQLQuery}}[1m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[1m]))) < 1000 OR vector(0)`, + query: `max(sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", status_code="STATUS_CODE_ERROR"${resourceAttributePromQLQuery}}[5m]) OR rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER", http_status_code=~"5.."${resourceAttributePromQLQuery}}[5m]))*100/sum(rate(signoz_calls_total{service_name="${servicename}", span_kind="SPAN_KIND_SERVER"${resourceAttributePromQLQuery}}[5m]))) < 1000 OR vector(0)`, legend: 'Error Percentage', }, ])} diff --git a/frontend/src/container/MetricsTable/index.tsx b/frontend/src/container/MetricsTable/index.tsx index ff700da83a..cc0778c80e 100644 --- a/frontend/src/container/MetricsTable/index.tsx +++ b/frontend/src/container/MetricsTable/index.tsx @@ -56,7 +56,7 @@ function Metrics(): JSX.Element { render: (value: number): string => (value / 1000000).toFixed(2), }, { - title: 'Error Rate (in %)', + title: 'Error Rate (% of requests)', dataIndex: 'errorRate', key: 'errorRate', sorter: (a: DataProps, b: DataProps): number => a.errorRate - b.errorRate, diff --git a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/index.tsx b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/index.tsx index 1d96297d04..55adbd740b 
100644 --- a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/index.tsx +++ b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/index.tsx @@ -29,15 +29,15 @@ function PromQLQueryContainer({ toggleDelete, }: IPromQLQueryHandleChange): void => { const allQueries = queryData[WIDGET_PROMQL_QUERY_KEY_NAME]; - const currentIndexQuery = allQueries[queryIndex]; - if (query) currentIndexQuery.query = query; - if (legend) currentIndexQuery.legend = legend; + const currentIndexQuery = allQueries[queryIndex as number]; + if (query !== undefined) currentIndexQuery.query = query; + if (legend !== undefined) currentIndexQuery.legend = legend; if (toggleDisable) { currentIndexQuery.disabled = !currentIndexQuery.disabled; } if (toggleDelete) { - allQueries.splice(queryIndex, 1); + allQueries.splice(queryIndex as number, 1); } updateQueryData({ updatedQuery: { ...queryData } }); }; diff --git a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/query.tsx b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/query.tsx index 1a6dd2f9d2..6cffd55d8d 100644 --- a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/query.tsx +++ b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/query.tsx @@ -7,7 +7,7 @@ import { IPromQLQueryHandleChange } from './types'; interface IPromQLQueryBuilderProps { queryData: IPromQLQuery; - queryIndex: number; + queryIndex: number | string; handleQueryChange: (args: IPromQLQueryHandleChange) => void; } diff --git a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/types.ts b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/types.ts index f1c88dd488..668a0c1f87 100644 --- a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/types.ts +++ b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/promQL/types.ts @@ -1,7 +1,7 @@ import { IPromQLQuery } from 'types/api/dashboard/getAll'; export interface IPromQLQueryHandleChange { - queryIndex: number; + queryIndex: number | string; query?: IPromQLQuery['query']; legend?: IPromQLQuery['legend']; toggleDisable?: IPromQLQuery['disabled']; diff --git a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/formula.tsx b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/formula.tsx index 5be08f044e..02bc41198c 100644 --- a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/formula.tsx +++ b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/formula.tsx @@ -9,7 +9,7 @@ const { TextArea } = Input; interface IMetricsBuilderFormulaProps { formulaData: IMetricsBuilderFormula; - formulaIndex: number; + formulaIndex: number | string; handleFormulaChange: (args: IQueryBuilderFormulaHandleChange) => void; } function MetricsBuilderFormula({ diff --git a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/index.tsx b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/index.tsx index fd5d7f32ad..fdb6d4b7bc 100644 --- a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/index.tsx +++ b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/index.tsx @@ -50,12 +50,12 @@ function QueryBuilderQueryContainer({ }: 
IQueryBuilderQueryHandleChange): void => { const allQueries = queryData[WIDGET_QUERY_BUILDER_QUERY_KEY_NAME].queryBuilder; - const currentIndexQuery = allQueries[queryIndex]; + const currentIndexQuery = allQueries[queryIndex as number]; if (aggregateFunction) { currentIndexQuery.aggregateOperator = aggregateFunction; } - if (metricName) { + if (metricName !== undefined) { currentIndexQuery.metricName = metricName; } @@ -78,7 +78,7 @@ function QueryBuilderQueryContainer({ currentIndexQuery.disabled = !currentIndexQuery.disabled; } if (toggleDelete) { - allQueries.splice(queryIndex, 1); + allQueries.splice(queryIndex as number, 1); } updateQueryData({ updatedQuery: { ...queryData } }); }; @@ -92,7 +92,7 @@ function QueryBuilderQueryContainer({ queryData[WIDGET_QUERY_BUILDER_QUERY_KEY_NAME][ WIDGET_QUERY_BUILDER_FORMULA_KEY_NAME ]; - const currentIndexFormula = allFormulas[formulaIndex]; + const currentIndexFormula = allFormulas[formulaIndex as number]; if (expression) { currentIndexFormula.expression = expression; @@ -103,7 +103,7 @@ function QueryBuilderQueryContainer({ } if (toggleDelete) { - allFormulas.splice(formulaIndex, 1); + allFormulas.splice(formulaIndex as number, 1); } updateQueryData({ updatedQuery: { ...queryData } }); }; diff --git a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/query.tsx b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/query.tsx index fccf108b41..8f171baa3c 100644 --- a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/query.tsx +++ b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/query.tsx @@ -15,7 +15,7 @@ import { IQueryBuilderQueryHandleChange } from './types'; const { Option } = Select; interface IMetricsBuilderProps { - queryIndex: number; + queryIndex: number | string; selectedGraph: GRAPH_TYPES; queryData: IMetricsBuilderQuery; handleQueryChange: (args: IQueryBuilderQueryHandleChange) => void; diff --git a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/types.ts b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/types.ts index 8d177cffd8..c577b8d123 100644 --- a/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/types.ts +++ b/frontend/src/container/NewWidget/LeftContainer/QuerySection/QueryBuilder/queryBuilder/types.ts @@ -4,7 +4,7 @@ import { } from 'types/api/dashboard/getAll'; export interface IQueryBuilderQueryHandleChange { - queryIndex: number; + queryIndex: number | string; aggregateFunction?: IMetricsBuilderQuery['aggregateOperator']; metricName?: IMetricsBuilderQuery['metricName']; tagFilters?: IMetricsBuilderQuery['tagFilters']['items']; @@ -16,7 +16,7 @@ export interface IQueryBuilderQueryHandleChange { } export interface IQueryBuilderFormulaHandleChange { - formulaIndex: number; + formulaIndex: number | string; expression?: IMetricsBuilderFormula['expression']; toggleDisable?: IMetricsBuilderFormula['disabled']; toggleDelete?: boolean; diff --git a/frontend/src/container/TopNav/DateTimeSelection/config.ts b/frontend/src/container/TopNav/DateTimeSelection/config.ts index 29d031e25b..69bdde40c7 100644 --- a/frontend/src/container/TopNav/DateTimeSelection/config.ts +++ b/frontend/src/container/TopNav/DateTimeSelection/config.ts @@ -1,20 +1,24 @@ import ROUTES from 'constants/routes'; type FiveMin = '5min'; +type TenMin = '10min'; type FifteenMin = '15min'; type ThirtyMin = '30min'; 
type OneMin = '1min'; type SixHour = '6hr'; type OneHour = '1hr'; +type FourHour = '4hr'; type OneDay = '1day'; type OneWeek = '1week'; type Custom = 'custom'; export type Time = | FiveMin + | TenMin | FifteenMin | ThirtyMin | OneMin + | FourHour | SixHour | OneHour | Custom diff --git a/frontend/src/container/TopNav/index.tsx b/frontend/src/container/TopNav/index.tsx index ffd1b28175..ddf10023a3 100644 --- a/frontend/src/container/TopNav/index.tsx +++ b/frontend/src/container/TopNav/index.tsx @@ -19,6 +19,9 @@ const routesToSkip = [ ROUTES.ALL_DASHBOARD, ROUTES.ORG_SETTINGS, ROUTES.ERROR_DETAIL, + ROUTES.ALERTS_NEW, + ROUTES.EDIT_ALERTS, + ROUTES.LIST_ALL_ALERT, ]; function TopNav(): JSX.Element | null { diff --git a/frontend/src/container/Trace/Filters/index.tsx b/frontend/src/container/Trace/Filters/index.tsx index 276b69b227..95f73f4ed9 100644 --- a/frontend/src/container/Trace/Filters/index.tsx +++ b/frontend/src/container/Trace/Filters/index.tsx @@ -9,7 +9,9 @@ export const AllTraceFilterEnum: TraceFilterEnum[] = [ 'serviceName', 'operation', 'component', - 'httpCode', + 'rpcMethod', + 'responseStatusCode', + // 'httpCode', 'httpHost', 'httpMethod', 'httpRoute', diff --git a/frontend/src/container/Trace/TraceGraphFilter/config.ts b/frontend/src/container/Trace/TraceGraphFilter/config.ts index 882ccc2e96..357f22a3ee 100644 --- a/frontend/src/container/Trace/TraceGraphFilter/config.ts +++ b/frontend/src/container/Trace/TraceGraphFilter/config.ts @@ -38,6 +38,14 @@ export const groupBy: Dropdown[] = [ displayValue: 'HTTP status code', key: 'httpCode', }, + { + displayValue: 'RPC Method', + key: 'rpcMethod', + }, + { + displayValue: 'Status Code', + key: 'responseStatusCode', + }, { displayValue: 'Database name', key: 'dbName', diff --git a/frontend/src/container/TraceDetail/Missingtrace.tsx b/frontend/src/container/TraceDetail/Missingtrace.tsx new file mode 100644 index 0000000000..eb0620a4ed --- /dev/null +++ b/frontend/src/container/TraceDetail/Missingtrace.tsx @@ -0,0 +1,41 @@ +import { volcano } from '@ant-design/colors'; +import { InfoCircleOutlined } from '@ant-design/icons'; +import { Popover } from 'antd'; +import React from 'react'; + +function PopOverContent(): JSX.Element { + return ( +
+ More details on missing spans{' '} + + here + +
+ ); +} + +function MissingSpansMessage(): JSX.Element { + return ( + +
+ {' '} + This trace has missing spans +
+
+ ); +} + +export default MissingSpansMessage; diff --git a/frontend/src/container/TraceDetail/SelectedSpanDetails/EllipsedButton.tsx b/frontend/src/container/TraceDetail/SelectedSpanDetails/EllipsedButton.tsx new file mode 100644 index 0000000000..56ef64e4ee --- /dev/null +++ b/frontend/src/container/TraceDetail/SelectedSpanDetails/EllipsedButton.tsx @@ -0,0 +1,53 @@ +import { StyledButton } from 'components/Styled'; +import React from 'react'; + +import { styles } from './styles'; + +function EllipsedButton({ + onToggleHandler, + setText, + value, + event, + buttonText, +}: Props): JSX.Element { + const isFullValueButton = buttonText === 'View full value'; + + const style = [styles.removePadding]; + + if (!isFullValueButton) { + style.push(styles.removeMargin); + } else { + style.push(styles.selectedSpanDetailsContainer); + style.push(styles.buttonContainer); + } + + return ( + { + onToggleHandler(true); + setText({ + subText: value, + text: event, + }); + }} + type="link" + > + {buttonText} + + ); +} + +interface Props { + onToggleHandler: (isOpen: boolean) => void; + setText: (text: { subText: string; text: string }) => void; + value: string; + event: string; + buttonText?: string; +} + +EllipsedButton.defaultProps = { + buttonText: 'View full log event message', +}; + +export default EllipsedButton; diff --git a/frontend/src/container/TraceDetail/SelectedSpanDetails/ErrorTag.tsx b/frontend/src/container/TraceDetail/SelectedSpanDetails/ErrorTag.tsx index 2a663387a5..69b51b3cd8 100644 --- a/frontend/src/container/TraceDetail/SelectedSpanDetails/ErrorTag.tsx +++ b/frontend/src/container/TraceDetail/SelectedSpanDetails/ErrorTag.tsx @@ -1,29 +1,22 @@ -import { Collapse, Modal } from 'antd'; -import Editor from 'components/Editor'; -import { StyledButton } from 'components/Styled'; +import { Collapse } from 'antd'; import useThemeMode from 'hooks/useThemeMode'; import keys from 'lodash-es/keys'; import map from 'lodash-es/map'; -import React, { useState } from 'react'; +import React from 'react'; import { ITraceTree } from 'types/api/trace/getTraceItem'; -import { CustomSubText, CustomSubTitle, styles } from './styles'; +import EllipsedButton from './EllipsedButton'; +import { CustomSubText, CustomSubTitle } from './styles'; const { Panel } = Collapse; -function ErrorTag({ event }: ErrorTagProps): JSX.Element { - const [isOpen, setIsOpen] = useState(false); +function ErrorTag({ + event, + onToggleHandler, + setText, +}: ErrorTagProps): JSX.Element { const { isDarkMode } = useThemeMode(); - const [text, setText] = useState({ - text: '', - subText: '', - }); - - const onToggleHandler = (state: boolean): void => { - setIsOpen(state); - }; - return ( <> {map(event, ({ attributeMap, name }) => { @@ -45,23 +38,23 @@ function ErrorTag({ event }: ErrorTagProps): JSX.Element { return ( <> {event} - + {value}
{isEllipsed && ( - { - onToggleHandler(true); - setText({ - subText: value, - text: event, - }); + - View full log event message - + /> )}
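The JSX call site above is garbled by extraction; for readability, a hedged reconstruction of how ErrorTag now delegates to the shared EllipsedButton added earlier in this diff (prop names come from that component; the exact markup in the source may differ):

	<EllipsedButton
		event={event}
		value={value}
		onToggleHandler={onToggleHandler}
		setText={setText}
	/>
	{/* buttonText is omitted here, so it falls back to the defaultProps
	    value 'View full log event message'. */}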
@@ -71,31 +64,14 @@ function ErrorTag({ event }: ErrorTagProps): JSX.Element { ); })} - - onToggleHandler(false)} - title="Log Message" - visible={isOpen} - destroyOnClose - footer={[]} - width="70vw" - > - {text.text} - - {text.text === 'exception.stacktrace' ? ( - {}} readOnly value={text.subText} /> - ) : ( - - {text.subText} - - )} - ); } interface ErrorTagProps { event: ITraceTree['event']; + onToggleHandler: (isOpen: boolean) => void; + setText: (text: { subText: string; text: string }) => void; } export default ErrorTag; diff --git a/frontend/src/container/TraceDetail/SelectedSpanDetails/index.tsx b/frontend/src/container/TraceDetail/SelectedSpanDetails/index.tsx index 50f2aa9537..49596d14d0 100644 --- a/frontend/src/container/TraceDetail/SelectedSpanDetails/index.tsx +++ b/frontend/src/container/TraceDetail/SelectedSpanDetails/index.tsx @@ -1,9 +1,11 @@ -import { Tabs, Tooltip, Typography } from 'antd'; +import { Modal, Tabs, Tooltip, Typography } from 'antd'; +import Editor from 'components/Editor'; import { StyledSpace } from 'components/Styled'; import useThemeMode from 'hooks/useThemeMode'; -import React, { useMemo } from 'react'; +import React, { useMemo, useState } from 'react'; import { ITraceTree } from 'types/api/trace/getTraceItem'; +import EllipsedButton from './EllipsedButton'; import ErrorTag from './ErrorTag'; import { CardContainer, @@ -12,12 +14,14 @@ import { CustomText, CustomTitle, styles, + SubTextContainer, } from './styles'; const { TabPane } = Tabs; function SelectedSpanDetails(props: SelectedSpanDetailsProps): JSX.Element { const { tree } = props; + const { isDarkMode } = useThemeMode(); const OverLayComponentName = useMemo(() => tree?.name, [tree?.name]); @@ -25,6 +29,17 @@ function SelectedSpanDetails(props: SelectedSpanDetailsProps): JSX.Element { tree?.serviceName, ]); + const [isOpen, setIsOpen] = useState(false); + + const [text, setText] = useState({ + text: '', + subText: '', + }); + + const onToggleHandler = (state: boolean): void => { + setIsOpen(state); + }; + if (!tree) { return
; } @@ -51,18 +66,60 @@ function SelectedSpanDetails(props: SelectedSpanDetailsProps): JSX.Element { + onToggleHandler(false)} + title={text.text} + visible={isOpen} + destroyOnClose + footer={[]} + width="70vw" + centered + > + {text.text === 'exception.stacktrace' ? ( + {}} readOnly value={text.subText} /> + ) : ( + + {text.subText} + + )} + + {tags.length !== 0 ? ( tags.map((tags) => { + const value = tags.key === 'error' ? 'true' : tags.value; + const isEllipsed = value.length > 24; + return ( {tags.value && ( <> {tags.key} - - {tags.key === 'error' ? 'true' : tags.value} - + + value}> + + {value} + + + {isEllipsed && ( + + )} + + )} @@ -74,7 +131,11 @@ function SelectedSpanDetails(props: SelectedSpanDetailsProps): JSX.Element { {tree.event && Object.keys(tree.event).length !== 0 ? ( - + ) : ( No events data in selected span )} diff --git a/frontend/src/container/TraceDetail/SelectedSpanDetails/styles.ts b/frontend/src/container/TraceDetail/SelectedSpanDetails/styles.ts index d8bae86ba7..3c9180dc94 100644 --- a/frontend/src/container/TraceDetail/SelectedSpanDetails/styles.ts +++ b/frontend/src/container/TraceDetail/SelectedSpanDetails/styles.ts @@ -18,7 +18,8 @@ export const CustomText = styled(Paragraph)` export const CustomSubTitle = styled(Title)` &&& { font-size: 14px; - margin-bottom: 8px; + margin-bottom: 0.1rem; + margin-top: 0.5rem; } `; @@ -26,13 +27,19 @@ interface CustomSubTextProps { isDarkMode: boolean; } +export const SubTextContainer = styled.div` + &&& { + background: ${({ isDarkMode }): string => (isDarkMode ? '#444' : '#ddd')}; + } +`; + export const CustomSubText = styled(Paragraph)` &&& { background: ${({ isDarkMode }): string => (isDarkMode ? '#444' : '#ddd')}; font-size: 12px; - padding: 6px 8px; + padding: 4px 8px; word-break: break-all; - margin-bottom: 16px; + margin-bottom: 0rem; } `; @@ -81,10 +88,15 @@ const overflow = css` } `; +const buttonContainer = css` + height: 1.5rem; +`; + export const styles = { removeMargin, removePadding, selectedSpanDetailsContainer, spanEventsTabsContainer, overflow, + buttonContainer, }; diff --git a/frontend/src/container/TraceDetail/index.tsx b/frontend/src/container/TraceDetail/index.tsx index c705123b50..816f59ec2f 100644 --- a/frontend/src/container/TraceDetail/index.tsx +++ b/frontend/src/container/TraceDetail/index.tsx @@ -17,15 +17,23 @@ import dayjs from 'dayjs'; import useUrlQuery from 'hooks/useUrlQuery'; import { spanServiceNameToColorMapping } from 'lib/getRandomColor'; import history from 'lib/history'; +import { map } from 'lodash-es'; import { SPAN_DETAILS_LEFT_COL_WIDTH } from 'pages/TraceDetail/constants'; import React, { useEffect, useMemo, useState } from 'react'; -import { ITraceTree, PayloadProps } from 'types/api/trace/getTraceItem'; +import { ITraceForest, PayloadProps } from 'types/api/trace/getTraceItem'; import { getSpanTreeMetadata } from 'utils/getSpanTreeMetadata'; import { spanToTreeUtil } from 'utils/spanToTree'; +import MissingSpansMessage from './Missingtrace'; import SelectedSpanDetails from './SelectedSpanDetails'; import * as styles from './styles'; -import { getSortedData, IIntervalUnit, INTERVAL_UNITS } from './utils'; +import { FlameGraphMissingSpansContainer, GanttChartWrapper } from './styles'; +import { + getSortedData, + getTreeLevelsCount, + IIntervalUnit, + INTERVAL_UNITS, +} from './utils'; function TraceDetail({ response }: TraceDetailProps): JSX.Element { const spanServiceColors = useMemo( @@ -43,17 +51,23 @@ function TraceDetail({ response }: TraceDetailProps): JSX.Element { 
const [activeHoverId, setActiveHoverId] = useState(''); const [activeSelectedId, setActiveSelectedId] = useState(spanId || ''); - const [treeData, setTreeData] = useState( + const [treesData, setTreesData] = useState( spanToTreeUtil(response[0].events), ); - const { treeData: tree, ...traceMetaData } = useMemo(() => { - const tree = getSortedData(treeData); + const { treesData: tree, ...traceMetaData } = useMemo(() => { + const sortedTreesData: ITraceForest = { + spanTree: map(treesData.spanTree, (tree) => getSortedData(tree)), + missingSpanTree: map( + treesData.missingSpanTree, + (tree) => getSortedData(tree) || [], + ), + }; // Note: Handle undefined /*eslint-disable */ - return getSpanTreeMetadata(tree as ITraceTree, spanServiceColors); + return getSpanTreeMetadata(sortedTreesData, spanServiceColors); /* eslint-enable */ - }, [treeData, spanServiceColors]); + }, [treesData, spanServiceColors]); const [globalTraceMetadata] = useState({ ...traceMetaData, @@ -69,24 +83,34 @@ function TraceDetail({ response }: TraceDetailProps): JSX.Element { }, [activeSelectedId]); const getSelectedNode = useMemo(() => { - return getNodeById(activeSelectedId, treeData); - }, [activeSelectedId, treeData]); + return getNodeById(activeSelectedId, treesData); + }, [activeSelectedId, treesData]); // const onSearchHandler = (value: string) => { // setSearchSpanString(value); // setTreeData(spanToTreeUtil(response[0].events)); // }; + const onFocusSelectedSpanHandler = (): void => { const treeNode = getNodeById(activeSelectedId, tree); + if (treeNode) { - setTreeData(treeNode); + setTreesData(treeNode); } }; const onResetHandler = (): void => { - setTreeData(spanToTreeUtil(response[0].events)); + setTreesData(spanToTreeUtil(response[0].events)); }; + const hasMissingSpans = useMemo( + (): boolean => + tree.missingSpanTree && + Array.isArray(tree.missingSpanTree) && + tree.missingSpanTree.length > 0, + [tree], + ); + return ( @@ -101,16 +125,45 @@ function TraceDetail({ response }: TraceDetailProps): JSX.Element { {traceMetaData.totalSpans} Span + {hasMissingSpans && }
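The `hasMissingSpans` memo above works because `spanToTreeUtil` now returns a forest rather than a single root; the `ITraceForest` type added later in this diff carries both the normal roots and the roots synthesized for spans whose parents never arrived. A minimal sketch of that check, with the node shape simplified (`TraceNode` and the sample data are illustrative, not from the patch):

```typescript
// Sketch only: mirrors the hasMissingSpans memo added to TraceDetail above.
// TraceNode is a simplified stand-in for ITraceTree.
interface TraceNode {
	id: string;
	children: TraceNode[];
	isMissing?: boolean;
}

interface TraceForest {
	spanTree: TraceNode[]; // trees rooted at spans with an empty parent reference
	missingSpanTree: TraceNode[]; // trees re-rooted under synthesized "Missing Span" nodes
}

const hasMissingSpans = (forest: TraceForest): boolean =>
	Array.isArray(forest.missingSpanTree) && forest.missingSpanTree.length > 0;

// A trace whose span_2 never arrived still renders, but the gap is flagged.
const forest: TraceForest = {
	spanTree: [{ id: 'span_1', children: [] }],
	missingSpanTree: [{ id: 'span_2', children: [], isMissing: true }],
};
console.log(hasMissingSpans(forest)); // true
```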
- + {map(tree.spanTree, (tree) => { + return ( + + ); + })} + + {hasMissingSpans && ( + + {map(tree.missingSpanTree, (tree) => { + return ( + + ); + })} + + )} @@ -122,7 +175,9 @@ function TraceDetail({ response }: TraceDetailProps): JSX.Element { justifyContent: 'center', }} > - {tree && dayjs(tree.startTime).format('hh:mm:ss a MM/DD')} + {tree && + traceMetaData.globalStart && + dayjs(traceMetaData.globalStart).format('hh:mm:ss a MM/DD')} - - {/* */} - + - + tree)} + /> ); diff --git a/frontend/src/container/TraceDetail/styles.ts b/frontend/src/container/TraceDetail/styles.ts index 2000ac17ae..5f7a8f84c2 100644 --- a/frontend/src/container/TraceDetail/styles.ts +++ b/frontend/src/container/TraceDetail/styles.ts @@ -1,4 +1,5 @@ -import { css } from 'styled-components'; +import { volcano } from '@ant-design/colors'; +import styled, { css } from 'styled-components'; /** * Styles for the left container. Containers flamegraph, timeline and gantt chart @@ -76,3 +77,38 @@ export const floatRight = css` export const removeMargin = css` margin: 0; `; + +export const GanttChartWrapper = styled.ul` + padding-left: 0; + position: absolute; + width: 100%; + height: 100%; + + ul { + list-style: none; + border-left: 1px solid #434343; + padding-left: 1rem; + width: 100%; + } + + ul li { + position: relative; + + &:before { + position: absolute; + left: -1rem; + top: 10px; + content: ''; + height: 1px; + width: 1rem; + background-color: #434343; + } + } +`; + +export const FlameGraphMissingSpansContainer = styled.div` + border: 1px dashed ${volcano[6]}; + padding: 0.5rem 0; + margin-top: 1rem; + border-radius: 0.25rem; +`; diff --git a/frontend/src/container/TraceDetail/utils.ts b/frontend/src/container/TraceDetail/utils.ts index 3c83607ce6..2541cf9bf0 100644 --- a/frontend/src/container/TraceDetail/utils.ts +++ b/frontend/src/container/TraceDetail/utils.ts @@ -62,7 +62,7 @@ export const convertTimeToRelevantUnit = ( return relevantTime; }; -export const getSortedData = (treeData: ITraceTree): undefined | ITraceTree => { +export const getSortedData = (treeData: ITraceTree): ITraceTree => { const traverse = (treeNode: ITraceTree, level = 0): void => { if (!treeNode) { return; @@ -80,3 +80,21 @@ export const getSortedData = (treeData: ITraceTree): undefined | ITraceTree => { return treeData; }; + +export const getTreeLevelsCount = (tree: ITraceTree): number => { + let levels = 0; + const traverse = (treeNode: ITraceTree, level: number): void => { + if (!treeNode) { + return; + } + + levels = Math.max(level, levels); + + treeNode.children.forEach((childNode) => { + traverse(childNode, level + 1); + }); + }; + traverse(tree, levels); + + return levels; +}; diff --git a/frontend/src/container/TraceFlameGraph/__tests__/TraceFlameGraph.test.tsx b/frontend/src/container/TraceFlameGraph/__tests__/TraceFlameGraph.test.tsx index 3a0954c8fa..fef6314c5a 100644 --- a/frontend/src/container/TraceFlameGraph/__tests__/TraceFlameGraph.test.tsx +++ b/frontend/src/container/TraceFlameGraph/__tests__/TraceFlameGraph.test.tsx @@ -28,6 +28,7 @@ test('loads and displays greeting', () => { spread: 0, totalSpans: 0, }, + missingSpanTree: false, treeData: { children: [], id: '', diff --git a/frontend/src/container/TraceFlameGraph/index.tsx b/frontend/src/container/TraceFlameGraph/index.tsx index 026efe58ca..8cadb3c972 100644 --- a/frontend/src/container/TraceFlameGraph/index.tsx +++ b/frontend/src/container/TraceFlameGraph/index.tsx @@ -93,8 +93,9 @@ function TraceFlameGraph(props: { onSpanSelect: SpanItemProps['onSpanSelect']; 
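	// missingSpanTree (added below) marks this render as the synthesized
	// "Missing Span" forest rather than spans that were actually received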
hoveredSpanId: string; selectedSpanId: string; + missingSpanTree: boolean; }): JSX.Element { - const { treeData, traceMetaData, onSpanHover } = props; + const { treeData, traceMetaData, onSpanHover, missingSpanTree } = props; if (!treeData || treeData.id === 'empty' || !traceMetaData) { return
; @@ -140,6 +141,7 @@ function TraceFlameGraph(props: { hoveredSpanId={hoveredSpanId} selectedSpanId={selectedSpanId} /> + {spanData.children.map((childData) => ( diff --git a/frontend/src/lib/createQueryParams.ts b/frontend/src/lib/createQueryParams.ts index 0f1b3f7ad7..5630098cb2 100644 --- a/frontend/src/lib/createQueryParams.ts +++ b/frontend/src/lib/createQueryParams.ts @@ -1,6 +1,6 @@ -const createQueryParams = (params: { [x: string]: string }): string => +const createQueryParams = (params: { [x: string]: string | number }): string => Object.keys(params) - .map((k) => `${k}=${encodeURI(params[k])}`) + .map((k) => `${k}=${encodeURI(String(params[k]))}`) .join('&'); export default createQueryParams; diff --git a/frontend/src/lib/getMinMax.ts b/frontend/src/lib/getMinMax.ts index 9c1fab94c3..ae830cc06a 100644 --- a/frontend/src/lib/getMinMax.ts +++ b/frontend/src/lib/getMinMax.ts @@ -13,6 +13,9 @@ const GetMinMax = ( if (interval === '1min') { const minTimeAgo = getMinAgo({ minutes: 1 }).getTime(); minTime = minTimeAgo; + } else if (interval === '10min') { + const minTimeAgo = getMinAgo({ minutes: 10 }).getTime(); + minTime = minTimeAgo; } else if (interval === '15min') { const minTimeAgo = getMinAgo({ minutes: 15 }).getTime(); minTime = minTimeAgo; @@ -33,8 +36,9 @@ const GetMinMax = ( // one week = one day * 7 const minTimeAgo = getMinAgo({ minutes: 26 * 60 * 7 }).getTime(); minTime = minTimeAgo; - } else if (interval === '6hr') { - const minTimeAgo = getMinAgo({ minutes: 6 * 60 }).getTime(); + } else if (['4hr', '6hr'].includes(interval)) { + const h = parseInt(interval.replace('hr', ''), 10); + const minTimeAgo = getMinAgo({ minutes: h * 60 }).getTime(); minTime = minTimeAgo; } else if (interval === 'custom') { maxTime = (dateTimeRange || [])[1] || 0; diff --git a/frontend/src/pages/CreateAlert/index.tsx b/frontend/src/pages/CreateAlert/index.tsx index edfe543b1f..3bab0c1ee7 100644 --- a/frontend/src/pages/CreateAlert/index.tsx +++ b/frontend/src/pages/CreateAlert/index.tsx @@ -1,109 +1,9 @@ -import { SaveOutlined } from '@ant-design/icons'; -import { Button, notification } from 'antd'; -import createAlertsApi from 'api/alerts/create'; -import Editor from 'components/Editor'; -import ROUTES from 'constants/routes'; -import { State } from 'hooks/useFetch'; -import history from 'lib/history'; -import React, { useCallback, useState } from 'react'; -import { PayloadProps as CreateAlertPayloadProps } from 'types/api/alerts/create'; +import CreateAlertRule from 'container/CreateAlertRule'; +import React from 'react'; +import { alertDefaults } from 'types/api/alerts/create'; -import { ButtonContainer, Title } from './styles'; - -function CreateAlert(): JSX.Element { - const [value, setEditorValue] = useState( - `\n alert: High RPS\n expr: sum(rate(signoz_latency_count{span_kind="SPAN_KIND_SERVER"}[2m])) by (service_name) > 100\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: High RPS of Applications\n description: "RPS is > 100\n\t\t\t VALUE = {{ $value }}\n\t\t\t LABELS = {{ $labels }}"\n `, - ); - - const [newAlertState, setNewAlertState] = useState< - State - >({ - error: false, - errorMessage: '', - loading: false, - payload: undefined, - success: false, - }); - const [notifications, Element] = notification.useNotification(); - - const defaultError = - 'Oops! 
Some issue occured in saving the alert please try again or contact support@signoz.io'; - - const onSaveHandler = useCallback(async () => { - try { - setNewAlertState((state) => ({ - ...state, - loading: true, - })); - - if (value.length === 0) { - setNewAlertState((state) => ({ - ...state, - loading: false, - })); - notifications.error({ - description: `Oops! We didn't catch that. Please make sure the alert settings are not empty or try again`, - message: 'Error', - }); - return; - } - - const response = await createAlertsApi({ - query: value, - }); - - if (response.statusCode === 200) { - setNewAlertState((state) => ({ - ...state, - loading: false, - payload: response.payload, - })); - notifications.success({ - message: 'Success', - description: 'Congrats. The alert was saved correctly.', - }); - - setTimeout(() => { - history.push(ROUTES.LIST_ALL_ALERT); - }, 3000); - } else { - notifications.error({ - description: response.error || defaultError, - message: 'Error', - }); - setNewAlertState((state) => ({ - ...state, - loading: false, - error: true, - errorMessage: response.error || defaultError, - })); - } - } catch (error) { - notifications.error({ - message: defaultError, - }); - } - }, [notifications, value]); - - return ( - <> - {Element} - - Create New Alert - setEditorValue(value)} value={value} /> - - - - - - ); +function CreateAlertPage(): JSX.Element { + return ; } -export default CreateAlert; +export default CreateAlertPage; diff --git a/frontend/src/pages/EditRules/index.tsx b/frontend/src/pages/EditRules/index.tsx index 09cda600ab..0217e40efc 100644 --- a/frontend/src/pages/EditRules/index.tsx +++ b/frontend/src/pages/EditRules/index.tsx @@ -47,7 +47,12 @@ function EditRules(): JSX.Element { return ; } - return ; + return ( + + ); } export default EditRules; diff --git a/frontend/src/pages/ErrorDetails/index.tsx b/frontend/src/pages/ErrorDetails/index.tsx index 5f9e6c8e9f..348391b741 100644 --- a/frontend/src/pages/ErrorDetails/index.tsx +++ b/frontend/src/pages/ErrorDetails/index.tsx @@ -4,107 +4,87 @@ import getById from 'api/errors/getById'; import Spinner from 'components/Spinner'; import ROUTES from 'constants/routes'; import ErrorDetailsContainer from 'container/ErrorDetails'; -import React from 'react'; +import React, { useMemo } from 'react'; import { useTranslation } from 'react-i18next'; import { useQuery } from 'react-query'; import { useSelector } from 'react-redux'; import { Redirect, useLocation } from 'react-router-dom'; import { AppState } from 'store/reducers'; -import { PayloadProps } from 'types/api/errors/getById'; import { GlobalReducer } from 'types/reducer/globalTime'; +import { urlKey } from './utils'; + +// eslint-disable-next-line sonarjs/cognitive-complexity function ErrorDetails(): JSX.Element { const { t } = useTranslation(['common']); const { maxTime, minTime } = useSelector( (state) => state.globalTime, ); const { search } = useLocation(); - const params = new URLSearchParams(search); + const params = useMemo(() => new URLSearchParams(search), [search]); + + const groupId = params.get(urlKey.groupId); + const errorId = params.get(urlKey.errorId); + const timestamp = params.get(urlKey.timestamp); - const errorId = params.get('errorId'); - const errorType = params.get('errorType'); - const serviceName = params.get('serviceName'); const defaultError = t('something_went_wrong'); - const { data, status } = useQuery( - [ - 'errorByType', - errorType, - 'serviceName', - serviceName, - maxTime, - minTime, - errorId, - ], - { - queryFn: () => - 
getByErrorType({ - end: maxTime, - errorType: errorType || '', - serviceName: serviceName || '', - start: minTime, - }), - enabled: errorId === null && errorType !== null && serviceName !== null, - cacheTime: 5000, - }, - ); - - const { status: ErrorIdStatus, data: errorIdPayload } = useQuery( - [ - 'errorByType', - errorType, - 'serviceName', - serviceName, - maxTime, - minTime, - 'errorId', - errorId, - ], + const { data: IdData, status: IdStatus } = useQuery( + [errorId, timestamp, groupId], { queryFn: () => getById({ - end: maxTime, - errorId: errorId || data?.payload?.errorId || '', - start: minTime, + errorID: errorId || '', + groupID: groupId || '', + timestamp: timestamp || '', }), enabled: - (errorId !== null || status === 'success') && - errorType !== null && - serviceName !== null, - cacheTime: 5000, + errorId !== null && + groupId !== null && + timestamp !== null && + errorId.length !== 0 && + groupId.length !== 0 && + timestamp.length !== 0, }, ); + const { data, status } = useQuery([maxTime, minTime, groupId], { + queryFn: () => + getByErrorType({ + groupID: groupId || '', + timestamp: timestamp || '', + }), + enabled: !!groupId && IdStatus !== 'success', + }); + // if errorType and serviceName is null redirecting to the ALL_ERROR page not now - if (errorType === null || serviceName === null) { + if (groupId === null || timestamp === null) { return ; } // when the api is in loading state - if (status === 'loading' || ErrorIdStatus === 'loading') { + if (status === 'loading' || IdStatus === 'loading') { return ; } // if any error occurred while loading - if (status === 'error' || ErrorIdStatus === 'error') { - return ( - - {data?.error || errorIdPayload?.error || defaultError} - - ); + if (status === 'error' || IdStatus === 'error') { + return {data?.error || defaultError}; } + const idPayload = data?.payload || IdData?.payload; + // if API is successfully but there is an error if ( (status === 'success' && data?.statusCode >= 400) || - (ErrorIdStatus === 'success' && errorIdPayload.statusCode >= 400) + (IdStatus === 'success' && IdData.statusCode >= 400) || + idPayload === null || + idPayload === undefined ) { return {data?.error || defaultError}; } - return ( - - ); + return ; } export interface ErrorDetailsParams { diff --git a/frontend/src/pages/ErrorDetails/utils.ts b/frontend/src/pages/ErrorDetails/utils.ts new file mode 100644 index 0000000000..e26db2290f --- /dev/null +++ b/frontend/src/pages/ErrorDetails/utils.ts @@ -0,0 +1,8 @@ +export const urlKey = { + serviceName: 'serviceName', + exceptionType: 'exceptionType', + groupId: 'groupId', + lastSeen: 'lastSeen', + errorId: 'errorId', + timestamp: 'timestamp', +}; diff --git a/frontend/src/pages/SignUp/SignUp.tsx b/frontend/src/pages/SignUp/SignUp.tsx index d184e74a4e..9465d870c4 100644 --- a/frontend/src/pages/SignUp/SignUp.tsx +++ b/frontend/src/pages/SignUp/SignUp.tsx @@ -262,12 +262,13 @@ function SignUp({ version }: SignUpProps): JSX.Element { setState(updateValue, setConfirmPassword); }} required - id="UpdatePassword" + id="confirmPassword" /> {confirmPasswordError && ( , + Omit { + aggregateOperator: EAggregateOperator | undefined; + disabled: boolean; + name: string; + legend?: string; + metricName: string | null; + groupBy?: string[]; + expression?: string; + tagFilters?: IQueryBuilderTagFilters; + toggleDisable?: boolean; + toggleDelete?: boolean; +} + +export interface IFormulaQueries { + [key: string]: IFormulaQuery; +} + +export interface IFormulaQuery extends IMetricsBuilderFormula { + formulaOnly: 
boolean; + queryName: string; +} + +export interface IMetricQueries { + [key: string]: IMetricQuery; +} + +export interface IMetricQuery extends IMetricsBuilderQuery { + formulaOnly: boolean; + expression?: string; + queryName: string; +} diff --git a/frontend/src/types/api/alerts/create.ts b/frontend/src/types/api/alerts/create.ts index 6a2e5c09ab..6f179af79a 100644 --- a/frontend/src/types/api/alerts/create.ts +++ b/frontend/src/types/api/alerts/create.ts @@ -1,8 +1,48 @@ +import { AlertDef } from 'types/api/alerts/def'; + +import { defaultCompareOp, defaultEvalWindow, defaultMatchType } from './def'; + export interface Props { - query: string; + data: AlertDef; } export interface PayloadProps { status: string; data: string; } + +export const alertDefaults: AlertDef = { + condition: { + compositeMetricQuery: { + builderQueries: { + A: { + queryName: 'A', + name: 'A', + formulaOnly: false, + metricName: '', + tagFilters: { + op: 'AND', + items: [], + }, + groupBy: [], + aggregateOperator: 1, + expression: 'A', + disabled: false, + toggleDisable: false, + toggleDelete: false, + }, + }, + promQueries: {}, + queryType: 1, + }, + op: defaultCompareOp, + matchType: defaultMatchType, + }, + labels: { + severity: 'warning', + }, + annotations: { + description: 'A new alert', + }, + evalWindow: defaultEvalWindow, +}; diff --git a/frontend/src/types/api/alerts/def.ts b/frontend/src/types/api/alerts/def.ts new file mode 100644 index 0000000000..060bdc4d73 --- /dev/null +++ b/frontend/src/types/api/alerts/def.ts @@ -0,0 +1,32 @@ +import { ICompositeMetricQuery } from 'types/api/alerts/compositeQuery'; + +// default match type for threshold +export const defaultMatchType = '1'; + +// default eval window +export const defaultEvalWindow = '5m0s'; + +// default compare op: above +export const defaultCompareOp = '1'; + +export interface AlertDef { + id?: number; + alert?: string; + ruleType?: string; + condition: RuleCondition; + labels?: Labels; + annotations?: Labels; + evalWindow?: string; + source?: string; +} + +export interface RuleCondition { + compositeMetricQuery: ICompositeMetricQuery; + op?: string | undefined; + target?: number | undefined; + matchType?: string | undefined; +} + +export interface Labels { + [key: string]: string; +} diff --git a/frontend/src/types/api/alerts/get.ts b/frontend/src/types/api/alerts/get.ts index 52e9a78e7b..69eef474e1 100644 --- a/frontend/src/types/api/alerts/get.ts +++ b/frontend/src/types/api/alerts/get.ts @@ -1,9 +1,9 @@ -import { Alerts } from './getAll'; +import { AlertDef } from './def'; export interface Props { - id: Alerts['id']; + id: AlertDef['id']; } export type PayloadProps = { - data: string; + data: AlertDef; }; diff --git a/frontend/src/types/api/alerts/put.ts b/frontend/src/types/api/alerts/put.ts deleted file mode 100644 index e70de0b630..0000000000 --- a/frontend/src/types/api/alerts/put.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { PayloadProps as DeletePayloadProps } from './delete'; -import { Alerts } from './getAll'; - -export type PayloadProps = DeletePayloadProps; - -export interface Props { - id: Alerts['id']; - data: DeletePayloadProps['data']; -} diff --git a/frontend/src/types/api/alerts/queryType.ts b/frontend/src/types/api/alerts/queryType.ts new file mode 100644 index 0000000000..277d6f0703 --- /dev/null +++ b/frontend/src/types/api/alerts/queryType.ts @@ -0,0 +1,17 @@ +export type QueryType = 1 | 2 | 3; + +export const QUERY_BUILDER: QueryType = 1; +export const PROMQL: QueryType = 3; + +export const resolveQueryCategoryName = (s: 
number): string => { + switch (s) { + case 1: + return 'Query Builder'; + case 2: + return 'Clickhouse Query'; + case 3: + return 'PromQL'; + default: + return ''; + } +}; diff --git a/frontend/src/types/api/alerts/save.ts b/frontend/src/types/api/alerts/save.ts new file mode 100644 index 0000000000..a815c728d2 --- /dev/null +++ b/frontend/src/types/api/alerts/save.ts @@ -0,0 +1,11 @@ +import { AlertDef } from './def'; + +export type PayloadProps = { + status: string; + data: string; +}; + +export interface Props { + id?: number; + data: AlertDef; +} diff --git a/frontend/src/types/api/errors/getAll.ts b/frontend/src/types/api/errors/getAll.ts index 98c3122f7d..d0bbd7995a 100644 --- a/frontend/src/types/api/errors/getAll.ts +++ b/frontend/src/types/api/errors/getAll.ts @@ -1,8 +1,20 @@ import { GlobalTime } from 'types/actions/globalTime'; +export type Order = 'ascending' | 'descending'; +export type OrderBy = + | 'serviceName' + | 'exceptionCount' + | 'lastSeen' + | 'firstSeen' + | 'exceptionType'; + export interface Props { start: GlobalTime['minTime']; end: GlobalTime['maxTime']; + order?: Order; + orderParam?: OrderBy; + limit?: number; + offset?: number; } export interface Exception { @@ -12,6 +24,7 @@ export interface Exception { lastSeen: string; firstSeen: string; serviceName: string; + groupID: string; } export type PayloadProps = Exception[]; diff --git a/frontend/src/types/api/errors/getByErrorId.ts b/frontend/src/types/api/errors/getByErrorId.ts new file mode 100644 index 0000000000..cceef67ded --- /dev/null +++ b/frontend/src/types/api/errors/getByErrorId.ts @@ -0,0 +1,9 @@ +import { PayloadProps as Prop } from './getByErrorTypeAndService'; + +export interface Props { + groupID: string; + errorID: string; + timestamp: string; +} + +export type PayloadProps = Prop; diff --git a/frontend/src/types/api/errors/getByErrorTypeAndService.ts b/frontend/src/types/api/errors/getByErrorTypeAndService.ts index 4f987874b7..dc15c786ee 100644 --- a/frontend/src/types/api/errors/getByErrorTypeAndService.ts +++ b/frontend/src/types/api/errors/getByErrorTypeAndService.ts @@ -1,10 +1,6 @@ -import { GlobalTime } from 'types/actions/globalTime'; - export interface Props { - start: GlobalTime['minTime']; - end: GlobalTime['maxTime']; - serviceName: string; - errorType: string; + timestamp: string; + groupID: string; } export interface PayloadProps { @@ -16,7 +12,6 @@ export interface PayloadProps { timestamp: string; spanID: string; traceID: string; - serviceName: Props['serviceName']; - newerErrorId: string; - olderErrorId: string; + serviceName: string; + groupID: string; } diff --git a/frontend/src/types/api/errors/getById.ts b/frontend/src/types/api/errors/getErrorCounts.ts similarity index 53% rename from frontend/src/types/api/errors/getById.ts rename to frontend/src/types/api/errors/getErrorCounts.ts index c812410b89..ab690bd0c6 100644 --- a/frontend/src/types/api/errors/getById.ts +++ b/frontend/src/types/api/errors/getErrorCounts.ts @@ -1,11 +1,8 @@ import { GlobalTime } from 'types/actions/globalTime'; -import { PayloadProps as Payload } from './getByErrorTypeAndService'; - -export type PayloadProps = Payload; - export type Props = { start: GlobalTime['minTime']; end: GlobalTime['minTime']; - errorId: string; }; + +export type PayloadProps = number; diff --git a/frontend/src/types/api/errors/getNextPrevId.ts b/frontend/src/types/api/errors/getNextPrevId.ts new file mode 100644 index 0000000000..99a336fdd8 --- /dev/null +++ b/frontend/src/types/api/errors/getNextPrevId.ts @@ -0,0 +1,13 @@ 
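The ordering and pagination fields added to `errors/getAll.ts` above (`order`, `orderParam`, `limit`, `offset`) pair with the `createQueryParams` change earlier in this diff, which now stringifies numeric values. A hedged sketch of how a caller might serialize them; `fetchErrorsUrl` and the `/listErrors` path are illustrative, not taken from the patch:

```typescript
// createQueryParams is copied from frontend/src/lib/createQueryParams.ts as
// updated in this diff; the rest is an assumed usage example.
type Order = 'ascending' | 'descending';
type OrderBy =
	| 'serviceName'
	| 'exceptionCount'
	| 'lastSeen'
	| 'firstSeen'
	| 'exceptionType';

const createQueryParams = (params: { [x: string]: string | number }): string =>
	Object.keys(params)
		.map((k) => `${k}=${encodeURI(String(params[k]))}`)
		.join('&');

const fetchErrorsUrl = (props: {
	start: number;
	end: number;
	order: Order;
	orderParam: OrderBy;
	limit: number;
	offset: number;
}): string => `/listErrors?${createQueryParams(props)}`;

console.log(
	fetchErrorsUrl({
		start: 1657275433246,
		end: 1657275433999,
		order: 'descending',
		orderParam: 'exceptionCount',
		limit: 10,
		offset: 0,
	}),
);
// => /listErrors?start=1657275433246&end=1657275433999&order=descending&orderParam=exceptionCount&limit=10&offset=0
```

Note that `encodeURI` leaves characters such as `&` and `=` unescaped, which is acceptable for these enum-like values but would not be safe for arbitrary strings.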
+export type Props = { + errorID: string; + timestamp: string; + groupID: string; +}; + +export type PayloadProps = { + prevErrorID: string; + nextErrorID: string; + groupID: string; + nextTimestamp: string; + prevTimestamp: string; +}; diff --git a/frontend/src/types/api/trace/getTraceItem.ts b/frontend/src/types/api/trace/getTraceItem.ts index fc05448650..4b12d15b2f 100644 --- a/frontend/src/types/api/trace/getTraceItem.ts +++ b/frontend/src/types/api/trace/getTraceItem.ts @@ -18,10 +18,11 @@ export type Span = [ string, string, string, - string | string[], - string | string[], - string | string[], - ITraceTree[], + string[], + string[], + string[], + string[], + boolean, ]; export interface ITraceTree { @@ -37,6 +38,10 @@ export interface ITraceTree { serviceColour: string; hasError?: boolean; event?: ITraceEvents[]; + isMissing?: boolean; + // For internal use + isProcessed?: boolean; + references?: Record[]; } export interface ITraceTag { @@ -48,3 +53,8 @@ interface ITraceEvents { attributeMap: { event: string; [key: string]: string }; name?: string; } + +export interface ITraceForest { + spanTree: ITraceTree[]; + missingSpanTree: ITraceTree[]; +} diff --git a/frontend/src/types/reducer/trace.ts b/frontend/src/types/reducer/trace.ts index fc1c08f4fc..fed82dd0be 100644 --- a/frontend/src/types/reducer/trace.ts +++ b/frontend/src/types/reducer/trace.ts @@ -69,7 +69,9 @@ export type TraceFilterEnum = | 'httpUrl' | 'operation' | 'serviceName' - | 'status'; + | 'status' + | 'responseStatusCode' + | 'rpcMethod'; export const AllPanelHeading: { key: TraceFilterEnum; @@ -107,6 +109,14 @@ export const AllPanelHeading: { key: 'operation', displayValue: 'Operation', }, + { + key: 'responseStatusCode', + displayValue: 'Status Code', + }, + { + key: 'rpcMethod', + displayValue: 'RPC Method', + }, { key: 'serviceName', displayValue: 'Service Name', diff --git a/frontend/src/utils/__tests__/__snapshots__/spanToTree.test.ts.snap b/frontend/src/utils/__tests__/__snapshots__/spanToTree.test.ts.snap new file mode 100644 index 0000000000..2c2ab402e2 --- /dev/null +++ b/frontend/src/utils/__tests__/__snapshots__/spanToTree.test.ts.snap @@ -0,0 +1,211 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`utils/spanToTree should return a single tree on valid trace data 1`] = ` +Object { + "missingSpanTree": Array [], + "spanTree": Array [ + Object { + "children": Array [ + Object { + "children": Array [ + Object { + "children": Array [], + "event": Array [ + Object { + "attributeMap": Object { + "event": "HTTP request received S3", + "level": "info", + "method": "GET", + "url": "/dispatch?customer=392&nonse=0.015296363321630757", + }, + "timeUnixNano": 1657275433246142000, + }, + ], + "hasError": false, + "id": "span_3", + "isProcessed": true, + "name": "HTTP GET SPAN 3", + "references": Array [ + Object { + "RefType": "CHILD_OF", + "SpanId": "span_2", + "TraceId": "0000000000000000span_1", + }, + ], + "serviceColour": "", + "serviceName": "frontend", + "startTime": 1657275433246, + "tags": Array [ + Object { + "key": "host.name.span3", + "value": "span_3", + }, + ], + "time": 683273000, + "value": 683273000, + }, + ], + "event": Array [ + Object { + "attributeMap": Object { + "event": "HTTP request received S2", + "level": "info", + "method": "GET", + "url": "/dispatch?customer=392&nonse=0.015296363321630757", + }, + "timeUnixNano": 1657275433246142000, + }, + ], + "hasError": false, + "id": "span_2", + "isProcessed": true, + "name": "HTTP GET SPAN 2", + "references": Array [ + Object { + "RefType": 
"CHILD_OF", + "SpanId": "span_1", + "TraceId": "0000000000000000span_1", + }, + ], + "serviceColour": "", + "serviceName": "frontend", + "startTime": 1657275433246, + "tags": Array [ + Object { + "key": "host.name.span2", + "value": "span_2", + }, + ], + "time": 683273000, + "value": 683273000, + }, + ], + "event": Array [ + Object { + "attributeMap": Object { + "event": "HTTP request received S1", + "level": "info", + "method": "GET", + "url": "/dispatch?customer=392&nonse=0.015296363321630757", + }, + "timeUnixNano": 1657275433246142000, + }, + ], + "hasError": false, + "id": "span_1", + "name": "HTTP GET SPAN 1", + "references": Array [ + Object { + "RefType": "CHILD_OF", + "SpanId": "", + "TraceId": "0000000000000000span_1", + }, + ], + "serviceColour": "", + "serviceName": "frontend", + "startTime": 1657275433246, + "tags": Array [ + Object { + "key": "host.name.span1", + "value": "span_1", + }, + ], + "time": 683273000, + "value": 683273000, + }, + ], +} +`; + +exports[`utils/spanToTree should return a single tree on valid trace data 2`] = ` +Object { + "missingSpanTree": Array [ + Object { + "children": Array [ + Object { + "children": Array [], + "event": Array [ + Object { + "attributeMap": Object { + "event": "HTTP request received S3", + "level": "info", + "method": "GET", + "url": "/dispatch?customer=392&nonse=0.015296363321630757", + }, + "timeUnixNano": 1657275433246142000, + }, + ], + "hasError": false, + "id": "span_3", + "isProcessed": true, + "name": "HTTP GET SPAN 3", + "references": Array [ + Object { + "RefType": "CHILD_OF", + "SpanId": "span_2", + "TraceId": "0000000000000000span_1", + }, + ], + "serviceColour": "", + "serviceName": "frontend", + "startTime": 1657275433246, + "tags": Array [ + Object { + "key": "host.name.span3", + "value": "span_3", + }, + ], + "time": 683273000, + "value": 683273000, + }, + ], + "id": "span_2", + "isMissing": true, + "name": "Missing Span (span_2)", + "serviceColour": "", + "serviceName": "", + "startTime": null, + "tags": Array [], + "time": null, + "value": null, + }, + ], + "spanTree": Array [ + Object { + "children": Array [], + "event": Array [ + Object { + "attributeMap": Object { + "event": "HTTP request received S1", + "level": "info", + "method": "GET", + "url": "/dispatch?customer=392&nonse=0.015296363321630757", + }, + "timeUnixNano": 1657275433246142000, + }, + ], + "hasError": false, + "id": "span_1", + "name": "HTTP GET SPAN 1", + "references": Array [ + Object { + "RefType": "CHILD_OF", + "SpanId": "", + "TraceId": "0000000000000000span_1", + }, + ], + "serviceColour": "", + "serviceName": "frontend", + "startTime": 1657275433246, + "tags": Array [ + Object { + "key": "host.name.span1", + "value": "span_1", + }, + ], + "time": 683273000, + "value": 683273000, + }, + ], +} +`; diff --git a/frontend/src/utils/__tests__/spanToTree.test.ts b/frontend/src/utils/__tests__/spanToTree.test.ts new file mode 100644 index 0000000000..4cf7a20fb4 --- /dev/null +++ b/frontend/src/utils/__tests__/spanToTree.test.ts @@ -0,0 +1,21 @@ +import { TraceData } from '../fixtures/TraceData'; +import { spanToTreeUtil } from '../spanToTree'; + +describe('utils/spanToTree', () => { + test('should return a single tree on valid trace data', () => { + const spanTree = spanToTreeUtil(TraceData); + expect(spanTree.spanTree.length).toBe(1); + expect(spanTree.missingSpanTree.length).toBe(0); + expect(spanTree).toMatchSnapshot(); + }); + test('should return a single tree on valid trace data', () => { + const MissingTraceData = [...TraceData]; + 
MissingTraceData.splice(1, 1); + + const spanTree = spanToTreeUtil(MissingTraceData); + + expect(spanTree.spanTree.length).toBe(1); + expect(spanTree.missingSpanTree.length).toBe(1); + expect(spanTree).toMatchSnapshot(); + }); +}); diff --git a/frontend/src/utils/fixtures/TraceData.ts b/frontend/src/utils/fixtures/TraceData.ts new file mode 100644 index 0000000000..289e91e949 --- /dev/null +++ b/frontend/src/utils/fixtures/TraceData.ts @@ -0,0 +1,52 @@ +import { Span } from 'types/api/trace/getTraceItem'; + +export const TraceData: Span[] = [ + [ + 1657275433246, + 'span_1', + '0000000000000000span_1', + 'frontend', + 'HTTP GET SPAN 1', + '2', + '683273000', + ['host.name.span1'], + ['span_1'], + ['{TraceId=0000000000000000span_1, SpanId=, RefType=CHILD_OF}'], + [ + '{"timeUnixNano":1657275433246142000,"attributeMap":{"event":"HTTP request received S1","level":"info","method":"GET","url":"/dispatch?customer=392\\u0026nonse=0.015296363321630757"}}', + ], + false, + ], + [ + 1657275433246, + 'span_2', + '0000000000000000span_1', + 'frontend', + 'HTTP GET SPAN 2', + '2', + '683273000', + ['host.name.span2'], + ['span_2'], + ['{TraceId=0000000000000000span_1, SpanId=span_1, RefType=CHILD_OF}'], + [ + '{"timeUnixNano":1657275433246142000,"attributeMap":{"event":"HTTP request received S2","level":"info","method":"GET","url":"/dispatch?customer=392\\u0026nonse=0.015296363321630757"}}', + ], + false, + ], + [ + 1657275433246, + 'span_3', + '0000000000000000span_1', + 'frontend', + 'HTTP GET SPAN 3', + '2', + '683273000', + ['host.name.span3'], + ['span_3'], + ['{TraceId=0000000000000000span_1, SpanId=span_2, RefType=CHILD_OF}'], + [ + '{"timeUnixNano":1657275433246142000,"attributeMap":{"event":"HTTP request received S3","level":"info","method":"GET","url":"/dispatch?customer=392\\u0026nonse=0.015296363321630757"}}', + ], + false, + ], +]; diff --git a/frontend/src/utils/getSpanTreeMetadata.ts b/frontend/src/utils/getSpanTreeMetadata.ts index d85480566d..e9b6c194ca 100644 --- a/frontend/src/utils/getSpanTreeMetadata.ts +++ b/frontend/src/utils/getSpanTreeMetadata.ts @@ -1,28 +1,31 @@ /* eslint-disable no-param-reassign */ import { errorColor } from 'lib/getRandomColor'; -import { ITraceTree } from 'types/api/trace/getTraceItem'; +import { ITraceForest, ITraceTree } from 'types/api/trace/getTraceItem'; /** * Traverses the Span Tree data and returns the relevant meta data. 
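 * It walks every root in both spanTree and missingSpanTree of the ITraceForest.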
* Metadata includes globalStart, globalEnd, */ export const getSpanTreeMetadata = ( - treeData: ITraceTree, + treesData: ITraceForest, spanServiceColours: { [key: string]: string }, ): GetSpanTreeMetaData => { let globalStart = Number.POSITIVE_INFINITY; let globalEnd = Number.NEGATIVE_INFINITY; let totalSpans = 0; let levels = 1; + const traverse = (treeNode: ITraceTree, level = 0): void => { if (!treeNode) { return; } totalSpans += 1; levels = Math.max(levels, level); - const { startTime } = treeNode; - const endTime = startTime + treeNode.value / 1e6; - globalStart = Math.min(globalStart, startTime); - globalEnd = Math.max(globalEnd, endTime); + const { startTime, value } = treeNode; + if (startTime !== null && value !== null) { + const endTime = startTime + value / 1e6; + globalStart = Math.min(globalStart, startTime); + globalEnd = Math.max(globalEnd, endTime); + } if (treeNode.hasError) { treeNode.serviceColour = errorColor; } else treeNode.serviceColour = spanServiceColours[treeNode.serviceName]; @@ -30,7 +33,12 @@ export const getSpanTreeMetadata = ( traverse(childNode, level + 1); }); }; - traverse(treeData, 1); + treesData.spanTree.forEach((treeData) => { + traverse(treeData, 1); + }); + treesData.missingSpanTree.forEach((treeData) => { + traverse(treeData, 1); + }); return { globalStart, @@ -38,7 +46,7 @@ export const getSpanTreeMetadata = ( spread: globalEnd - globalStart, totalSpans, levels, - treeData, + treesData, }; }; @@ -48,5 +56,5 @@ interface GetSpanTreeMetaData { spread: number; totalSpans: number; levels: number; - treeData: ITraceTree; + treesData: ITraceForest; } diff --git a/frontend/src/utils/spanToTree.ts b/frontend/src/utils/spanToTree.ts index 8b82671060..142df3dec8 100644 --- a/frontend/src/utils/spanToTree.ts +++ b/frontend/src/utils/spanToTree.ts @@ -1,137 +1,131 @@ -/* eslint-disable */ -// @ts-nocheck - +/* eslint-disable no-restricted-syntax */ import { cloneDeep } from 'lodash-es'; -import { ITraceTree, Span } from 'types/api/trace/getTraceItem'; +import { ITraceForest, ITraceTree, Span } from 'types/api/trace/getTraceItem'; -export const spanToTreeUtil = (originalList: Span[]): ITraceTree => { - // Initializing tree. What should be returned is trace is empty? 
We should have better error handling - let tree: ITraceTree = { - id: 'empty', - name: 'default', - value: 0, - time: 0, - startTime: 0, - tags: [], - children: [], - serviceColour: '', - serviceName: '', - }; - - const spanlist = cloneDeep(originalList); - - // let spans :spanItem[]= trace.spans; - - if (spanlist) { - // Create a dict with spanIDs as keys - // PNOTE - // Can we now assign different strings as id - Yes - // https://stackoverflow.com/questions/15877362/declare-and-initialize-a-dictionary-in-typescript - - // May1 - // https://stackoverflow.com/questions/13315131/enforcing-the-type-of-the-indexed-members-of-a-typescript-object - - const mapped_array: { [id: string]: Span } = {}; - const originalListArray: { [id: string]: Span } = {}; - - for (let i = 0; i < spanlist.length; i++) { - originalListArray[spanlist[i][1]] = originalList[i]; - - mapped_array[spanlist[i][1]] = spanlist[i]; - mapped_array[spanlist[i][1]][10] = []; // initialising the 10th element in the Span data structure which is array - // of type ITraceTree - // console.log('IDs while creating mapped array') - // console.log(`SpanID is ${spanlist[i][1]}\n`); - } - - // console.log(`In SpanTreeUtil: mapped_arrayis ${mapped_array}`); - - for (const id in mapped_array) { - const child_span = mapped_array[id]; - - // mapping tags to new structure - const tags_temp = []; - if (child_span[7] !== null && child_span[8] !== null) { - if ( - typeof child_span[7] === 'string' && - typeof child_span[8] === 'string' - ) { - tags_temp.push({ key: child_span[7], value: child_span[8] }); - } else if (child_span[7].length > 0 && child_span[8].length > 0) { - for (let j = 0; j < child_span[7].length; j++) { - tags_temp.push({ key: child_span[7][j], value: child_span[8][j] }); - } - } - } - - const push_object: ITraceTree = { - id: child_span[1], - name: child_span[4], - value: parseInt(child_span[6]), - time: parseInt(child_span[6]), - startTime: child_span[0], - tags: tags_temp, - children: mapped_array[id][10], - serviceName: child_span[3], - hasError: !!child_span[11], - serviceColour: '', - event: originalListArray[id][10].map((e) => { - return JSON.parse(decodeURIComponent(e || '{}')) || {}; - }), - }; - - const referencesArr = mapped_array[id][9]; - let refArray = []; - if (typeof referencesArr === 'string') { - refArray.push(referencesArr); - } else { - refArray = referencesArr; - } - const references = []; - - refArray.forEach((element) => { - element = element - .replaceAll('{', '') - .replaceAll('}', '') - .replaceAll(' ', ''); - const arr = element.split(','); - const refItem = { traceID: '', spanID: '', refType: '' }; - arr.forEach((obj) => { - const arr2 = obj.split('='); - if (arr2[0] === 'TraceId') { - refItem.traceID = arr2[1]; - } else if (arr2[0] === 'SpanId') { - refItem.spanID = arr2[1]; - } else if (arr2[0] === 'RefType') { - refItem.refType = arr2[1]; - } - }); - - references.push(refItem); +const getSpanReferences = ( + rawReferences: string[] = [], +): Record[] => { + return rawReferences.map((rawRef) => { + const refObject: Record = {}; + rawRef + .replaceAll('{', '') + .replaceAll('}', '') + .replaceAll(' ', '') + .split(',') + .forEach((rawRefKeyPair) => { + const [key, value] = rawRefKeyPair.split('='); + refObject[key] = value; }); - if (references.length !== 0 && references[0].spanID.length !== 0) { - if (references[0].refType === 'CHILD_OF') { - const parentID = references[0].spanID; - // console.log(`In SpanTreeUtil: mapped_array[parentID] is ${mapped_array[parentID]}`); - - if (typeof 
mapped_array[parentID] !== 'undefined') { - // checking for undefined [10] issue - mapped_array[parentID][10].push(push_object); - } else { - // console.log( - // `In SpanTreeUtil: mapped_array[parentID] is undefined, parentID is ${parentID}`, - // ); - // console.log( - // `In SpanTreeUtil: mapped_array[parentID] is undefined, mapped_array[parentID] is ${mapped_array[parentID]}`, - // ); - } - } - } else { - tree = push_object; - } - } // end of for loop - } // end of if(spans) - - return { ...tree }; + return refObject; + }); +}; + +// This getSpanTags is migrated from the previous implementation. +const getSpanTags = (spanData: Span): { key: string; value: string }[] => { + const tags = []; + if (spanData[7] !== null && spanData[8] !== null) { + if (typeof spanData[7] === 'string' && typeof spanData[8] === 'string') { + tags.push({ key: spanData[7], value: spanData[8] }); + } else if (spanData[7].length > 0 && spanData[8].length > 0) { + for (let j = 0; j < spanData[7].length; j += 1) { + tags.push({ key: spanData[7][j], value: spanData[8][j] }); + } + } + } + return tags; +}; + +// eslint-disable-next-line sonarjs/cognitive-complexity +export const spanToTreeUtil = (inputSpanList: Span[]): ITraceForest => { + const spanList = cloneDeep(inputSpanList); + const traceIdSet: Set = new Set(); + const spanMap: Record = {}; + + const createTarceRootSpan = ( + spanReferences: Record[], + ): void => { + spanReferences.forEach(({ SpanId, TraceId }) => { + traceIdSet.add(TraceId); + if (SpanId && !spanMap[SpanId]) { + spanMap[SpanId] = { + id: SpanId, + name: `Missing Span (${SpanId})`, + children: [], + serviceColour: '', + serviceName: '', + startTime: null as never, + tags: [], + time: null as never, + value: null as never, + isMissing: true, + }; + } + }); + }; + + spanList.forEach((span) => { + const spanReferences = getSpanReferences(span[9] as string[]); + const spanObject = { + id: span[1], + name: span[4], + value: parseInt(span[6], 10), + time: parseInt(span[6], 10), + startTime: span[0], + tags: getSpanTags(span), + children: [], + serviceName: span[3], + hasError: !!span[11], + serviceColour: '', + event: span[10].map((e) => { + return ( + JSON.parse(decodeURIComponent((e as never) || ('{}' as never))) || + ({} as Record) + ); + }), + references: spanReferences, + }; + spanMap[span[1]] = spanObject; + }); + + for (const [, spanData] of Object.entries(spanMap)) { + if (spanData.references) { + createTarceRootSpan(spanData.references); + spanData.references.forEach(({ SpanId: parentSpanId }) => { + if (spanMap[parentSpanId]) { + spanData.isProcessed = true; + spanMap[parentSpanId].children.push(spanData); + } + }); + } + } + for (const [spanId, spanData] of Object.entries(spanMap)) { + if (spanData.isProcessed) { + delete spanMap[spanId]; + } + } + + const spanTree: ITraceTree[] = []; + const missingSpanTree: ITraceTree[] = []; + const referencedTraceIds: string[] = Array.from(traceIdSet); + Object.keys(spanMap).forEach((spanId) => { + const isRoot = spanMap[spanId].references?.some((refs) => refs.SpanId === ''); + if (isRoot) { + spanTree.push(spanMap[spanId]); + return; + } + + for (const traceId of referencedTraceIds) { + if (traceId.includes(spanId)) { + spanTree.push(spanMap[spanId]); + } else { + missingSpanTree.push(spanMap[spanId]); + } + } + }); + + return { + spanTree, + missingSpanTree, + }; }; diff --git a/frontend/tests/auth.json b/frontend/tests/auth.json new file mode 100644 index 0000000000..2dd3d40466 --- /dev/null +++ b/frontend/tests/auth.json @@ -0,0 +1,38 @@ +{ + 
"cookies": [], + "origins": [ + { + "origin": "http://localhost:3301", + "localStorage": [ + { + "name": "isSideBarCollapsed", + "value": "false" + }, + { + "name": "metricsTimeDurations", + "value": "{}" + }, + { + "name": "i18nextLng", + "value": "en-US" + }, + { + "name": "reactQueryDevtoolsSortFn", + "value": "\"Status > Last Updated\"" + }, + { + "name": "AUTH_TOKEN", + "value": "authtoken" + }, + { + "name": "IS_LOGGED_IN", + "value": "true" + }, + { + "name": "REFRESH_AUTH_TOKEN", + "value": "refreshJwt" + } + ] + } + ] +} \ No newline at end of file diff --git a/frontend/tests/fixtures/api/login/200.json b/frontend/tests/fixtures/api/login/200.json new file mode 100644 index 0000000000..2ea22f87d8 --- /dev/null +++ b/frontend/tests/fixtures/api/login/200.json @@ -0,0 +1,7 @@ +{ + "accessJwt": "authtoken", + "accessJwtExpiry": 1656609177, + "refreshJwt": "refreshJwt", + "refreshJwtExpiry": 1659199377, + "userId": "34917776-514b-4b95-a4f5-1a5cc06e34b6" +} diff --git a/frontend/tests/fixtures/api/organisation/201.json b/frontend/tests/fixtures/api/organisation/201.json new file mode 100644 index 0000000000..deea4b3512 --- /dev/null +++ b/frontend/tests/fixtures/api/organisation/201.json @@ -0,0 +1,3 @@ +{ + "data": "org updated successfully" +} diff --git a/frontend/tests/fixtures/api/register/200.json b/frontend/tests/fixtures/api/register/200.json new file mode 100644 index 0000000000..6088583942 --- /dev/null +++ b/frontend/tests/fixtures/api/register/200.json @@ -0,0 +1 @@ +{ "data": "user registered successfully" } diff --git a/frontend/tests/fixtures/api/register/401.json b/frontend/tests/fixtures/api/register/401.json new file mode 100644 index 0000000000..6fd241b44c --- /dev/null +++ b/frontend/tests/fixtures/api/register/401.json @@ -0,0 +1,5 @@ +{ + "status": "error", + "errorType": "unauthorized", + "error": "You are not allowed to create an account. 
Please ask your admin to send an invite link" +} diff --git a/frontend/tests/fixtures/api/userId/200.json b/frontend/tests/fixtures/api/userId/200.json new file mode 100644 index 0000000000..527c60eab6 --- /dev/null +++ b/frontend/tests/fixtures/api/userId/200.json @@ -0,0 +1,11 @@ +{ + "createdAt": 1651759141, + "email": "prashant@signoz.io", + "groupId": "36261238-3214-4ae9-9ef1-661a9f7be5d0", + "id": "509fab4a-2578-4f24-8245-1b77b2d6d937", + "name": "Prashant", + "orgId": "72b4024a-3301-4d90-951e-ee071b96dba5", + "organization": "Meta", + "profilePictureURL": "", + "role": "ADMIN" +} diff --git a/frontend/tests/fixtures/common.ts b/frontend/tests/fixtures/common.ts new file mode 100644 index 0000000000..d691cae423 --- /dev/null +++ b/frontend/tests/fixtures/common.ts @@ -0,0 +1,43 @@ +import { Page } from '@playwright/test'; +import { getVersion } from 'constants/api'; + +import loginApiResponse from './api/login/200.json'; +import updateOrgResponse from './api/organisation/201.json'; +import successLoginResponse from './api/register/200.json'; +import userLoginResponse from './api/userId/200.json'; +import { version } from './constant'; + +export const waitForVersionApiSuccess = async (page: Page): Promise => { + await page.route(`**/${getVersion}`, (route) => + route.fulfill({ + status: 200, + body: JSON.stringify({ version }), + }), + ); +}; + +export const loginApi = async (page: Page): Promise => { + await Promise.all([ + page.route(`**/register`, (route) => + route.fulfill({ + status: 200, + body: JSON.stringify(successLoginResponse), + }), + ), + page.route(`**/user/${loginApiResponse.userId}`, (route) => + route.fulfill({ status: 200, body: JSON.stringify(userLoginResponse) }), + ), + page.route('**/login', (route) => + route.fulfill({ + status: 200, + body: JSON.stringify(loginApiResponse), + }), + ), + page.route(`**/org/${userLoginResponse.orgId}`, (route) => + route.fulfill({ + status: 200, + body: JSON.stringify(updateOrgResponse), + }), + ), + ]); +}; diff --git a/frontend/tests/fixtures/constant.ts b/frontend/tests/fixtures/constant.ts new file mode 100644 index 0000000000..ac20029c4a --- /dev/null +++ b/frontend/tests/fixtures/constant.ts @@ -0,0 +1,8 @@ +export const version = 'v1.0.0'; +export const validemail = 'sample@signoz.io'; +export const validName = 'Palash'; +export const validCompanyName = 'Signoz'; +export const validPassword = 'SamplePassword98@@'; + +export const getStartedButtonSelector = 'button[data-attr="signup"]'; +export const confirmPasswordSelector = '#password-confirm-error'; diff --git a/frontend/tests/login/fail.spec.ts b/frontend/tests/login/fail.spec.ts new file mode 100644 index 0000000000..5366d7240c --- /dev/null +++ b/frontend/tests/login/fail.spec.ts @@ -0,0 +1,28 @@ +import { expect, test } from '@playwright/test'; +import { getVersion } from 'constants/api'; +import ROUTES from 'constants/routes'; + +test.describe('Version API fail while loading login page', async () => { + test('Something went wrong', async ({ page, baseURL }) => { + const loginPage = `${baseURL}${ROUTES.LOGIN}`; + + const text = 'Something went wrong'; + + await page.route(`**/${getVersion}`, (route) => + route.fulfill({ + status: 500, + body: JSON.stringify({ error: text }), + }), + ); + + await page.goto(loginPage, { + waitUntil: 'networkidle', + }); + + const el = page.locator(`text=${text}`); + + expect(el).toBeVisible(); + expect(el).toHaveText(`${text}`); + expect(await el.getAttribute('disabled')).toBe(null); + }); +}); diff --git 
a/frontend/tests/login/index.spec.ts b/frontend/tests/login/index.spec.ts
new file mode 100644
index 0000000000..ec735460ab
--- /dev/null
+++ b/frontend/tests/login/index.spec.ts
@@ -0,0 +1,49 @@
+import { expect, test } from '@playwright/test';
+import ROUTES from 'constants/routes';
+
+import { waitForVersionApiSuccess } from '../fixtures/common';
+import { version } from '../fixtures/constant';
+
+test.describe('Login Page', () => {
+	test.beforeEach(async ({ baseURL, page }) => {
+		const loginPage = `${baseURL}${ROUTES.LOGIN}`;
+
+		await waitForVersionApiSuccess(page);
+
+		await Promise.all([page.goto(loginPage), page.waitForRequest('**/version')]);
+	});
+
+	test('Login Page text should be visible', async ({ page }) => {
+		const signup = 'Monitor your applications. Find what is causing issues.';
+
+		// Locate text=Monitor your applications. Find what is causing issues.
+		const el = page.locator(`text=${signup}`);
+
+		expect(el).toBeVisible();
+	});
+
+	test('Create an account button should be present', async ({
+		page,
+		baseURL,
+	}) => {
+		const loginPage = `${baseURL}${ROUTES.LOGIN}`;
+
+		// find the button which has text=Create an account
+		const button = page.locator('text=Create an account');
+
+		expect(button).toBeVisible();
+		expect(button).toHaveText('Create an account');
+		expect(await button.getAttribute('disabled')).toBe(null);
+
+		expect(await button.isEnabled()).toBe(true);
+		await expect(page).toHaveURL(loginPage);
+	});
+
+	test('Version of the application when api returns 200', async ({ page }) => {
+		// Locate text=SigNoz ${version}
+		const element = page.locator(`text=SigNoz ${version}`);
+		element.isVisible();
+		const text = await element.innerText();
+		expect(text).toBe(`SigNoz ${version}`);
+	});
+});
diff --git a/frontend/tests/service/index.spec.ts b/frontend/tests/service/index.spec.ts
new file mode 100644
index 0000000000..ae708322ed
--- /dev/null
+++ b/frontend/tests/service/index.spec.ts
@@ -0,0 +1,22 @@
+import { expect, Page, test } from '@playwright/test';
+import ROUTES from 'constants/routes';
+
+import { loginApi } from '../fixtures/common';
+
+let page: Page;
+
+test.describe('Service Page', () => {
+	test.beforeEach(async ({ baseURL, browser }) => {
+		const context = await browser.newContext({ storageState: 'tests/auth.json' });
+		const newPage = await context.newPage();
+
+		await loginApi(newPage);
+
+		await newPage.goto(`${baseURL}${ROUTES.APPLICATION}`);
+
+		page = newPage;
+	});
+	test('Service Page is rendered', async ({ baseURL }) => {
+		await expect(page).toHaveURL(`${baseURL}${ROUTES.APPLICATION}`);
+	});
+});
diff --git a/frontend/tests/signup/index.spec.ts b/frontend/tests/signup/index.spec.ts
index a7e06f4fa6..afdc98f140 100644
--- a/frontend/tests/signup/index.spec.ts
+++ b/frontend/tests/signup/index.spec.ts
@@ -1,17 +1,224 @@
-import { expect, test } from '@playwright/test';
+import { expect, Page, PlaywrightTestOptions, test } from '@playwright/test';
 import ROUTES from 'constants/routes';
 
-test('Login Page', async ({ page, baseURL }) => {
-	const loginPage = `${baseURL}${ROUTES.LOGIN}`;
+import { loginApi, waitForVersionApiSuccess } from '../fixtures/common';
+import {
+	confirmPasswordSelector,
+	getStartedButtonSelector,
+	validCompanyName,
+	validemail,
+	validName,
+	validPassword,
+} from '../fixtures/constant';
 
-	await page.goto(loginPage, {
-		waitUntil: 'networkidle',
+const waitForSignUpPageSuccess = async (
+	baseURL: PlaywrightTestOptions['baseURL'],
+	page: Page,
+): Promise<void> => {
+	const signupPage =
`${baseURL}${ROUTES.SIGN_UP}`; + + await page.goto(signupPage); + + await waitForVersionApiSuccess(page); +}; + +interface FillDetailsInSignUpFormProps { + page: Page; + email: string; + name: string; + companyName: string; + password: string; + confirmPassword: string; +} + +const fillDetailsInSignUpForm = async ({ + page, + email, + name, + companyName, + password, + confirmPassword, +}: FillDetailsInSignUpFormProps): Promise => { + const emailplaceholder = '[placeholder="name\\@yourcompany\\.com"]'; + const nameplaceholder = '[placeholder="Your Name"]'; + const companyPlaceholder = '[placeholder="Your Company"]'; + const currentPasswordId = '#currentPassword'; + const confirmPasswordId = '#confirmPassword'; + + // Fill [placeholder="name\@yourcompany\.com"] + await page.locator(emailplaceholder).fill(email); + + // Fill [placeholder="Your Name"] + await page.locator(nameplaceholder).fill(name); + + // Fill [placeholder="Your Company"] + await page.locator(companyPlaceholder).fill(companyName); + + // Fill #currentPassword + await page.locator(currentPasswordId).fill(password); + + // Fill #confirmPasswordId + await page.locator(confirmPasswordId).fill(confirmPassword); +}; + +test.describe('Sign Up Page', () => { + test('When User successfull signup and logged in, he should be redirected to dashboard', async ({ + page, + baseURL, + }) => { + const loginPage = `${baseURL}${ROUTES.LOGIN}`; + + await waitForVersionApiSuccess(page); + + await Promise.all([page.goto(loginPage), page.waitForRequest('**/version')]); + + const buttonSignupButton = page.locator('text=Create an account'); + + await buttonSignupButton.click(); + + expect(page).toHaveURL(`${baseURL}${ROUTES.SIGN_UP}`); }); - const signup = 'Monitor your applications. Find what is causing issues.'; + test('Invite link validation', async ({ baseURL, page }) => { + await waitForSignUpPageSuccess(baseURL, page); + const message = + 'This will create an admin account. If you are not an admin, please ask your admin for an invite link'; - // Click text=Monitor your applications. Find what is causing issues. 
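The new Playwright specs share login state through `tests/auth.json`: the signup spec persists it with `context.storageState` after a mocked login, and the service spec rehydrates it via `browser.newContext`, as seen in the surrounding specs. A minimal sketch of that round-trip (the URLs are illustrative; the path matches the repo's `tests/auth.json`):

```typescript
import { test } from '@playwright/test';

// Persist the signed-in state once, as the signup spec does after login.
test('persist auth state', async ({ context, page }) => {
	await page.goto('http://localhost:3301'); // illustrative app URL
	// ...perform or mock the login flow here...
	await context.storageState({ path: 'tests/auth.json' });
});

// Later specs rehydrate that state instead of logging in again,
// as the Service Page spec does in its beforeEach.
test('reuse auth state', async ({ browser }) => {
	const context = await browser.newContext({ storageState: 'tests/auth.json' });
	const page = await context.newPage();
	await page.goto('http://localhost:3301/application'); // illustrative route
	await context.close();
});
```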
- const el = page.locator(`text=${signup}`); + const messageText = await page.locator(`text=${message}`).innerText(); - expect(el).toBeVisible(); + expect(messageText).toBe(message); + }); + + test('User Sign up with valid details', async ({ baseURL, page, context }) => { + await waitForSignUpPageSuccess(baseURL, page); + + const gettingStartedButton = page.locator(getStartedButtonSelector); + + expect(await gettingStartedButton.isDisabled()).toBe(true); + + await fillDetailsInSignUpForm({ + companyName: validCompanyName, + confirmPassword: validPassword, + email: validemail, + name: validName, + page, + password: validPassword, + }); + + // password validation message is not present + const locator = await page.locator(confirmPasswordSelector).isVisible(); + expect(locator).toBe(false); + + const buttonText = await gettingStartedButton.evaluate((e) => e.innerHTML); + + expect(buttonText).toMatch(/Get Started/i); + + // Getting Started button is not disabled + expect(await gettingStartedButton.isDisabled()).toBe(false); + + await loginApi(page); + + await gettingStartedButton.click(); + + await expect(page).toHaveURL(`${baseURL}${ROUTES.APPLICATION}`); + + await context.storageState({ + path: 'tests/auth.json', + }); + }); + + test('Empty name with valid details', async ({ baseURL, page }) => { + await waitForSignUpPageSuccess(baseURL, page); + + await fillDetailsInSignUpForm({ + companyName: validCompanyName, + confirmPassword: validPassword, + email: validemail, + name: '', + page, + password: validPassword, + }); + + const gettingStartedButton = page.locator(getStartedButtonSelector); + + expect(await gettingStartedButton.isDisabled()).toBe(true); + }); + + test('Empty Company name with valid details', async ({ baseURL, page }) => { + await waitForSignUpPageSuccess(baseURL, page); + + await fillDetailsInSignUpForm({ + companyName: '', + confirmPassword: validPassword, + email: validemail, + name: validName, + page, + password: validPassword, + }); + + const gettingStartedButton = page.locator(getStartedButtonSelector); + + expect(await gettingStartedButton.isDisabled()).toBe(true); + }); + + test('Empty Email with valid details', async ({ baseURL, page }) => { + await waitForSignUpPageSuccess(baseURL, page); + + await fillDetailsInSignUpForm({ + companyName: validCompanyName, + confirmPassword: validPassword, + email: '', + name: validName, + page, + password: validPassword, + }); + + const gettingStartedButton = page.locator(getStartedButtonSelector); + + expect(await gettingStartedButton.isDisabled()).toBe(true); + }); + + test('Empty Password and confirm password with valid details', async ({ + baseURL, + page, + }) => { + await waitForSignUpPageSuccess(baseURL, page); + + await fillDetailsInSignUpForm({ + companyName: validCompanyName, + confirmPassword: '', + email: validemail, + name: validName, + page, + password: '', + }); + + const gettingStartedButton = page.locator(getStartedButtonSelector); + + expect(await gettingStartedButton.isDisabled()).toBe(true); + + // password validation message is not present + const locator = await page.locator(confirmPasswordSelector).isVisible(); + expect(locator).toBe(false); + }); + + test('Miss Match Password and confirm password with valid details', async ({ + baseURL, + page, + }) => { + await waitForSignUpPageSuccess(baseURL, page); + + await fillDetailsInSignUpForm({ + companyName: validCompanyName, + confirmPassword: validPassword, + email: validemail, + name: validName, + page, + password: '', + }); + + // password validation message is 
diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json index ca86de66b0..92ea1e3649 100644 --- a/frontend/tsconfig.json +++ b/frontend/tsconfig.json @@ -36,6 +36,7 @@ "./commitlint.config.ts", "./webpack.config.js", "./webpack.config.prod.js", - "./jest.setup.ts" + "./jest.setup.ts", + "./tests/**.ts", ] } diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 3a254370a1..f2d9ad04ad 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -4093,6 +4093,11 @@ chartjs-adapter-date-fns@^2.0.0: resolved "https://registry.yarnpkg.com/chartjs-adapter-date-fns/-/chartjs-adapter-date-fns-2.0.0.tgz#5e53b2f660b993698f936f509c86dddf9ed44c6b" integrity sha512-rmZINGLe+9IiiEB0kb57vH3UugAtYw33anRiw5kS2Tu87agpetDDoouquycWc9pRsKtQo5j+vLsYHyr8etAvFw== +chartjs-plugin-annotation@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/chartjs-plugin-annotation/-/chartjs-plugin-annotation-1.4.0.tgz#4c84cec1ec838bc09712f3686237866e6c3f4798" + integrity sha512-OC0eGoVvdxTtGGi8mV3Dr+G1YmMhtYYQWqGMb2uWcgcnyiBslaRKPofKwAYWPbh7ABnmQNsNDQLIKPH+XiaZLA== + "chokidar@>=3.0.0 <4.0.0", chokidar@^3.5.3: version "3.5.3" resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd" @@ -12408,6 +12413,11 @@ timed-out@^4.0.1: resolved "https://registry.yarnpkg.com/timed-out/-/timed-out-4.0.1.tgz#f32eacac5a175bea25d7fab565ab3ed8741ef56f" integrity sha1-8y6srFoXW+ol1/q1Zas+2HQe9W8= +timestamp-nano@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/timestamp-nano/-/timestamp-nano-1.0.0.tgz#03bf0b43c2bdcb913a6a02fbaae6f97d68650f3a" + integrity sha512-NO/1CZigzlCWQiWdIGv8ebXt6Uk77zdLz2NE7KcZRU5Egj2+947lzUpk30xQUQlq5dRY25j7ZulG4RfA2DHYfA== + tiny-invariant@^1.0.2: version "1.2.0" resolved "https://registry.yarnpkg.com/tiny-invariant/-/tiny-invariant-1.2.0.tgz#a1141f86b672a9148c72e978a19a73b9b94a15a9"
diff --git a/pkg/query-service/README.md b/pkg/query-service/README.md index 72b69afc9a..bac8851855 100644 --- a/pkg/query-service/README.md +++ b/pkg/query-service/README.md @@ -1,6 +1,6 @@ # Query Service -Query service is the interface between forntend and databases. It is written in **Golang**. It will have modules for all supported databases. Query service is responsible to: +Query service is the interface between frontend and databases. It is written in **Golang**. It will have modules for all supported databases. Query service is responsible to: - parse the request from Frontend - create relevant Clickhouse queries (and all other supported database queries) - parse response from databases and handle error if any @@ -29,4 +29,4 @@ ClickHouseUrl=tcp://localhost:9001 STORAGE=clickhouse build/query-service ``` #### Docker Images -The docker images of query-service is available at https://hub.docker.com/r/signoz/query-service \ No newline at end of file +The docker images of query-service are available at https://hub.docker.com/r/signoz/query-service diff --git a/pkg/query-service/app/clickhouseReader/options.go b/pkg/query-service/app/clickhouseReader/options.go index 30f23b5cf3..99fe5080ae 100644 --- a/pkg/query-service/app/clickhouseReader/options.go +++ b/pkg/query-service/app/clickhouseReader/options.go @@ -22,7 +22,7 @@ const ( defaultTraceDB string = "signoz_traces" defaultOperationsTable string = "signoz_operations" defaultIndexTable string = "signoz_index_v2" - defaultErrorTable string = "signoz_error_index" + defaultErrorTable string = "signoz_error_index_v2" defaulDurationTable string = "durationSortMV" defaultSpansTable string = "signoz_spans" defaultWriteBatchDelay time.Duration = 5 * time.Second
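Aside: the error table default moves to signoz_error_index_v2 here, matching the constant rename in reader.go below. As the README notes, the service reads its connection string from the ClickHouseUrl environment variable, which NewReader in the next hunk picks up via os.Getenv. A minimal standalone sketch of that lookup, not part of the patch; the fallback address is an assumption for illustration:

```go
package main

import (
	"fmt"
	"os"
)

// resolveDatasource mirrors the env-var convention used by NewReader below:
// it reads ClickHouseUrl and falls back to a local TCP address.
// The fallback value here is hypothetical, not taken from the patch.
func resolveDatasource() string {
	if url := os.Getenv("ClickHouseUrl"); url != "" {
		return url
	}
	return "tcp://localhost:9000" // hypothetical default
}

func main() {
	fmt.Println("connecting to ClickHouse at", resolveDatasource())
}
```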
diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index 8a6e2e6d36..6716373c8b 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -3,16 +3,12 @@ package clickhouseReader import ( "bytes" "context" - "crypto/md5" - "database/sql" "encoding/json" - "flag" + "fmt" "io/ioutil" "math/rand" - "net" "net/http" - "net/url" "os" "reflect" "regexp" @@ -27,20 +23,16 @@ import ( "github.com/google/uuid" "github.com/oklog/oklog/pkg/group" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/promlog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" sd_config "github.com/prometheus/prometheus/discovery/config" - "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/promql" - "github.com/prometheus/prometheus/rules" + "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" "github.com/prometheus/prometheus/util/stats" - "github.com/prometheus/prometheus/util/strutil" - "github.com/prometheus/tsdb" "github.com/ClickHouse/clickhouse-go/v2" "github.com/ClickHouse/clickhouse-go/v2/lib/driver" @@ -50,6 +42,7 @@ import ( "go.signoz.io/query-service/constants" am "go.signoz.io/query-service/integrations/alertManager" "go.signoz.io/query-service/model" + "go.signoz.io/query-service/utils" "go.uber.org/zap" ) @@ -59,7 +52,7 @@ const ( signozTraceDBName = "signoz_traces" signozDurationMVTable = "durationSort" signozSpansTable = "signoz_spans" - signozErrorIndexTable = "signoz_error_index" + signozErrorIndexTable = "signoz_error_index_v2" signozTraceTableName = "signoz_index_v2" signozMetricDBName = "signoz_metrics" signozSampleTableName = "samples_v2" @@ -92,24 +85,30 @@ type ClickHouseReader struct { spansTable string queryEngine *promql.Engine remoteStorage *remote.Storage - ruleManager *rules.Manager - promConfig *config.Config - alertManager am.Manager + + promConfigFile string + promConfig *config.Config + alertManager am.Manager } // NewTraceReader returns a TraceReader for the database -func NewReader(localDB *sqlx.DB) *ClickHouseReader { +func NewReader(localDB *sqlx.DB, configFile string) *ClickHouseReader { datasource := os.Getenv("ClickHouseUrl") options := NewOptions(datasource, primaryNamespace, archiveNamespace) db, err := initialize(options) if err != nil { - zap.S().Error(err) + zap.S().Error("failed to initialize ClickHouse: ", err) os.Exit(1) } - alertManager := am.New("") + alertManager, err := am.New("") + if err != nil { + zap.S().Errorf("msg: failed to initialize alert manager: ", "\t error:", err) + zap.S().Errorf("msg: check if the alert manager URL is correctly set and valid") + os.Exit(1) + } return &ClickHouseReader{ db: db, @@ -121,6 +120,7 @@ func NewReader(localDB *sqlx.DB) *ClickHouseReader { errorTable: options.primary.ErrorTable, durationTable: options.primary.DurationTable, spansTable: options.primary.SpansTable, + promConfigFile: configFile, } } @@ -139,30 +139,14 @@ func (r *ClickHouseReader) Start() { startTime := func() (int64, error) { return int64(promModel.Latest), nil - } remoteStorage := remote.NewStorage(log.With(logger, "component", "remote"), startTime, time.Duration(1*time.Minute)) - // conf, err := config.LoadFile(*filename) - // if err != nil { - // zap.S().Error("couldn't load configuration (--config.file=%q): %v", filename, err) - // } - - // err = remoteStorage.ApplyConfig(conf) - // if err != nil { - // zap.S().Error("Error in remoteStorage.ApplyConfig: ", err) - // } cfg := struct { configFile string localStoragePath string - notifier notifier.Options - notifierTimeout promModel.Duration - forGracePeriod promModel.Duration - outageTolerance promModel.Duration - resendDelay promModel.Duration - tsdb tsdb.Options lookbackDelta promModel.Duration webTimeout promModel.Duration queryTimeout promModel.Duration @@ -174,39 +158,15 @@ func (r *ClickHouseReader) Start() { logLevel promlog.AllowedLevel }{ - notifier: notifier.Options{ - Registerer: prometheus.DefaultRegisterer, - }, + configFile: r.promConfigFile, } - flag.StringVar(&cfg.configFile, "config", "./config/prometheus.yml", "(prometheus config to read metrics)") - flag.Parse() - // fanoutStorage := remoteStorage fanoutStorage := storage.NewFanout(logger, remoteStorage) - localStorage := remoteStorage - - cfg.notifier.QueueCapacity = 10000 - cfg.notifierTimeout = promModel.Duration(time.Duration.Seconds(10)) - notifier := notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier")) - // notifier.ApplyConfig(conf) - - ExternalURL, err := computeExternalURL("", "0.0.0.0:3301") - if err != nil { - fmt.Fprintln(os.Stderr, errors.Wrapf(err, "parse external URL %q", ExternalURL.String())) - os.Exit(2) - } - - cfg.outageTolerance = promModel.Duration(time.Duration.Hours(1)) - cfg.forGracePeriod = promModel.Duration(time.Duration.Minutes(10)) - cfg.resendDelay = promModel.Duration(time.Duration.Minutes(1)) ctxScrape, cancelScrape := context.WithCancel(context.Background()) discoveryManagerScrape := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape")) - ctxNotify, cancelNotify := context.WithCancel(context.Background()) - discoveryManagerNotify := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), discovery.Name("notify")) - scrapeManager := scrape.NewManager(log.With(logger, "component", "scrape manager"), fanoutStorage) opts := promql.EngineOpts{ @@ -219,25 +179,10 @@ func (r *ClickHouseReader) Start() { queryEngine := promql.NewEngine(opts) - ruleManager := rules.NewManager(&rules.ManagerOptions{ - Appendable: fanoutStorage, - TSDB: localStorage, - QueryFunc: 
rules.EngineQueryFunc(queryEngine, fanoutStorage), - NotifyFunc: sendAlerts(notifier, ExternalURL.String()), - Context: context.Background(), - ExternalURL: ExternalURL, - Registerer: prometheus.DefaultRegisterer, - Logger: log.With(logger, "component", "rule manager"), - OutageTolerance: time.Duration(cfg.outageTolerance), - ForGracePeriod: time.Duration(cfg.forGracePeriod), - ResendDelay: time.Duration(cfg.resendDelay), - }) - reloaders := []func(cfg *config.Config) error{ remoteStorage.ApplyConfig, - // The Scrape and notifier managers need to reload before the Discovery manager as + // The Scrape managers need to reload before the Discovery manager as // they need to read the most updated config when receiving the new targets list. - notifier.ApplyConfig, scrapeManager.ApplyConfig, func(cfg *config.Config) error { c := make(map[string]sd_config.ServiceDiscoveryConfig) @@ -246,32 +191,6 @@ func (r *ClickHouseReader) Start() { } return discoveryManagerScrape.ApplyConfig(c) }, - func(cfg *config.Config) error { - c := make(map[string]sd_config.ServiceDiscoveryConfig) - for _, v := range cfg.AlertingConfig.AlertmanagerConfigs { - // AlertmanagerConfigs doesn't hold an unique identifier so we use the config hash as the identifier. - b, err := json.Marshal(v) - if err != nil { - return err - } - c[fmt.Sprintf("%x", md5.Sum(b))] = v.ServiceDiscoveryConfig - } - return discoveryManagerNotify.ApplyConfig(c) - }, - // func(cfg *config.Config) error { - // // Get all rule files matching the configuration oaths. - // var files []string - // for _, pat := range cfg.RuleFiles { - // fs, err := filepath.Glob(pat) - // if err != nil { - // // The only error can be a bad pattern. - // return fmt.Errorf("error retrieving rule files for %s: %s", pat, err) - // } - // files = append(files, fs...) - // } - // return ruleManager.Update(time.Duration(cfg.GlobalConfig.EvaluationInterval), files) - // }, - } // sync.Once is used to make sure we can close the channel at different execution stages(SIGTERM or when the config is loaded). @@ -305,20 +224,6 @@ func (r *ClickHouseReader) Start() { }, ) } - { - // Notify discovery manager. - g.Add( - func() error { - err := discoveryManagerNotify.Run() - level.Info(logger).Log("msg", "Notify discovery manager stopped") - return err - }, - func(err error) { - level.Info(logger).Log("msg", "Stopping notify discovery manager...") - cancelNotify() - }, - ) - } { // Scrape manager. g.Add( @@ -354,6 +259,7 @@ func (r *ClickHouseReader) Start() { // reloadReady.Close() // return nil // } + var err error r.promConfig, err = reloadConfig(cfg.configFile, logger, reloaders...) if err != nil { return fmt.Errorf("error loading config from %q: %s", cfg.configFile, err) @@ -361,29 +267,19 @@ func (r *ClickHouseReader) Start() { reloadReady.Close() - rules, apiErrorObj := r.GetRulesFromDB() + // ! 
commented out: the alert manager can now + // call query service to do this + // channels, apiErrorObj := r.GetChannels() - if apiErrorObj != nil { - zap.S().Errorf("Not able to read rules from DB") - } - for _, rule := range *rules { - apiErrorObj = r.LoadRule(rule) - if apiErrorObj != nil { - zap.S().Errorf("Not able to load rule with id=%d loaded from DB", rule.Id, rule.Data) - } - } - - channels, apiErrorObj := r.GetChannels() - - if apiErrorObj != nil { - zap.S().Errorf("Not able to read channels from DB") - } - for _, channel := range *channels { - apiErrorObj = r.LoadChannel(&channel) - if apiErrorObj != nil { - zap.S().Errorf("Not able to load channel with id=%d loaded from DB", channel.Id, channel.Data) - } - } + //if apiErrorObj != nil { + // zap.S().Errorf("Not able to read channels from DB") + //} + //for _, channel := range *channels { + //apiErrorObj = r.LoadChannel(&channel) + //if apiErrorObj != nil { + // zap.S().Errorf("Not able to load channel with id=%d loaded from DB", channel.Id, channel.Data) + //} + //} <-cancel @@ -394,48 +290,8 @@ func (r *ClickHouseReader) Start() { }, ) } - { - // Rule manager. - // TODO(krasi) refactor ruleManager.Run() to be blocking to avoid using an extra blocking channel. - cancel := make(chan struct{}) - g.Add( - func() error { - <-reloadReady.C - ruleManager.Run() - <-cancel - return nil - }, - func(err error) { - ruleManager.Stop() - close(cancel) - }, - ) - } - { - // Notifier. - - // Calling notifier.Stop() before ruleManager.Stop() will cause a panic if the ruleManager isn't running, - // so keep this interrupt after the ruleManager.Stop(). - g.Add( - func() error { - // When the notifier manager receives a new targets list - // it needs to read a valid config for each job. - // It depends on the config being in sync with the discovery manager - // so we wait until the config is fully loaded. - <-reloadReady.C - - notifier.Run(discoveryManagerNotify.SyncCh()) - level.Info(logger).Log("msg", "Notifier manager stopped") - return nil - }, - func(err error) { - notifier.Stop() - }, - ) - } r.queryEngine = queryEngine r.remoteStorage = remoteStorage - r.ruleManager = ruleManager if err := g.Run(); err != nil { level.Error(logger).Log("err", err) @@ -466,70 +322,6 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config return conf, nil } -func startsOrEndsWithQuote(s string) bool { - return strings.HasPrefix(s, "\"") || strings.HasPrefix(s, "'") || - strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'") -} - -// computeExternalURL computes a sanitized external URL from a raw input. It infers unset -// URL parts from the OS and the given listen address. -func computeExternalURL(u, listenAddr string) (*url.URL, error) { - if u == "" { - hostname, err := os.Hostname() - if err != nil { - return nil, err - } - _, port, err := net.SplitHostPort(listenAddr) - if err != nil { - return nil, err - } - u = fmt.Sprintf("http://%s:%s/", hostname, port) - } - - if startsOrEndsWithQuote(u) { - return nil, fmt.Errorf("URL must not begin or end with quotes") - } - - eu, err := url.Parse(u) - if err != nil { - return nil, err - } - - ppref := strings.TrimRight(eu.Path, "/") - if ppref != "" && !strings.HasPrefix(ppref, "/") { - ppref = "/" + ppref - } - eu.Path = ppref - - return eu, nil -} - -// sendAlerts implements the rules.NotifyFunc for a Notifier. 
-func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc { - return func(ctx context.Context, expr string, alerts ...*rules.Alert) { - var res []*notifier.Alert - - for _, alert := range alerts { - a := ¬ifier.Alert{ - StartsAt: alert.FiredAt, - Labels: alert.Labels, - Annotations: alert.Annotations, - GeneratorURL: externalURL + strutil.TableLinkForExpression(expr), - } - if !alert.ResolvedAt.IsZero() { - a.EndsAt = alert.ResolvedAt - } else { - a.EndsAt = alert.ValidUntil - } - res = append(res, a) - } - - if len(alerts) > 0 { - n.Send(res...) - } - } -} - func initialize(options *Options) (clickhouse.Conn, error) { db, err := connect(options.getPrimary()) @@ -548,156 +340,8 @@ func connect(cfg *namespaceConfig) (clickhouse.Conn, error) { return cfg.Connector(cfg) } -type byAlertStateAndNameSorter struct { - alerts []*AlertingRuleWithGroup -} - -func (s byAlertStateAndNameSorter) Len() int { - return len(s.alerts) -} - -func (s byAlertStateAndNameSorter) Less(i, j int) bool { - return s.alerts[i].State() > s.alerts[j].State() || - (s.alerts[i].State() == s.alerts[j].State() && - s.alerts[i].Name() < s.alerts[j].Name()) -} - -func (s byAlertStateAndNameSorter) Swap(i, j int) { - s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i] -} - -type AlertingRuleWithGroup struct { - rules.AlertingRule - Id int -} - -func (r *ClickHouseReader) GetRulesFromDB() (*[]model.RuleResponseItem, *model.ApiError) { - - rules := []model.RuleResponseItem{} - - query := fmt.Sprintf("SELECT id, updated_at, data FROM rules") - - err := r.localDB.Select(&rules, query) - - zap.S().Info(query) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return &rules, nil -} - -func (r *ClickHouseReader) GetRule(id string) (*model.RuleResponseItem, *model.ApiError) { - - idInt, err := strconv.Atoi(id) - if err != nil { - zap.S().Debug("Error in parsing param: ", err) - return nil, &model.ApiError{Typ: model.ErrorBadData, Err: err} - } - - rule := &model.RuleResponseItem{} - - query := "SELECT id, updated_at, data FROM rules WHERE id=?" 
- rows, err := r.localDB.Query(query, idInt) - - if err != nil { - zap.S().Debug("Error in processing sql query: ", err) - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - count := 0 - // iterate over each row - for rows.Next() { - err = rows.Scan(&rule.Id, &rule.UpdatedAt, &rule.Data) - if err != nil { - zap.S().Debug(err) - return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - count += 1 - - } - - if count == 0 { - err = fmt.Errorf("no rule with id %d found", idInt) - zap.S().Debug(err) - return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: err} - } - if count > 1 { - err = fmt.Errorf("multiple rules with id %d found", idInt) - zap.S().Debug(err) - return nil, &model.ApiError{Typ: model.ErrorConflict, Err: err} - } - - return rule, nil -} - -func (r *ClickHouseReader) ListRulesFromProm() (*model.AlertDiscovery, *model.ApiError) { - - groups := r.ruleManager.RuleGroups() - - alertingRulesWithGroupObjects := []*AlertingRuleWithGroup{} - - for _, group := range groups { - groupNameParts := strings.Split(group.Name(), "-groupname") - if len(groupNameParts) < 2 { - continue - } - id, _ := strconv.Atoi(groupNameParts[0]) - for _, rule := range group.Rules() { - if alertingRule, ok := rule.(*rules.AlertingRule); ok { - alertingRulesWithGroupObject := AlertingRuleWithGroup{ - *alertingRule, - id, - } - alertingRulesWithGroupObjects = append(alertingRulesWithGroupObjects, &alertingRulesWithGroupObject) - } - } - } - - // alertingRules := r.ruleManager.AlertingRules() - - alertsSorter := byAlertStateAndNameSorter{alerts: alertingRulesWithGroupObjects} - sort.Sort(alertsSorter) - alerts := []*model.AlertingRuleResponse{} - - for _, alertingRule := range alertsSorter.alerts { - - alertingRuleResponseObject := &model.AlertingRuleResponse{ - Labels: alertingRule.Labels(), - // Annotations: alertingRule.Annotations(), - Name: alertingRule.Name(), - Id: alertingRule.Id, - } - if len(alertingRule.ActiveAlerts()) == 0 { - alertingRuleResponseObject.State = rules.StateInactive.String() - } else { - alertingRuleResponseObject.State = (*(alertingRule.ActiveAlerts()[0])).State.String() - } - - alerts = append( - alerts, - alertingRuleResponseObject, - ) - } - - res := &model.AlertDiscovery{Alerts: alerts} - - return res, nil -} - -func (r *ClickHouseReader) LoadRule(rule model.RuleResponseItem) *model.ApiError { - - groupName := fmt.Sprintf("%d-groupname", rule.Id) - - err := r.ruleManager.AddGroup(time.Duration(r.promConfig.GlobalConfig.EvaluationInterval), rule.Data, groupName) - - if err != nil { - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return nil +func (r *ClickHouseReader) GetConn() clickhouse.Conn { + return r.db } func (r *ClickHouseReader) LoadChannel(channel *model.ChannelItem) *model.ApiError { @@ -942,138 +586,6 @@ func (r *ClickHouseReader) CreateChannel(receiver *am.Receiver) (*am.Receiver, * } -func (r *ClickHouseReader) CreateRule(rule string) *model.ApiError { - - tx, err := r.localDB.Begin() - if err != nil { - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - var lastInsertId int64 - - { - stmt, err := tx.Prepare(`INSERT into rules (updated_at, data) VALUES($1,$2);`) - if err != nil { - zap.S().Errorf("Error in preparing statement for INSERT to rules\n", err) - tx.Rollback() - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - defer stmt.Close() - - result, err := stmt.Exec(time.Now(), rule) - if err != nil { - zap.S().Errorf("Error in Executing prepared statement for INSERT to rules\n", 
err) - tx.Rollback() // return an error too, we may want to wrap them - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - lastInsertId, _ = result.LastInsertId() - - groupName := fmt.Sprintf("%d-groupname", lastInsertId) - - err = r.ruleManager.AddGroup(time.Duration(r.promConfig.GlobalConfig.EvaluationInterval), rule, groupName) - - if err != nil { - tx.Rollback() - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - } - err = tx.Commit() - if err != nil { - zap.S().Errorf("Error in committing transaction for INSERT to rules\n", err) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - return nil -} - -func (r *ClickHouseReader) EditRule(rule string, id string) *model.ApiError { - - idInt, _ := strconv.Atoi(id) - - tx, err := r.localDB.Begin() - if err != nil { - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - { - stmt, err := tx.Prepare(`UPDATE rules SET updated_at=$1, data=$2 WHERE id=$3;`) - if err != nil { - zap.S().Errorf("Error in preparing statement for UPDATE to rules\n", err) - tx.Rollback() - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - defer stmt.Close() - - if _, err := stmt.Exec(time.Now(), rule, idInt); err != nil { - zap.S().Errorf("Error in Executing prepared statement for UPDATE to rules\n", err) - tx.Rollback() // return an error too, we may want to wrap them - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - groupName := fmt.Sprintf("%d-groupname", idInt) - - err = r.ruleManager.EditGroup(time.Duration(r.promConfig.GlobalConfig.EvaluationInterval), rule, groupName) - - if err != nil { - tx.Rollback() - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - } - - err = tx.Commit() - if err != nil { - zap.S().Errorf("Error in committing transaction for UPDATE to rules\n", err) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return nil -} - -func (r *ClickHouseReader) DeleteRule(id string) *model.ApiError { - - idInt, _ := strconv.Atoi(id) - - tx, err := r.localDB.Begin() - if err != nil { - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - { - stmt, err := tx.Prepare(`DELETE FROM rules WHERE id=$1;`) - - if err != nil { - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - defer stmt.Close() - - if _, err := stmt.Exec(idInt); err != nil { - zap.S().Errorf("Error in Executing prepared statement for DELETE to rules\n", err) - tx.Rollback() // return an error too, we may want to wrap them - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - groupName := fmt.Sprintf("%d-groupname", idInt) - - rule := "" // dummy rule to pass to function - // err = r.ruleManager.UpdateGroupWithAction(time.Duration(r.promConfig.GlobalConfig.EvaluationInterval), rule, groupName, "delete") - err = r.ruleManager.DeleteGroup(time.Duration(r.promConfig.GlobalConfig.EvaluationInterval), rule, groupName) - - if err != nil { - tx.Rollback() - zap.S().Errorf("Error in deleting rule from rulemanager...\n", err) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - } - - err = tx.Commit() - if err != nil { - zap.S().Errorf("Error in committing transaction for deleting rules\n", err) - return &model.ApiError{Typ: model.ErrorInternal, Err: err} - } - - return nil -} - func (r *ClickHouseReader) GetInstantQueryMetricsResult(ctx context.Context, queryParams *model.InstantQueryMetricsParams) (*promql.Result, *stats.QueryStats, *model.ApiError) { qry, err := r.queryEngine.NewInstantQuery(r.remoteStorage, queryParams.Query, 
queryParams.Time) if err != nil { @@ -1222,8 +734,8 @@ func (r *ClickHouseReader) GetServices(ctx context.Context, queryParams *model.G serviceItems[i].Num4XX = val } serviceItems[i].CallRate = float64(serviceItems[i].NumCalls) / float64(queryParams.Period) - serviceItems[i].FourXXRate = float64(serviceItems[i].Num4XX) / float64(queryParams.Period) - serviceItems[i].ErrorRate = float64(serviceItems[i].NumErrors) / float64(queryParams.Period) + serviceItems[i].FourXXRate = float64(serviceItems[i].Num4XX) * 100 / float64(serviceItems[i].NumCalls) + serviceItems[i].ErrorRate = float64(serviceItems[i].NumErrors) * 100 / float64(serviceItems[i].NumCalls) } return &serviceItems, nil @@ -1348,6 +860,12 @@ func (r *ClickHouseReader) GetSpanFilters(ctx context.Context, queryParams *mode if len(queryParams.Operation) > 0 { args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args) } + if len(queryParams.RPCMethod) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args) + } + if len(queryParams.ResponseStatusCode) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args) + } if len(queryParams.MinDuration) != 0 { query = query + " AND durationNano >= @durationNanoMin" @@ -1361,16 +879,18 @@ func (r *ClickHouseReader) GetSpanFilters(ctx context.Context, queryParams *mode query = getStatusFilters(query, queryParams.Status, excludeMap) traceFilterReponse := model.SpanFiltersResponse{ - Status: map[string]uint64{}, - Duration: map[string]uint64{}, - ServiceName: map[string]uint64{}, - Operation: map[string]uint64{}, - HttpCode: map[string]uint64{}, - HttpMethod: map[string]uint64{}, - HttpUrl: map[string]uint64{}, - HttpRoute: map[string]uint64{}, - HttpHost: map[string]uint64{}, - Component: map[string]uint64{}, + Status: map[string]uint64{}, + Duration: map[string]uint64{}, + ServiceName: map[string]uint64{}, + Operation: map[string]uint64{}, + ResponseStatusCode: map[string]uint64{}, + RPCMethod: map[string]uint64{}, + HttpCode: map[string]uint64{}, + HttpMethod: map[string]uint64{}, + HttpUrl: map[string]uint64{}, + HttpRoute: map[string]uint64{}, + HttpHost: map[string]uint64{}, + Component: map[string]uint64{}, } for _, e := range queryParams.GetFilters { @@ -1571,6 +1091,42 @@ func (r *ClickHouseReader) GetSpanFilters(ctx context.Context, queryParams *mode if len(dBResponse2) > 0 { traceFilterReponse.Duration["maxDuration"] = dBResponse2[0].NumTotal } + case constants.RPCMethod: + finalQuery := fmt.Sprintf("SELECT rpcMethod, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.traceDB, r.indexTable) + finalQuery += query + finalQuery += " GROUP BY rpcMethod" + var dBResponse []model.DBResponseRPCMethod + err := r.db.Select(ctx, &dBResponse, finalQuery, args...) 
+ zap.S().Info(finalQuery) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)} + } + for _, service := range dBResponse { + if service.RPCMethod != "" { + traceFilterReponse.RPCMethod[service.RPCMethod] = service.Count + } + } + + case constants.ResponseStatusCode: + finalQuery := fmt.Sprintf("SELECT responseStatusCode, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.traceDB, r.indexTable) + finalQuery += query + finalQuery += " GROUP BY responseStatusCode" + var dBResponse []model.DBResponseStatusCodeMethod + err := r.db.Select(ctx, &dBResponse, finalQuery, args...) + zap.S().Info(finalQuery) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)} + } + for _, service := range dBResponse { + if service.ResponseStatusCode != "" { + traceFilterReponse.ResponseStatusCode[service.ResponseStatusCode] = service.Count + } + } + default: return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("filter type: %s not supported", e)} } @@ -1639,6 +1195,14 @@ func (r *ClickHouseReader) GetFilteredSpans(ctx context.Context, queryParams *mo if len(queryParams.Operation) > 0 { args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args) } + if len(queryParams.RPCMethod) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args) + } + + if len(queryParams.ResponseStatusCode) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args) + } + if len(queryParams.MinDuration) != 0 { query = query + " AND durationNano >= @durationNanoMin" args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration)) @@ -1698,17 +1262,17 @@ func (r *ClickHouseReader) GetFilteredSpans(ctx context.Context, queryParams *mo var getFilterSpansResponseItems []model.GetFilterSpansResponseItem - baseQuery := fmt.Sprintf("SELECT timestamp, spanID, traceID, serviceName, name, durationNano, httpCode, gRPCCode, gRPCMethod, httpMethod FROM %s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryTable) + baseQuery := fmt.Sprintf("SELECT timestamp, spanID, traceID, serviceName, name, durationNano, httpCode, gRPCCode, gRPCMethod, httpMethod, rpcMethod, responseStatusCode FROM %s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryTable) baseQuery += query err := r.db.Select(ctx, &getFilterSpansResponseItems, baseQuery, args...) 
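The new rpcMethod and responseStatusCode filters above reuse the same composition pattern as the existing ones: each selected filter appends an AND clause to the base query and a clickhouse.Named argument to a shared args slice, which is then passed to db.Select. A minimal sketch of that pattern, with hypothetical filter values; the IN-clause shape is simplified, since the real buildFilterArrayQuery helper also handles the exclude map:

```go
package main

import (
	"fmt"

	"github.com/ClickHouse/clickhouse-go/v2"
)

func main() {
	// Base query with the mandatory time-range bounds, as in the handlers above.
	query := "SELECT count() FROM signoz_traces.signoz_index_v2" +
		" WHERE timestamp >= @timestampL AND timestamp <= @timestampU"
	args := []interface{}{
		clickhouse.Named("timestampL", "1650000000000000000"),
		clickhouse.Named("timestampU", "1650003600000000000"),
	}

	// Hypothetical filter values; each active filter contributes one clause
	// plus matching named arguments.
	rpcMethods := []string{"GetUser", "ListUsers"}
	if len(rpcMethods) > 0 {
		query += " AND rpcMethod IN (@rpcMethod0, @rpcMethod1)" // simplified shape
		args = append(args,
			clickhouse.Named("rpcMethod0", rpcMethods[0]),
			clickhouse.Named("rpcMethod1", rpcMethods[1]),
		)
	}

	fmt.Println(query)
	fmt.Println("args:", len(args))
	// The real code would now run: r.db.Select(ctx, &dest, query, args...)
}
```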
// Fill status and method for i, e := range getFilterSpansResponseItems { - if e.HttpCode == "" { + if e.GRPCode != "" { getFilterSpansResponseItems[i].StatusCode = e.GRPCode } else { getFilterSpansResponseItems[i].StatusCode = e.HttpCode } - if e.HttpMethod == "" { + if e.GRPMethod != "" { getFilterSpansResponseItems[i].Method = e.GRPMethod } else { getFilterSpansResponseItems[i].Method = e.HttpMethod @@ -1821,6 +1385,12 @@ func (r *ClickHouseReader) GetTagFilters(ctx context.Context, queryParams *model if len(queryParams.Operation) > 0 { args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args) } + if len(queryParams.RPCMethod) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args) + } + if len(queryParams.ResponseStatusCode) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args) + } if len(queryParams.MinDuration) != 0 { query = query + " AND durationNano >= @durationNanoMin" args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration)) @@ -2158,6 +1728,11 @@ func (r *ClickHouseReader) GetFilteredSpansAggregates(ctx context.Context, query query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, dbSystem as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, aggregation_query, r.traceDB, r.indexTable) case constants.Component: query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, component as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, aggregation_query, r.traceDB, r.indexTable) + case constants.RPCMethod: + query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, rpcMethod as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, aggregation_query, r.traceDB, r.indexTable) + case constants.ResponseStatusCode: + query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, responseStatusCode as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, aggregation_query, r.traceDB, r.indexTable) + default: return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("groupBy type: %s not supported", queryParams.GroupBy)} } @@ -2189,6 +1764,12 @@ func (r *ClickHouseReader) GetFilteredSpansAggregates(ctx context.Context, query if len(queryParams.Operation) > 0 { args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args) } + if len(queryParams.RPCMethod) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args) + } + if len(queryParams.ResponseStatusCode) > 0 { + args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args) + } if len(queryParams.MinDuration) != 0 { query = query + " AND durationNano >= @durationNanoMin" args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration)) @@ -2237,6 +1818,11 @@ func (r *ClickHouseReader) GetFilteredSpansAggregates(ctx context.Context, query query = query + " GROUP BY time, dbSystem as groupBy ORDER BY time" case constants.Component: query = query + " GROUP BY time, component as groupBy ORDER BY time" + case 
constants.RPCMethod: + query = query + " GROUP BY time, rpcMethod as groupBy ORDER BY time" + case constants.ResponseStatusCode: + query = query + " GROUP BY time, responseStatusCode as groupBy ORDER BY time" + default: return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("groupBy type: %s not supported", queryParams.GroupBy)} } @@ -2633,15 +2219,30 @@ func (r *ClickHouseReader) GetTTL(ctx context.Context, ttlParams *model.GetTTLPa } -func (r *ClickHouseReader) GetErrors(ctx context.Context, queryParams *model.GetErrorsParams) (*[]model.Error, *model.ApiError) { +func (r *ClickHouseReader) ListErrors(ctx context.Context, queryParams *model.ListErrorsParams) (*[]model.Error, *model.ApiError) { - var getErrorReponses []model.Error + var getErrorResponses []model.Error - query := fmt.Sprintf("SELECT exceptionType, exceptionMessage, count() AS exceptionCount, min(timestamp) as firstSeen, max(timestamp) as lastSeen, serviceName FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU GROUP BY serviceName, exceptionType, exceptionMessage", r.traceDB, r.errorTable) + query := fmt.Sprintf("SELECT any(exceptionType) as exceptionType, any(exceptionMessage) as exceptionMessage, count() AS exceptionCount, min(timestamp) as firstSeen, max(timestamp) as lastSeen, any(serviceName) as serviceName, groupID FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU GROUP BY groupID", r.traceDB, r.errorTable) args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))} + if len(queryParams.OrderParam) != 0 { + if queryParams.Order == constants.Descending { + query = query + " ORDER BY " + queryParams.OrderParam + " DESC" + } else if queryParams.Order == constants.Ascending { + query = query + " ORDER BY " + queryParams.OrderParam + " ASC" + } + } + if queryParams.Limit > 0 { + query = query + " LIMIT @limit" + args = append(args, clickhouse.Named("limit", queryParams.Limit)) + } - err := r.db.Select(ctx, &getErrorReponses, query, args...) + if queryParams.Offset > 0 { + query = query + " OFFSET @offset" + args = append(args, clickhouse.Named("offset", queryParams.Offset)) + } + err := r.db.Select(ctx, &getErrorResponses, query, args...) 
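ListErrors above now groups by groupID and appends ordering and pagination clauses conditionally. Note that OrderParam is concatenated directly into the SQL string while only limit and offset travel as named parameters, so OrderParam is presumably validated upstream. A sketch of just the suffix composition under that assumption, with the order constants stubbed as plain strings:

```go
package main

import "fmt"

// errorListSuffix reproduces the conditional ORDER BY / LIMIT / OFFSET logic
// from ListErrors above. "descending" and "ascending" are stand-ins for
// constants.Descending and constants.Ascending.
func errorListSuffix(orderParam, order string, limit, offset int64) string {
	q := ""
	if orderParam != "" {
		if order == "descending" {
			q += " ORDER BY " + orderParam + " DESC"
		} else if order == "ascending" {
			q += " ORDER BY " + orderParam + " ASC"
		}
	}
	if limit > 0 {
		q += " LIMIT @limit" // @limit/@offset are bound via clickhouse.Named
	}
	if offset > 0 {
		q += " OFFSET @offset"
	}
	return q
}

func main() {
	fmt.Println(errorListSuffix("exceptionCount", "descending", 10, 20))
	// Output: " ORDER BY exceptionCount DESC LIMIT @limit OFFSET @offset"
}
```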
zap.S().Info(query) if err != nil { @@ -2649,30 +2250,41 @@ func (r *ClickHouseReader) GetErrors(ctx context.Context, queryParams *model.Get return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} } - return &getErrorReponses, nil - + return &getErrorResponses, nil } -func (r *ClickHouseReader) GetErrorForId(ctx context.Context, queryParams *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) { +func (r *ClickHouseReader) CountErrors(ctx context.Context, queryParams *model.CountErrorsParams) (uint64, *model.ApiError) { + + var errorCount uint64 + + query := fmt.Sprintf("SELECT count(distinct(groupID)) FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))} + + err := r.db.QueryRow(ctx, query, args...).Scan(&errorCount) + zap.S().Info(query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return 0, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + + return errorCount, nil +} + +func (r *ClickHouseReader) GetErrorFromErrorID(ctx context.Context, queryParams *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) { if queryParams.ErrorID == "" { zap.S().Debug("errorId missing from params") - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("ErrorID missing from params")} + return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("ErrorID missing from params")} } var getErrorWithSpanReponse []model.ErrorWithSpan - // TODO: Optimize this query further - query := fmt.Sprintf("SELECT spanID, traceID, errorID, timestamp, serviceName, exceptionType, exceptionMessage, exceptionStacktrace, exceptionEscaped, olderErrorId, newerErrorId FROM (SELECT *, lagInFrame(toNullable(errorID)) over w as olderErrorId, leadInFrame(toNullable(errorID)) over w as newerErrorId FROM %s.%s window w as (ORDER BY exceptionType, serviceName, timestamp rows between unbounded preceding and unbounded following)) WHERE errorID = @errorID", r.traceDB, r.errorTable) - args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID)} + query := fmt.Sprintf("SELECT * FROM %s.%s WHERE timestamp = @timestamp AND groupID = @groupID AND errorID = @errorID LIMIT 1", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} err := r.db.Select(ctx, &getErrorWithSpanReponse, query, args...) 
- zap.S().Info(query) - if err == sql.ErrNoRows { - return nil, nil - } - if err != nil { zap.S().Debug("Error in processing sql query: ", err) return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} @@ -2681,22 +2293,17 @@ func (r *ClickHouseReader) GetErrorForId(ctx context.Context, queryParams *model if len(getErrorWithSpanReponse) > 0 { return &getErrorWithSpanReponse[0], nil } else { - return &model.ErrorWithSpan{}, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Error ID not found")} + return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Error/Exception not found")} } } -func (r *ClickHouseReader) GetErrorForType(ctx context.Context, queryParams *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) { +func (r *ClickHouseReader) GetErrorFromGroupID(ctx context.Context, queryParams *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) { - if queryParams.ErrorType == "" || queryParams.ServiceName == "" { - zap.S().Debug("errorType/serviceName missing from params") - return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("ErrorType/serviceName missing from params")} - } var getErrorWithSpanReponse []model.ErrorWithSpan - // TODO: Optimize this query further - query := fmt.Sprintf("SELECT spanID, traceID, errorID, timestamp , serviceName, exceptionType, exceptionMessage, exceptionStacktrace, exceptionEscaped, newerErrorId, olderErrorId FROM (SELECT *, lagInFrame(errorID) over w as olderErrorId, leadInFrame(errorID) over w as newerErrorId FROM %s.%s WHERE serviceName = @serviceName AND exceptionType = @errorType window w as (ORDER BY timestamp DESC rows between unbounded preceding and unbounded following))", r.traceDB, r.errorTable) - args := []interface{}{clickhouse.Named("serviceName", queryParams.ServiceName), clickhouse.Named("errorType", queryParams.ErrorType)} + query := fmt.Sprintf("SELECT * FROM %s.%s WHERE timestamp = @timestamp AND groupID = @groupID LIMIT 1", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} err := r.db.Select(ctx, &getErrorWithSpanReponse, query, args...) 
@@ -2710,11 +2317,173 @@ func (r *ClickHouseReader) GetErrorForType(ctx context.Context, queryParams *mod if len(getErrorWithSpanReponse) > 0 { return &getErrorWithSpanReponse[0], nil } else { - return nil, &model.ApiError{Typ: model.ErrorUnavailable, Err: fmt.Errorf("Error/Exception not found")} + return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Error/Exception not found")} } } +func (r *ClickHouseReader) GetNextPrevErrorIDs(ctx context.Context, queryParams *model.GetErrorParams) (*model.NextPrevErrorIDs, *model.ApiError) { + + if queryParams.ErrorID == "" { + zap.S().Debug("errorId missing from params") + return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("ErrorID missing from params")} + } + var err *model.ApiError + getNextPrevErrorIDsResponse := model.NextPrevErrorIDs{ + GroupID: queryParams.GroupID, + } + getNextPrevErrorIDsResponse.NextErrorID, getNextPrevErrorIDsResponse.NextTimestamp, err = r.getNextErrorID(ctx, queryParams) + if err != nil { + zap.S().Debug("Unable to get next error ID due to err: ", err) + return nil, err + } + getNextPrevErrorIDsResponse.PrevErrorID, getNextPrevErrorIDsResponse.PrevTimestamp, err = r.getPrevErrorID(ctx, queryParams) + if err != nil { + zap.S().Debug("Unable to get prev error ID due to err: ", err) + return nil, err + } + return &getNextPrevErrorIDsResponse, nil + +} + +func (r *ClickHouseReader) getNextErrorID(ctx context.Context, queryParams *model.GetErrorParams) (string, time.Time, *model.ApiError) { + + var getNextErrorIDResponse []model.NextPrevErrorIDsDBResponse + + query := fmt.Sprintf("SELECT errorID as nextErrorID, timestamp as nextTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp >= @timestamp AND errorID != @errorID ORDER BY timestamp ASC LIMIT 2", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} + + err := r.db.Select(ctx, &getNextErrorIDResponse, query, args...) + + zap.S().Info(query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + if len(getNextErrorIDResponse) == 0 { + zap.S().Info("NextErrorID not found") + return "", time.Time{}, nil + } else if len(getNextErrorIDResponse) == 1 { + zap.S().Info("NextErrorID found") + return getNextErrorIDResponse[0].NextErrorID, getNextErrorIDResponse[0].NextTimestamp, nil + } else { + if getNextErrorIDResponse[0].Timestamp.UnixNano() == getNextErrorIDResponse[1].Timestamp.UnixNano() { + var getNextErrorIDResponse []model.NextPrevErrorIDsDBResponse + + query := fmt.Sprintf("SELECT errorID as nextErrorID, timestamp as nextTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp = @timestamp AND errorID > @errorID ORDER BY errorID ASC LIMIT 1", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} + + err := r.db.Select(ctx, &getNextErrorIDResponse, query, args...) + + zap.S().Info(query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + if len(getNextErrorIDResponse) == 0 { + var getNextErrorIDResponse []model.NextPrevErrorIDsDBResponse + + query := fmt.Sprintf("SELECT errorID as nextErrorID, timestamp as nextTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp > @timestamp ORDER BY timestamp ASC LIMIT 1", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} + + err := r.db.Select(ctx, &getNextErrorIDResponse, query, args...) + + zap.S().Info(query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + + if len(getNextErrorIDResponse) == 0 { + zap.S().Info("NextErrorID not found") + return "", time.Time{}, nil + } else { + zap.S().Info("NextErrorID found") + return getNextErrorIDResponse[0].NextErrorID, getNextErrorIDResponse[0].NextTimestamp, nil + } + } else { + zap.S().Info("NextErrorID found") + return getNextErrorIDResponse[0].NextErrorID, getNextErrorIDResponse[0].NextTimestamp, nil + } + } else { + zap.S().Info("NextErrorID found") + return getNextErrorIDResponse[0].NextErrorID, getNextErrorIDResponse[0].NextTimestamp, nil + } + } +}
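getNextErrorID above resolves ordering ties in three steps: fetch up to two candidates at or after the current timestamp; if the top two share a timestamp, re-query by errorID order at that exact timestamp; and if that returns nothing, fall back to the first strictly later timestamp. A condensed sketch of just that decision flow, using placeholder types and fetch callbacks rather than the real reader API:

```go
package main

import "fmt"

// candidate is a placeholder for model.NextPrevErrorIDsDBResponse.
type candidate struct {
	ErrorID   string
	Timestamp int64
}

// pickNext mirrors the branch structure of getNextErrorID: byTime holds up to
// two candidates ordered by timestamp; sameTS and laterTS stand in for the
// two fallback queries.
func pickNext(byTime []candidate, sameTS, laterTS func() []candidate) (string, int64) {
	if len(byTime) == 0 {
		return "", 0 // no next error in this group
	}
	if len(byTime) == 1 || byTime[0].Timestamp != byTime[1].Timestamp {
		return byTime[0].ErrorID, byTime[0].Timestamp
	}
	// Tie on timestamp: disambiguate by errorID at that instant, then fall
	// back to the first strictly later timestamp.
	if tied := sameTS(); len(tied) > 0 {
		return tied[0].ErrorID, tied[0].Timestamp
	}
	if later := laterTS(); len(later) > 0 {
		return later[0].ErrorID, later[0].Timestamp
	}
	return "", 0
}

func main() {
	byTime := []candidate{{"err-2", 100}, {"err-3", 100}} // tied timestamps
	sameTS := func() []candidate { return []candidate{{"err-3", 100}} }
	laterTS := func() []candidate { return nil }
	fmt.Println(pickNext(byTime, sameTS, laterTS)) // err-3 100
}
```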
+ +func (r *ClickHouseReader) getPrevErrorID(ctx context.Context, queryParams *model.GetErrorParams) (string, time.Time, *model.ApiError) { + + var getPrevErrorIDResponse []model.NextPrevErrorIDsDBResponse + + query := fmt.Sprintf("SELECT errorID as prevErrorID, timestamp as prevTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp <= @timestamp AND errorID != @errorID ORDER BY timestamp DESC LIMIT 2", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} + + err := r.db.Select(ctx, &getPrevErrorIDResponse, query, args...) + + zap.S().Info(query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + if len(getPrevErrorIDResponse) == 0 { + zap.S().Info("PrevErrorID not found") + return "", time.Time{}, nil + } else if len(getPrevErrorIDResponse) == 1 { + zap.S().Info("PrevErrorID found") + return getPrevErrorIDResponse[0].PrevErrorID, getPrevErrorIDResponse[0].PrevTimestamp, nil + } else { + if getPrevErrorIDResponse[0].Timestamp.UnixNano() == getPrevErrorIDResponse[1].Timestamp.UnixNano() { + var getPrevErrorIDResponse []model.NextPrevErrorIDsDBResponse + + query := fmt.Sprintf("SELECT errorID as prevErrorID, timestamp as prevTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp = @timestamp AND errorID < @errorID ORDER BY errorID DESC LIMIT 1", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} + + err := r.db.Select(ctx, &getPrevErrorIDResponse, query, args...) + + zap.S().Info(query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + if len(getPrevErrorIDResponse) == 0 { + var getPrevErrorIDResponse []model.NextPrevErrorIDsDBResponse + + query := fmt.Sprintf("SELECT errorID as prevErrorID, timestamp as prevTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp < @timestamp ORDER BY timestamp DESC LIMIT 1", r.traceDB, r.errorTable) + args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))} + + err := r.db.Select(ctx, &getPrevErrorIDResponse, query, args...) + + zap.S().Info(query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("Error in processing sql query")} + } + + if len(getPrevErrorIDResponse) == 0 { + zap.S().Info("PrevErrorID not found") + return "", time.Time{}, nil + } else { + zap.S().Info("PrevErrorID found") + return getPrevErrorIDResponse[0].PrevErrorID, getPrevErrorIDResponse[0].PrevTimestamp, nil + } + } else { + zap.S().Info("PrevErrorID found") + return getPrevErrorIDResponse[0].PrevErrorID, getPrevErrorIDResponse[0].PrevTimestamp, nil + } + } else { + zap.S().Info("PrevErrorID found") + return getPrevErrorIDResponse[0].PrevErrorID, getPrevErrorIDResponse[0].PrevTimestamp, nil + } + } +}
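getPrevErrorID is the mirror image of getNextErrorID, with the comparisons and sort order reversed; the two results are then stitched into a model.NextPrevErrorIDs value by GetNextPrevErrorIDs. An illustrative mirror of that response shape follows; the json tags are assumptions for illustration, since the real struct tags live in the model package and are not shown in this patch:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// nextPrevErrorIDs mirrors the fields GetNextPrevErrorIDs populates above;
// the json tags are assumed, not taken from the patch.
type nextPrevErrorIDs struct {
	GroupID       string    `json:"groupID"`
	NextErrorID   string    `json:"nextErrorID"`
	NextTimestamp time.Time `json:"nextTimestamp"`
	PrevErrorID   string    `json:"prevErrorID"`
	PrevTimestamp time.Time `json:"prevTimestamp"`
}

func main() {
	out, _ := json.MarshalIndent(nextPrevErrorIDs{
		GroupID:     "example-group", // hypothetical values
		NextErrorID: "err-2",
		PrevErrorID: "err-1",
	}, "", "  ")
	fmt.Println(string(out))
}
```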
+ func (r *ClickHouseReader) GetMetricAutocompleteTagKey(ctx context.Context, params *model.MetricAutocompleteTagParams) (*[]string, *model.ApiError) { var query string @@ -2829,6 +2598,10 @@ func (r *ClickHouseReader) GetMetricAutocompleteMetricNames(ctx context.Context, // GetMetricResult runs the query and returns list of time series func (r *ClickHouseReader) GetMetricResult(ctx context.Context, query string) ([]*model.Series, error) { + defer utils.Elapsed("GetMetricResult")() + + zap.S().Infof("Executing metric result query: %s", query) + rows, err := r.db.Query(ctx, query) if err != nil { diff --git a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go index 51ff99a8a9..5d7b8cce5c 100644 --- a/pkg/query-service/app/http_handler.go +++ b/pkg/query-service/app/http_handler.go @@ -20,10 +20,12 @@ import ( "go.signoz.io/query-service/app/parser" "go.signoz.io/query-service/auth" "go.signoz.io/query-service/constants" + "go.signoz.io/query-service/dao" am "go.signoz.io/query-service/integrations/alertManager" "go.signoz.io/query-service/interfaces" "go.signoz.io/query-service/model" + "go.signoz.io/query-service/rules" "go.signoz.io/query-service/telemetry" "go.signoz.io/query-service/version" "go.uber.org/zap" @@ -50,17 +52,22 @@ type APIHandler struct { reader *interfaces.Reader relationalDB dao.ModelDao alertManager am.Manager + ruleManager *rules.Manager ready func(http.HandlerFunc) http.HandlerFunc } // NewAPIHandler returns an APIHandler -func NewAPIHandler(reader *interfaces.Reader, relationalDB dao.ModelDao) (*APIHandler, error) { +func NewAPIHandler(reader *interfaces.Reader, relationalDB dao.ModelDao, ruleManager *rules.Manager) (*APIHandler, error) { - alertManager := am.New("") + alertManager, err := am.New("") + if err != nil { + return nil, err + } aH := &APIHandler{ reader: reader, relationalDB: relationalDB, alertManager: alertManager, + ruleManager: ruleManager, } aH.ready = aH.testReady @@ -180,10 +187,10 
@@ func writeHttpResponse(w http.ResponseWriter, data interface{}) { } func (aH *APIHandler) RegisterMetricsRoutes(router *mux.Router) { subRouter := router.PathPrefix("/api/v2/metrics").Subrouter() - subRouter.HandleFunc("/query_range", aH.queryRangeMetricsV2).Methods(http.MethodPost) - subRouter.HandleFunc("/autocomplete/list", aH.metricAutocompleteMetricName).Methods(http.MethodGet) - subRouter.HandleFunc("/autocomplete/tagKey", aH.metricAutocompleteTagKey).Methods(http.MethodGet) - subRouter.HandleFunc("/autocomplete/tagValue", aH.metricAutocompleteTagValue).Methods(http.MethodGet) + subRouter.HandleFunc("/query_range", ViewAccess(aH.queryRangeMetricsV2)).Methods(http.MethodPost) + subRouter.HandleFunc("/autocomplete/list", ViewAccess(aH.metricAutocompleteMetricName)).Methods(http.MethodGet) + subRouter.HandleFunc("/autocomplete/tagKey", ViewAccess(aH.metricAutocompleteTagKey)).Methods(http.MethodGet) + subRouter.HandleFunc("/autocomplete/tagValue", ViewAccess(aH.metricAutocompleteTagValue)).Methods(http.MethodGet) } func (aH *APIHandler) respond(w http.ResponseWriter, data interface{}) { @@ -297,7 +304,7 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) { router.HandleFunc("/api/v1/channels/{id}", AdminAccess(aH.deleteChannel)).Methods(http.MethodDelete) router.HandleFunc("/api/v1/channels", EditAccess(aH.createChannel)).Methods(http.MethodPost) router.HandleFunc("/api/v1/testChannel", EditAccess(aH.testChannel)).Methods(http.MethodPost) - router.HandleFunc("/api/v1/rules", ViewAccess(aH.listRulesFromProm)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/rules", ViewAccess(aH.listRules)).Methods(http.MethodGet) router.HandleFunc("/api/v1/rules/{id}", ViewAccess(aH.getRule)).Methods(http.MethodGet) router.HandleFunc("/api/v1/rules", EditAccess(aH.createRule)).Methods(http.MethodPost) router.HandleFunc("/api/v1/rules/{id}", EditAccess(aH.editRule)).Methods(http.MethodPut) @@ -327,11 +334,13 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router) { router.HandleFunc("/api/v1/getTagFilters", ViewAccess(aH.getTagFilters)).Methods(http.MethodPost) router.HandleFunc("/api/v1/getFilteredSpans", ViewAccess(aH.getFilteredSpans)).Methods(http.MethodPost) router.HandleFunc("/api/v1/getFilteredSpans/aggregates", ViewAccess(aH.getFilteredSpanAggregates)).Methods(http.MethodPost) - router.HandleFunc("/api/v1/getTagValues", ViewAccess(aH.getTagValues)).Methods(http.MethodPost) - router.HandleFunc("/api/v1/errors", ViewAccess(aH.getErrors)).Methods(http.MethodGet) - router.HandleFunc("/api/v1/errorWithId", ViewAccess(aH.getErrorForId)).Methods(http.MethodGet) - router.HandleFunc("/api/v1/errorWithType", ViewAccess(aH.getErrorForType)).Methods(http.MethodGet) + + router.HandleFunc("/api/v1/listErrors", ViewAccess(aH.listErrors)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/countErrors", ViewAccess(aH.countErrors)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/errorFromErrorID", ViewAccess(aH.getErrorFromErrorID)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/errorFromGroupID", ViewAccess(aH.getErrorFromGroupID)).Methods(http.MethodGet) + router.HandleFunc("/api/v1/nextPrevErrorIDs", ViewAccess(aH.getNextPrevErrorIDs)).Methods(http.MethodGet) router.HandleFunc("/api/v1/disks", ViewAccess(aH.getDisks)).Methods(http.MethodGet) @@ -379,12 +388,12 @@ func Intersection(a, b []int) (c []int) { func (aH *APIHandler) getRule(w http.ResponseWriter, r *http.Request) { id := mux.Vars(r)["id"] - alertList, apiErrorObj := (*aH.reader).GetRule(id) - if apiErrorObj != nil { - 
respondError(w, apiErrorObj, nil) + ruleResponse, err := aH.ruleManager.GetRule(id) + if err != nil { + respondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil) return } - aH.respond(w, alertList) + aH.respond(w, ruleResponse) } func (aH *APIHandler) metricAutocompleteMetricName(w http.ResponseWriter, r *http.Request) { @@ -615,13 +624,17 @@ func (aH *APIHandler) queryRangeMetricsV2(w http.ResponseWriter, r *http.Request aH.respond(w, resp) } -func (aH *APIHandler) listRulesFromProm(w http.ResponseWriter, r *http.Request) { - alertList, apiErrorObj := (*aH.reader).ListRulesFromProm() - if apiErrorObj != nil { - respondError(w, apiErrorObj, nil) +func (aH *APIHandler) listRules(w http.ResponseWriter, r *http.Request) { + + rules, err := aH.ruleManager.ListRuleStates() + if err != nil { + respondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil) return } - aH.respond(w, alertList) + + // todo(amol): need to add sorter + + aH.respond(w, rules) } func (aH *APIHandler) getDashboards(w http.ResponseWriter, r *http.Request) { @@ -757,32 +770,35 @@ func (aH *APIHandler) createDashboards(w http.ResponseWriter, r *http.Request) { } func (aH *APIHandler) deleteRule(w http.ResponseWriter, r *http.Request) { + id := mux.Vars(r)["id"] - apiErrorObj := (*aH.reader).DeleteRule(id) + err := aH.ruleManager.DeleteRule(id) - if apiErrorObj != nil { - respondError(w, apiErrorObj, nil) + if err != nil { + respondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil) return } aH.respond(w, "rule successfully deleted") } + func (aH *APIHandler) editRule(w http.ResponseWriter, r *http.Request) { id := mux.Vars(r)["id"] - var postData map[string]string - err := json.NewDecoder(r.Body).Decode(&postData) + defer r.Body.Close() + body, err := ioutil.ReadAll(r.Body) if err != nil { - respondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, "Error reading request body") + zap.S().Errorf("msg: error in getting req body of edit rule API\n", "\t error:", err) + respondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) return } - apiErrorObj := (*aH.reader).EditRule(postData["data"], id) + err = aH.ruleManager.EditRule(string(body), id) - if apiErrorObj != nil { - respondError(w, apiErrorObj, nil) + if err != nil { + respondError(w, &model.ApiError{Typ: model.ErrorInternal, Err: err}, nil) return } @@ -906,20 +922,17 @@ func (aH *APIHandler) createChannel(w http.ResponseWriter, r *http.Request) { func (aH *APIHandler) createRule(w http.ResponseWriter, r *http.Request) { - decoder := json.NewDecoder(r.Body) - - var postData map[string]string - err := decoder.Decode(&postData) - + defer r.Body.Close() + body, err := ioutil.ReadAll(r.Body) if err != nil { + zap.S().Errorf("Error in getting req body for create rule API\n", err) respondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) return } - apiErrorObj := (*aH.reader).CreateRule(postData["data"]) - - if apiErrorObj != nil { - respondError(w, apiErrorObj, nil) + err = aH.ruleManager.CreateRule(string(body)) + if err != nil { + respondError(w, &model.ApiError{Typ: model.ErrorBadData, Err: err}, nil) return } @@ -1177,49 +1190,78 @@ func (aH *APIHandler) searchTraces(w http.ResponseWriter, r *http.Request) { } -func (aH *APIHandler) getErrors(w http.ResponseWriter, r *http.Request) { +func (aH *APIHandler) listErrors(w http.ResponseWriter, r *http.Request) { - query, err := parseErrorsRequest(r) + query, err := parseListErrorsRequest(r) if aH.handleError(w, err, http.StatusBadRequest) { 
return } - result, apiErr := (*aH.reader).GetErrors(r.Context(), query) + result, apiErr := (*aH.reader).ListErrors(r.Context(), query) if apiErr != nil && aH.handleError(w, apiErr.Err, http.StatusInternalServerError) { return } aH.writeJSON(w, r, result) - } -func (aH *APIHandler) getErrorForId(w http.ResponseWriter, r *http.Request) { +func (aH *APIHandler) countErrors(w http.ResponseWriter, r *http.Request) { - query, err := parseErrorRequest(r) + query, err := parseCountErrorsRequest(r) if aH.handleError(w, err, http.StatusBadRequest) { return } - result, apiErr := (*aH.reader).GetErrorForId(r.Context(), query) - if apiErr != nil && aH.handleError(w, apiErr.Err, http.StatusInternalServerError) { + result, apiErr := (*aH.reader).CountErrors(r.Context(), query) + if apiErr != nil { + respondError(w, apiErr, nil) return } aH.writeJSON(w, r, result) - } -func (aH *APIHandler) getErrorForType(w http.ResponseWriter, r *http.Request) { +func (aH *APIHandler) getErrorFromErrorID(w http.ResponseWriter, r *http.Request) { - query, err := parseErrorRequest(r) + query, err := parseGetErrorRequest(r) if aH.handleError(w, err, http.StatusBadRequest) { return } - result, apiErr := (*aH.reader).GetErrorForType(r.Context(), query) - if apiErr != nil && aH.handleError(w, apiErr.Err, http.StatusInternalServerError) { + result, apiErr := (*aH.reader).GetErrorFromErrorID(r.Context(), query) + if apiErr != nil { + respondError(w, apiErr, nil) return } aH.writeJSON(w, r, result) +} +func (aH *APIHandler) getNextPrevErrorIDs(w http.ResponseWriter, r *http.Request) { + + query, err := parseGetErrorRequest(r) + if aH.handleError(w, err, http.StatusBadRequest) { + return + } + result, apiErr := (*aH.reader).GetNextPrevErrorIDs(r.Context(), query) + if apiErr != nil { + respondError(w, apiErr, nil) + return + } + + aH.writeJSON(w, r, result) +} + +func (aH *APIHandler) getErrorFromGroupID(w http.ResponseWriter, r *http.Request) { + + query, err := parseGetErrorRequest(r) + if aH.handleError(w, err, http.StatusBadRequest) { + return + } + result, apiErr := (*aH.reader).GetErrorFromGroupID(r.Context(), query) + if apiErr != nil { + respondError(w, apiErr, nil) + return + } + + aH.writeJSON(w, r, result) } func (aH *APIHandler) getSpanFilters(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/query-service/app/metrics/query_builder.go b/pkg/query-service/app/metrics/query_builder.go index bf1896af2e..26f57261b9 100644 --- a/pkg/query-service/app/metrics/query_builder.go +++ b/pkg/query-service/app/metrics/query_builder.go @@ -91,10 +91,9 @@ func BuildMetricsTimeSeriesFilterQuery(fs *model.FilterSet, groupTags []string, if fs != nil && len(fs.Items) != 0 { for _, item := range fs.Items { toFormat := item.Value + op := strings.ToLower(strings.TrimSpace(item.Operator)) // if the received value is an array for like/match op, just take the first value - if strings.ToLower(item.Operation) == "like" || - strings.ToLower(item.Operation) == "match" || - strings.ToLower(item.Operation) == "nlike" { + if op == "like" || op == "match" || op == "nlike" || op == "nmatch" { x, ok := item.Value.([]interface{}) if ok { if len(x) == 0 { @@ -104,7 +103,7 @@ func BuildMetricsTimeSeriesFilterQuery(fs *model.FilterSet, groupTags []string, } } fmtVal := formattedValue(toFormat) - switch op := strings.ToLower(item.Operation); op { + switch op { case "eq": conditions = append(conditions, fmt.Sprintf("labels_object.%s = %s", item.Key, fmtVal)) case "neq": @@ -119,6 +118,8 @@ func BuildMetricsTimeSeriesFilterQuery(fs *model.FilterSet, 
groupTags []string, conditions = append(conditions, fmt.Sprintf("notLike(labels_object.%s, %s)", item.Key, fmtVal)) case "match": conditions = append(conditions, fmt.Sprintf("match(labels_object.%s, %s)", item.Key, fmtVal)) + case "nmatch": + conditions = append(conditions, fmt.Sprintf("not match(labels_object.%s, %s)", item.Key, fmtVal)) default: return "", fmt.Errorf("unsupported operation") } @@ -416,6 +417,5 @@ func PrepareBuilderMetricQueries(qp *model.QueryRangeParamsV2, tableName string) if len(errs) != 0 { return &RunQueries{Err: fmt.Errorf("errors with formulas: %s", FormatErrs(errs, "\n"))} } - fmt.Println(namedQueries) return &RunQueries{Queries: namedQueries} } diff --git a/pkg/query-service/app/metrics/query_builder_test.go b/pkg/query-service/app/metrics/query_builder_test.go index 4530a01a79..f6f9944605 100644 --- a/pkg/query-service/app/metrics/query_builder_test.go +++ b/pkg/query-service/app/metrics/query_builder_test.go @@ -42,8 +42,9 @@ func TestBuildQueryWithFilters(t *testing.T) { "a": { QueryName: "a", MetricName: "name", - TagFilters: &model.FilterSet{Operation: "AND", Items: []model.FilterItem{ - {Key: "a", Value: "b", Operation: "neq"}, + TagFilters: &model.FilterSet{Operator: "AND", Items: []model.FilterItem{ + {Key: "a", Value: "b", Operator: "neq"}, + {Key: "code", Value: "ERROR_*", Operator: "nmatch"}, }}, AggregateOperator: model.RATE_MAX, Expression: "a", @@ -56,6 +57,7 @@ func TestBuildQueryWithFilters(t *testing.T) { So(queries["a"], ShouldContainSubstring, "WHERE metric_name = 'name' AND labels_object.a != 'b'") So(queries["a"], ShouldContainSubstring, "runningDifference(value)/runningDifference(ts)") + So(queries["a"], ShouldContainSubstring, "not match(labels_object.code, 'ERROR_*')") }) } @@ -70,8 +72,8 @@ func TestBuildQueryWithMultipleQueries(t *testing.T) { "a": { QueryName: "a", MetricName: "name", - TagFilters: &model.FilterSet{Operation: "AND", Items: []model.FilterItem{ - {Key: "in", Value: []interface{}{"a", "b", "c"}, Operation: "in"}, + TagFilters: &model.FilterSet{Operator: "AND", Items: []model.FilterItem{ + {Key: "in", Value: []interface{}{"a", "b", "c"}, Operator: "in"}, }}, AggregateOperator: model.RATE_AVG, Expression: "a", @@ -103,8 +105,8 @@ func TestBuildQueryWithMultipleQueriesAndFormula(t *testing.T) { "a": { QueryName: "a", MetricName: "name", - TagFilters: &model.FilterSet{Operation: "AND", Items: []model.FilterItem{ - {Key: "in", Value: []interface{}{"a", "b", "c"}, Operation: "in"}, + TagFilters: &model.FilterSet{Operator: "AND", Items: []model.FilterItem{ + {Key: "in", Value: []interface{}{"a", "b", "c"}, Operator: "in"}, }}, AggregateOperator: model.RATE_MAX, Expression: "a", diff --git a/pkg/query-service/app/parser.go b/pkg/query-service/app/parser.go index 9d3705da9f..e81b986a3d 100644 --- a/pkg/query-service/app/parser.go +++ b/pkg/query-service/app/parser.go @@ -360,28 +360,6 @@ func parseFilteredSpanAggregatesRequest(r *http.Request) (*model.GetFilteredSpan return postData, nil } -func parseErrorRequest(r *http.Request) (*model.GetErrorParams, error) { - - params := &model.GetErrorParams{} - - serviceName := r.URL.Query().Get("serviceName") - if len(serviceName) != 0 { - params.ServiceName = serviceName - } - - errorType := r.URL.Query().Get("errorType") - if len(errorType) != 0 { - params.ErrorType = errorType - } - - errorId := r.URL.Query().Get("errorId") - if len(errorId) != 0 { - params.ErrorID = errorId - } - - return params, nil -} - func parseTagFilterRequest(r *http.Request) (*model.TagFilterParams, error) { 
var postData *model.TagFilterParams err := json.NewDecoder(r.Body).Decode(&postData) @@ -427,7 +405,10 @@ func parseTagValueRequest(r *http.Request) (*model.TagFilterParams, error) { } -func parseErrorsRequest(r *http.Request) (*model.GetErrorsParams, error) { +func parseListErrorsRequest(r *http.Request) (*model.ListErrorsParams, error) { + + var allowedOrderParams = []string{"exceptionType", "exceptionCount", "firstSeen", "lastSeen", "serviceName"} + var allowedOrderDirections = []string{"ascending", "descending"} startTime, err := parseTime("start", r) if err != nil { @@ -438,9 +419,79 @@ func parseErrorsRequest(r *http.Request) (*model.GetErrorsParams, error) { return nil, err } - params := &model.GetErrorsParams{ - Start: startTime, - End: endTime, + order := r.URL.Query().Get("order") + if len(order) > 0 && !DoesExistInSlice(order, allowedOrderDirections) { + return nil, fmt.Errorf("given order: %s is not allowed in query", order) + } + orderParam := r.URL.Query().Get("orderParam") + if len(orderParam) > 0 && !DoesExistInSlice(orderParam, allowedOrderParams) { + return nil, fmt.Errorf("given orderParam: %s is not allowed in query", orderParam) + } + limit := r.URL.Query().Get("limit") + offset := r.URL.Query().Get("offset") + + if len(offset) == 0 || len(limit) == 0 { + return nil, fmt.Errorf("offset or limit param cannot be empty from the query") + } + + limitInt, err := strconv.Atoi(limit) + if err != nil { + return nil, errors.New("limit param is not in correct format") + } + offsetInt, err := strconv.Atoi(offset) + if err != nil { + return nil, errors.New("offset param is not in correct format") + } + + params := &model.ListErrorsParams{ + Start: startTime, + End: endTime, + OrderParam: orderParam, + Order: order, + Limit: int64(limitInt), + Offset: int64(offsetInt), + } + + return params, nil +} + +func parseCountErrorsRequest(r *http.Request) (*model.CountErrorsParams, error) { + + startTime, err := parseTime("start", r) + if err != nil { + return nil, err + } + endTime, err := parseTimeMinusBuffer("end", r) + if err != nil { + return nil, err + } + + params := &model.CountErrorsParams{ + Start: startTime, + End: endTime, + } + + return params, nil +} + +func parseGetErrorRequest(r *http.Request) (*model.GetErrorParams, error) { + + timestamp, err := parseTime("timestamp", r) + if err != nil { + return nil, err + } + + groupID := r.URL.Query().Get("groupID") + + if len(groupID) == 0 { + return nil, fmt.Errorf("groupID param cannot be empty from the query") + } + errorID := r.URL.Query().Get("errorID") + + params := &model.GetErrorParams{ + Timestamp: timestamp, + GroupID: groupID, + ErrorID: errorID, } return params, nil diff --git a/pkg/query-service/app/server.go b/pkg/query-service/app/server.go index 5bccea66e2..1815e5c7f0 100644 --- a/pkg/query-service/app/server.go +++ b/pkg/query-service/app/server.go @@ -11,6 +11,7 @@ import ( "github.com/gorilla/handlers" "github.com/gorilla/mux" + "github.com/jmoiron/sqlx" "github.com/rs/cors" "github.com/soheilhy/cmux" @@ -19,15 +20,22 @@ import ( "go.signoz.io/query-service/constants" "go.signoz.io/query-service/dao" "go.signoz.io/query-service/healthcheck" + am "go.signoz.io/query-service/integrations/alertManager" "go.signoz.io/query-service/interfaces" + pqle "go.signoz.io/query-service/pqlEngine" + "go.signoz.io/query-service/rules" "go.signoz.io/query-service/telemetry" "go.signoz.io/query-service/utils" "go.uber.org/zap" ) type ServerOptions struct { + PromConfigPath string HTTPHostPort string
PrivateHostPort string + // alert specific params + DisableRules bool + RuleRepoURL string } // Server runs HTTP, Mux and a grpc server @@ -35,6 +43,9 @@ type Server struct { // logger *zap.Logger // tracer opentracing.Tracer // TODO make part of flags.Service serverOptions *ServerOptions + conn net.Listener + ruleManager *rules.Manager + separatePorts bool // public http router httpConn net.Listener @@ -58,6 +69,7 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { if err := dao.InitDao("sqlite", constants.RELATIONAL_DATASOURCE_PATH); err != nil { return nil, err } + localDB, err := dashboards.InitDB(constants.RELATIONAL_DATASOURCE_PATH) if err != nil { @@ -70,16 +82,20 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { storage := os.Getenv("STORAGE") if storage == "clickhouse" { zap.S().Info("Using ClickHouse as datastore ...") - clickhouseReader := clickhouseReader.NewReader(localDB) + clickhouseReader := clickhouseReader.NewReader(localDB, serverOptions.PromConfigPath) go clickhouseReader.Start() reader = clickhouseReader } else { return nil, fmt.Errorf("Storage type: %s is not supported in query service", storage) } - telemetry.GetInstance().SetReader(reader) + rm, err := makeRulesManager(serverOptions.PromConfigPath, constants.GetAlertManagerApiPrefix(), serverOptions.RuleRepoURL, localDB, reader, serverOptions.DisableRules) + if err != nil { + return nil, err + } - apiHandler, err := NewAPIHandler(&reader, dao.DB()) + telemetry.GetInstance().SetReader(reader) + apiHandler, err := NewAPIHandler(&reader, dao.DB(), rm) if err != nil { return nil, err } @@ -87,6 +103,7 @@ func NewServer(serverOptions *ServerOptions) (*Server, error) { s := &Server{ // logger: logger, // tracer: tracer, + ruleManager: rm, serverOptions: serverOptions, unavailableChannel: make(chan healthcheck.Status), } @@ -262,6 +279,13 @@ func (s *Server) initListeners() error { // Start listening on http and private http port concurrently func (s *Server) Start() error { + // initiate rule manager first + if !s.serverOptions.DisableRules { + s.ruleManager.Start() + } else { + zap.S().Info("msg: Rules disabled as rules.disable is set to TRUE") + } + err := s.initListeners() if err != nil { return err @@ -315,3 +339,49 @@ func (s *Server) Start() error { return nil } + +func makeRulesManager( + promConfigPath, + alertManagerURL string, + ruleRepoURL string, + db *sqlx.DB, + ch interfaces.Reader, + disableRules bool) (*rules.Manager, error) { + + // create engine + pqle, err := pqle.FromConfigPath(promConfigPath) + if err != nil { + return nil, fmt.Errorf("failed to create pql engine : %v", err) + } + + // notifier opts + notifierOpts := am.NotifierOptions{ + QueueCapacity: 10000, + Timeout: 1 * time.Second, + AlertManagerURLs: []string{alertManagerURL}, + } + + // create manager opts + managerOpts := &rules.ManagerOptions{ + NotifierOpts: notifierOpts, + Queriers: &rules.Queriers{ + PqlEngine: pqle, + Ch: ch.GetConn(), + }, + RepoURL: ruleRepoURL, + DBConn: db, + Context: context.Background(), + Logger: nil, + DisableRules: disableRules, + } + + // create Manager + manager, err := rules.NewManager(managerOpts) + if err != nil { + return nil, fmt.Errorf("rule manager error: %v", err) + } + + zap.S().Info("rules manager is ready") + + return manager, nil +} diff --git a/pkg/query-service/constants/constants.go b/pkg/query-service/constants/constants.go index b4bc4b08ef..69ed855262 100644 --- a/pkg/query-service/constants/constants.go +++ b/pkg/query-service/constants/constants.go @@ -38,29 
+38,35 @@ var AmChannelApiPath = GetOrDefaultEnv("ALERTMANAGER_API_CHANNEL_PATH", "v1/rout var RELATIONAL_DATASOURCE_PATH = GetOrDefaultEnv("SIGNOZ_LOCAL_DB_PATH", "/var/lib/signoz/signoz.db") const ( - ServiceName = "serviceName" - HttpRoute = "httpRoute" - HttpCode = "httpCode" - HttpHost = "httpHost" - HttpUrl = "httpUrl" - HttpMethod = "httpMethod" - Component = "component" - OperationDB = "name" - OperationRequest = "operation" - Status = "status" - Duration = "duration" - DBName = "dbName" - DBOperation = "dbOperation" - DBSystem = "dbSystem" - MsgSystem = "msgSystem" - MsgOperation = "msgOperation" - Timestamp = "timestamp" - Descending = "descending" - Ascending = "ascending" - ContextTimeout = 60 // seconds - StatusPending = "pending" - StatusFailed = "failed" - StatusSuccess = "success" + ServiceName = "serviceName" + HttpRoute = "httpRoute" + HttpCode = "httpCode" + HttpHost = "httpHost" + HttpUrl = "httpUrl" + HttpMethod = "httpMethod" + Component = "component" + OperationDB = "name" + OperationRequest = "operation" + Status = "status" + Duration = "duration" + DBName = "dbName" + DBOperation = "dbOperation" + DBSystem = "dbSystem" + MsgSystem = "msgSystem" + MsgOperation = "msgOperation" + Timestamp = "timestamp" + RPCMethod = "rpcMethod" + ResponseStatusCode = "responseStatusCode" + Descending = "descending" + Ascending = "ascending" + ContextTimeout = 60 // seconds + StatusPending = "pending" + StatusFailed = "failed" + StatusSuccess = "success" + ExceptionType = "exceptionType" + ExceptionCount = "exceptionCount" + LastSeen = "lastSeen" + FirstSeen = "firstSeen" ) const ( SIGNOZ_METRIC_DBNAME = "signoz_metrics" @@ -68,6 +74,12 @@ const ( SIGNOZ_TIMESERIES_TABLENAME = "time_series_v2" ) +// alert related constants +const ( + // AlertHelpPage is used in case default alert repo url is not set + AlertHelpPage = "https://signoz.io/docs/userguide/alerts-management/#generator-url" +) + func GetOrDefaultEnv(key string, fallback string) string { v := os.Getenv(key) if len(v) == 0 { diff --git a/pkg/query-service/integrations/alertManager/manager.go b/pkg/query-service/integrations/alertManager/manager.go index 47dc96f366..21b58174f9 100644 --- a/pkg/query-service/integrations/alertManager/manager.go +++ b/pkg/query-service/integrations/alertManager/manager.go @@ -5,35 +5,44 @@ import ( "bytes" "encoding/json" "fmt" - "net/http" - "go.signoz.io/query-service/constants" "go.signoz.io/query-service/model" "go.uber.org/zap" + "net/http" + neturl "net/url" ) const contentType = "application/json" type Manager interface { + URL() *neturl.URL + URLPath(path string) *neturl.URL AddRoute(receiver *Receiver) *model.ApiError EditRoute(receiver *Receiver) *model.ApiError DeleteRoute(name string) *model.ApiError TestReceiver(receiver *Receiver) *model.ApiError } -func New(url string) Manager { +func New(url string) (Manager, error) { if url == "" { url = constants.GetAlertManagerApiPrefix() } - return &manager{ - url: url, + urlParsed, err := neturl.Parse(url) + if err != nil { + return nil, err } + + return &manager{ + url: url, + parsedURL: urlParsed, + }, nil } type manager struct { - url string + url string + parsedURL *neturl.URL } func prepareAmChannelApiURL() string { @@ -52,6 +61,19 @@ func prepareTestApiURL() string { return fmt.Sprintf("%s%s", basePath, "v1/testReceiver") } +func (m *manager) URL() *neturl.URL { + return m.parsedURL +} + +func (m *manager) URLPath(path string) *neturl.URL { + upath, err := neturl.Parse(path) + if err != nil { + return nil + } + + return 
m.parsedURL.ResolveReference(upath) +} + func (m *manager) AddRoute(receiver *Receiver) *model.ApiError { receiverString, _ := json.Marshal(receiver) diff --git a/pkg/query-service/integrations/alertManager/model.go b/pkg/query-service/integrations/alertManager/model.go index 705b0492fd..bb709e430f 100644 --- a/pkg/query-service/integrations/alertManager/model.go +++ b/pkg/query-service/integrations/alertManager/model.go @@ -1,5 +1,11 @@ package alertManager +import ( + "fmt" + "go.signoz.io/query-service/utils/labels" + "time" +) + // Receiver configuration provides configuration on how to contact a receiver. type Receiver struct { // A unique identifier for this receiver. @@ -19,4 +25,51 @@ type Receiver struct { type ReceiverResponse struct { Status string `json:"status"` Data Receiver `json:"data"` -} \ No newline at end of file +} + +// Alert is a generic representation of an alert in the Prometheus eco-system. +type Alert struct { + // Label value pairs for purpose of aggregation, matching, and disposition + // dispatching. This must minimally include an "alertname" label. + Labels labels.BaseLabels `json:"labels"` + + // Extra key/value information which does not define alert identity. + Annotations labels.BaseLabels `json:"annotations"` + + // The known time range for this alert. Both ends are optional. + StartsAt time.Time `json:"startsAt,omitempty"` + EndsAt time.Time `json:"endsAt,omitempty"` + GeneratorURL string `json:"generatorURL,omitempty"` +} + +// Name returns the name of the alert. It is equivalent to the "alertname" label. +func (a *Alert) Name() string { + return a.Labels.Get(labels.AlertNameLabel) +} + +// Hash returns a hash over the alert. It is equivalent to the alert labels hash. +func (a *Alert) Hash() uint64 { + return a.Labels.Hash() +} + +func (a *Alert) String() string { + s := fmt.Sprintf("%s[%s]", a.Name(), fmt.Sprintf("%016x", a.Hash())[:7]) + if a.Resolved() { + return s + "[resolved]" + } + return s + "[active]" +} + +// Resolved returns true iff the activity interval ended in the past. +func (a *Alert) Resolved() bool { + return a.ResolvedAt(time.Now()) +} + +// ResolvedAt returns true iff the activity interval ended before +// the given timestamp. +func (a *Alert) ResolvedAt(ts time.Time) bool { + if a.EndsAt.IsZero() { + return false + } + return !a.EndsAt.After(ts) +} diff --git a/pkg/query-service/integrations/alertManager/notifier.go b/pkg/query-service/integrations/alertManager/notifier.go new file mode 100644 index 0000000000..148d489ed0 --- /dev/null +++ b/pkg/query-service/integrations/alertManager/notifier.go @@ -0,0 +1,310 @@ +package alertManager + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "sync/atomic" + + "net/http" + "net/url" + "sync" + "time" + + old_ctx "golang.org/x/net/context" + + "github.com/go-kit/kit/log" + "github.com/go-kit/log/level" + + "go.uber.org/zap" + "golang.org/x/net/context/ctxhttp" +) + +const ( + alertPushEndpoint = "v1/alerts" + contentTypeJSON = "application/json" +) + +// Notifier is responsible for dispatching alert notifications to an +// alert manager service. +type Notifier struct { + queue []*Alert + opts *NotifierOptions + + more chan struct{} + mtx sync.RWMutex + ctx context.Context + cancel func() + + alertmanagers *alertmanagerSet + logger log.Logger +} + +// NotifierOptions are the configurable parameters of a Notifier. +type NotifierOptions struct { + QueueCapacity int + // Used for sending HTTP requests to the Alertmanager.
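+ // When nil, NewNotifier falls back to ctxhttp.Do (see below), so tests can inject + // a stub transport here; a hypothetical sketch: + // opts.Do = func(ctx old_ctx.Context, c *http.Client, req *http.Request) (*http.Response, error) { return c.Do(req.WithContext(ctx)) }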
+ Do func(ctx old_ctx.Context, client *http.Client, req *http.Request) (*http.Response, error) + // List of alert manager urls + AlertManagerURLs []string + // timeout limit on requests + Timeout time.Duration +} + +func (opts *NotifierOptions) String() string { + var urls string + for _, u := range opts.AlertManagerURLs { + urls = fmt.Sprintf("%s %s", urls, u) + } + return urls +} + +// todo(amol): add metrics + +func NewNotifier(o *NotifierOptions, logger log.Logger) (*Notifier, error) { + ctx, cancel := context.WithCancel(context.Background()) + if o.Do == nil { + o.Do = ctxhttp.Do + } + if logger == nil { + logger = log.NewNopLogger() + } + + n := &Notifier{ + queue: make([]*Alert, 0, o.QueueCapacity), + ctx: ctx, + cancel: cancel, + more: make(chan struct{}, 1), + opts: o, + logger: logger, + } + timeout := o.Timeout + + if int64(timeout) == 0 { + timeout = time.Duration(30 * time.Second) + } + + amset, err := newAlertmanagerSet(o.AlertManagerURLs, timeout, logger) + if err != nil { + zap.S().Errorf("failed to parse alert manager urls") + return n, err + } + n.alertmanagers = amset + zap.S().Info("Starting notifier with alert manager:", o.AlertManagerURLs) + return n, nil +} + +const maxBatchSize = 64 + +func (n *Notifier) queueLen() int { + n.mtx.RLock() + defer n.mtx.RUnlock() + + return len(n.queue) +} + +func (n *Notifier) nextBatch() []*Alert { + n.mtx.Lock() + defer n.mtx.Unlock() + + var alerts []*Alert + + if len(n.queue) > maxBatchSize { + alerts = append(make([]*Alert, 0, maxBatchSize), n.queue[:maxBatchSize]...) + n.queue = n.queue[maxBatchSize:] + } else { + alerts = append(make([]*Alert, 0, len(n.queue)), n.queue...) + n.queue = n.queue[:0] + } + + return alerts +} + +// Run dispatches notifications continuously. +func (n *Notifier) Run() { + zap.S().Info("msg: Initiating alert notifier...") + for { + select { + case <-n.ctx.Done(): + return + case <-n.more: + } + alerts := n.nextBatch() + + if !n.sendAll(alerts...) { + zap.S().Warn("msg: dropped alerts", "\t count:", len(alerts)) + // n.metrics.dropped.Add(float64(len(alerts))) + } + // If the queue still has items left, kick off the next iteration. + if n.queueLen() > 0 { + n.setMore() + } + } +} + +// Send queues the given notification requests for processing. +// Panics if called on a handler that is not running. +func (n *Notifier) Send(alerts ...*Alert) { + n.mtx.Lock() + defer n.mtx.Unlock() + + // Queue capacity should be significantly larger than a single alert + // batch could be. + if d := len(alerts) - n.opts.QueueCapacity; d > 0 { + alerts = alerts[d:] + + level.Warn(n.logger).Log("msg", "Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) + //n.metrics.dropped.Add(float64(d)) + } + + // If the queue is full, remove the oldest alerts in favor + // of newer ones. + if d := (len(n.queue) + len(alerts)) - n.opts.QueueCapacity; d > 0 { + n.queue = n.queue[d:] + + level.Warn(n.logger).Log("msg", "Alert notification queue full, dropping alerts", "num_dropped", d) + //n.metrics.dropped.Add(float64(d)) + } + n.queue = append(n.queue, alerts...) + + // Notify sending goroutine that there are alerts to be processed. + n.setMore() +} + +// setMore signals that the alert queue has items. +func (n *Notifier) setMore() { + // If we cannot send on the channel, it means the signal already exists + // and has not been consumed yet. + select { + case n.more <- struct{}{}: + default: + } +} + +// Alertmanagers returns a slice of Alertmanager URLs. 
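+// +// A minimal usage sketch (illustrative only; the base URL is a placeholder): +// +// n, _ := NewNotifier(&NotifierOptions{AlertManagerURLs: []string{"http://localhost:9093/api/"}}, nil) +// for _, u := range n.Alertmanagers() { +// fmt.Println(u) // http://localhost:9093/api/v1/alerts +// }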
+func (n *Notifier) Alertmanagers() []*url.URL { + n.mtx.RLock() + amset := n.alertmanagers + n.mtx.RUnlock() + + var res []*url.URL + + amset.mtx.RLock() + for _, am := range amset.ams { + res = append(res, am.URLPath(alertPushEndpoint)) + } + amset.mtx.RUnlock() + + return res +} + +// sendAll sends the alerts to all configured Alertmanagers concurrently. +// It returns true if the alerts could be sent successfully to at least one Alertmanager. +func (n *Notifier) sendAll(alerts ...*Alert) bool { + + b, err := json.Marshal(alerts) + if err != nil { + zap.S().Errorf("msg", "Encoding alerts failed", "err", err) + return false + } + + n.mtx.RLock() + ams := n.alertmanagers + n.mtx.RUnlock() + + var ( + wg sync.WaitGroup + numSuccess uint64 + ) + + ams.mtx.RLock() + + for _, am := range ams.ams { + wg.Add(1) + + ctx, cancel := context.WithTimeout(n.ctx, time.Duration(ams.timeout)) + defer cancel() + + go func(ams *alertmanagerSet, am Manager) { + u := am.URLPath(alertPushEndpoint).String() + if err := n.sendOne(ctx, ams.client, u, b); err != nil { + zap.S().Errorf("alertmanager", u, "count", len(alerts), "msg", "Error calling alert API", "err", err) + } else { + atomic.AddUint64(&numSuccess, 1) + } + // n.metrics.latency.WithLabelValues(u).Observe(time.Since(begin).Seconds()) + // n.metrics.sent.WithLabelValues(u).Add(float64(len(alerts))) + + wg.Done() + }(ams, am) + } + ams.mtx.RUnlock() + + wg.Wait() + + return numSuccess > 0 +} + +func (n *Notifier) sendOne(ctx context.Context, c *http.Client, url string, b []byte) error { + req, err := http.NewRequest("POST", url, bytes.NewReader(b)) + if err != nil { + return err + } + req.Header.Set("Content-Type", contentTypeJSON) + resp, err := n.opts.Do(ctx, c, req) + if err != nil { + return err + } + defer resp.Body.Close() + + // Any HTTP status 2xx is OK. + if resp.StatusCode/100 != 2 { + return fmt.Errorf("bad response status %v", resp.Status) + } + return err +} + +// Stop shuts down the notification handler. +func (n *Notifier) Stop() { + level.Info(n.logger).Log("msg", "Stopping notification manager...") + n.cancel() +} + +// alertmanagerSet contains a set of Alertmanagers discovered via a group of service +// discovery definitions that have a common configuration on how alerts should be sent. 
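+// Invalid URLs are skipped at construction time; newAlertmanagerSet (below) fails +// only when none of the supplied URLs can be parsed.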
+type alertmanagerSet struct { + urls []string + client *http.Client + timeout time.Duration + mtx sync.RWMutex + ams []Manager + + logger log.Logger +} + +func newAlertmanagerSet(urls []string, timeout time.Duration, logger log.Logger) (*alertmanagerSet, error) { + client := &http.Client{} + + s := &alertmanagerSet{ + client: client, + urls: urls, + logger: logger, + timeout: timeout, + } + + ams := []Manager{} + for _, u := range urls { + am, err := New(u) + if err != nil { + level.Error(s.logger).Log(fmt.Sprintf("invalid alert manager url %s: %s", u, err)) + } else { + ams = append(ams, am) + } + } + if len(ams) == 0 { + return s, fmt.Errorf("no alert managers") + } + s.ams = ams + return s, nil +} diff --git a/pkg/query-service/interfaces/interface.go b/pkg/query-service/interfaces/interface.go index 9c52a4497d..5e9d01be8b 100644 --- a/pkg/query-service/interfaces/interface.go +++ b/pkg/query-service/interfaces/interface.go @@ -3,6 +3,7 @@ package interfaces import ( "context" + "github.com/ClickHouse/clickhouse-go/v2" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/util/stats" am "go.signoz.io/query-service/integrations/alertManager" @@ -16,12 +17,6 @@ type Reader interface { CreateChannel(receiver *am.Receiver) (*am.Receiver, *model.ApiError) EditChannel(receiver *am.Receiver, id string) (*am.Receiver, *model.ApiError) - GetRule(id string) (*model.RuleResponseItem, *model.ApiError) - ListRulesFromProm() (*model.AlertDiscovery, *model.ApiError) - CreateRule(alert string) *model.ApiError - EditRule(alert string, id string) *model.ApiError - DeleteRule(id string) *model.ApiError - GetInstantQueryMetricsResult(ctx context.Context, query *model.InstantQueryMetricsParams) (*promql.Result, *stats.QueryStats, *model.ApiError) GetQueryRangeResult(ctx context.Context, query *model.QueryRangeParams) (*promql.Result, *stats.QueryStats, *model.ApiError) GetServiceOverview(ctx context.Context, query *model.GetServiceOverviewParams) (*[]model.ServiceOverviewItem, *model.ApiError) @@ -41,9 +36,12 @@ type Reader interface { GetFilteredSpans(ctx context.Context, query *model.GetFilteredSpansParams) (*model.GetFilterSpansResponse, *model.ApiError) GetFilteredSpansAggregates(ctx context.Context, query *model.GetFilteredSpanAggregatesParams) (*model.GetFilteredSpansAggregatesResponse, *model.ApiError) - GetErrors(ctx context.Context, params *model.GetErrorsParams) (*[]model.Error, *model.ApiError) - GetErrorForId(ctx context.Context, params *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) - GetErrorForType(ctx context.Context, params *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) + ListErrors(ctx context.Context, params *model.ListErrorsParams) (*[]model.Error, *model.ApiError) + CountErrors(ctx context.Context, params *model.CountErrorsParams) (uint64, *model.ApiError) + GetErrorFromErrorID(ctx context.Context, params *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) + GetErrorFromGroupID(ctx context.Context, params *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) + GetNextPrevErrorIDs(ctx context.Context, params *model.GetErrorParams) (*model.NextPrevErrorIDs, *model.ApiError) + // Search Interfaces SearchTraces(ctx context.Context, traceID string) (*[]model.SearchSpansResult, error) @@ -59,4 +57,7 @@ type Reader interface { GetSpansInLastHeartBeatInterval(ctx context.Context) (uint64, error) GetTimeSeriesInfo(ctx context.Context) (map[string]interface{}, error) GetSamplesInfoInLastHeartBeatInterval(ctx 
context.Context) (uint64, error) + + // Connection needed for rules, not ideal but required + GetConn() clickhouse.Conn } diff --git a/pkg/query-service/main.go b/pkg/query-service/main.go index b837560531..e23ff8785b 100644 --- a/pkg/query-service/main.go +++ b/pkg/query-service/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "flag" "os" "os/signal" "syscall" @@ -25,6 +26,18 @@ func initZapLog() *zap.Logger { } func main() { + var promConfigPath string + + // disables rule execution but allows change to the rule definition + var disableRules bool + + // the url used to build link in the alert messages in slack and other systems + var ruleRepoURL string + + flag.StringVar(&promConfigPath, "config", "./config/prometheus.yml", "(prometheus config to read metrics)") + flag.BoolVar(&disableRules, "rules.disable", false, "(disable rule evaluation)") + flag.StringVar(&ruleRepoURL, "rules.repo-url", constants.AlertHelpPage, "(host address used to build rule link in alert messages)") + flag.Parse() loggerMgr := initZapLog() zap.ReplaceGlobals(loggerMgr) @@ -35,7 +48,10 @@ func main() { serverOptions := &app.ServerOptions{ HTTPHostPort: constants.HTTPHostPort, + PromConfigPath: promConfigPath, PrivateHostPort: constants.PrivateHostPort, + DisableRules: disableRules, + RuleRepoURL: ruleRepoURL, } // Read the jwt secret key diff --git a/pkg/query-service/model/queryParams.go b/pkg/query-service/model/queryParams.go index d1763a0440..2b964597ab 100644 --- a/pkg/query-service/model/queryParams.go +++ b/pkg/query-service/model/queryParams.go @@ -181,94 +181,102 @@ type TagQuery struct { } type GetFilteredSpansParams struct { - ServiceName []string `json:"serviceName"` - Operation []string `json:"operation"` - Kind string `json:"kind"` - Status []string `json:"status"` - HttpRoute []string `json:"httpRoute"` - HttpCode []string `json:"httpCode"` - HttpUrl []string `json:"httpUrl"` - HttpHost []string `json:"httpHost"` - HttpMethod []string `json:"httpMethod"` - Component []string `json:"component"` - StartStr string `json:"start"` - EndStr string `json:"end"` - MinDuration string `json:"minDuration"` - MaxDuration string `json:"maxDuration"` - Limit int64 `json:"limit"` - OrderParam string `json:"orderParam"` - Order string `json:"order"` - Offset int64 `json:"offset"` - Tags []TagQuery `json:"tags"` - Exclude []string `json:"exclude"` - Start *time.Time - End *time.Time + ServiceName []string `json:"serviceName"` + Operation []string `json:"operation"` + Kind string `json:"kind"` + Status []string `json:"status"` + HttpRoute []string `json:"httpRoute"` + HttpCode []string `json:"httpCode"` + HttpUrl []string `json:"httpUrl"` + HttpHost []string `json:"httpHost"` + HttpMethod []string `json:"httpMethod"` + Component []string `json:"component"` + RPCMethod []string `json:"rpcMethod"` + ResponseStatusCode []string `json:"responseStatusCode"` + StartStr string `json:"start"` + EndStr string `json:"end"` + MinDuration string `json:"minDuration"` + MaxDuration string `json:"maxDuration"` + Limit int64 `json:"limit"` + OrderParam string `json:"orderParam"` + Order string `json:"order"` + Offset int64 `json:"offset"` + Tags []TagQuery `json:"tags"` + Exclude []string `json:"exclude"` + Start *time.Time + End *time.Time } type GetFilteredSpanAggregatesParams struct { - ServiceName []string `json:"serviceName"` - Operation []string `json:"operation"` - Kind string `json:"kind"` - Status []string `json:"status"` - HttpRoute []string `json:"httpRoute"` - HttpCode []string `json:"httpCode"` - HttpUrl 
[]string `json:"httpUrl"` - HttpHost []string `json:"httpHost"` - HttpMethod []string `json:"httpMethod"` - Component []string `json:"component"` - MinDuration string `json:"minDuration"` - MaxDuration string `json:"maxDuration"` - Tags []TagQuery `json:"tags"` - StartStr string `json:"start"` - EndStr string `json:"end"` - StepSeconds int `json:"step"` - Dimension string `json:"dimension"` - AggregationOption string `json:"aggregationOption"` - GroupBy string `json:"groupBy"` - Function string `json:"function"` - Exclude []string `json:"exclude"` - Start *time.Time - End *time.Time + ServiceName []string `json:"serviceName"` + Operation []string `json:"operation"` + Kind string `json:"kind"` + Status []string `json:"status"` + HttpRoute []string `json:"httpRoute"` + HttpCode []string `json:"httpCode"` + HttpUrl []string `json:"httpUrl"` + HttpHost []string `json:"httpHost"` + HttpMethod []string `json:"httpMethod"` + Component []string `json:"component"` + RPCMethod []string `json:"rpcMethod"` + ResponseStatusCode []string `json:"responseStatusCode"` + MinDuration string `json:"minDuration"` + MaxDuration string `json:"maxDuration"` + Tags []TagQuery `json:"tags"` + StartStr string `json:"start"` + EndStr string `json:"end"` + StepSeconds int `json:"step"` + Dimension string `json:"dimension"` + AggregationOption string `json:"aggregationOption"` + GroupBy string `json:"groupBy"` + Function string `json:"function"` + Exclude []string `json:"exclude"` + Start *time.Time + End *time.Time } type SpanFilterParams struct { - Status []string `json:"status"` - ServiceName []string `json:"serviceName"` - HttpRoute []string `json:"httpRoute"` - HttpCode []string `json:"httpCode"` - HttpUrl []string `json:"httpUrl"` - HttpHost []string `json:"httpHost"` - HttpMethod []string `json:"httpMethod"` - Component []string `json:"component"` - Operation []string `json:"operation"` - GetFilters []string `json:"getFilters"` - Exclude []string `json:"exclude"` - MinDuration string `json:"minDuration"` - MaxDuration string `json:"maxDuration"` - StartStr string `json:"start"` - EndStr string `json:"end"` - Start *time.Time - End *time.Time + Status []string `json:"status"` + ServiceName []string `json:"serviceName"` + HttpRoute []string `json:"httpRoute"` + HttpCode []string `json:"httpCode"` + HttpUrl []string `json:"httpUrl"` + HttpHost []string `json:"httpHost"` + HttpMethod []string `json:"httpMethod"` + Component []string `json:"component"` + Operation []string `json:"operation"` + RPCMethod []string `json:"rpcMethod"` + ResponseStatusCode []string `json:"responseStatusCode"` + GetFilters []string `json:"getFilters"` + Exclude []string `json:"exclude"` + MinDuration string `json:"minDuration"` + MaxDuration string `json:"maxDuration"` + StartStr string `json:"start"` + EndStr string `json:"end"` + Start *time.Time + End *time.Time } type TagFilterParams struct { - Status []string `json:"status"` - ServiceName []string `json:"serviceName"` - HttpRoute []string `json:"httpRoute"` - HttpCode []string `json:"httpCode"` - HttpUrl []string `json:"httpUrl"` - HttpHost []string `json:"httpHost"` - HttpMethod []string `json:"httpMethod"` - Component []string `json:"component"` - Operation []string `json:"operation"` - Exclude []string `json:"exclude"` - MinDuration string `json:"minDuration"` - MaxDuration string `json:"maxDuration"` - StartStr string `json:"start"` - EndStr string `json:"end"` - TagKey string `json:"tagKey"` - Start *time.Time - End *time.Time + Status []string `json:"status"` + ServiceName 
[]string `json:"serviceName"` + HttpRoute []string `json:"httpRoute"` + HttpCode []string `json:"httpCode"` + HttpUrl []string `json:"httpUrl"` + HttpHost []string `json:"httpHost"` + HttpMethod []string `json:"httpMethod"` + Component []string `json:"component"` + Operation []string `json:"operation"` + RPCMethod []string `json:"rpcMethod"` + ResponseStatusCode []string `json:"responseStatusCode"` + Exclude []string `json:"exclude"` + MinDuration string `json:"minDuration"` + MaxDuration string `json:"maxDuration"` + StartStr string `json:"start"` + EndStr string `json:"end"` + TagKey string `json:"tagKey"` + Start *time.Time + End *time.Time } type TTLParams struct { @@ -282,29 +290,33 @@ type GetTTLParams struct { Type string } -type GetErrorsParams struct { +type ListErrorsParams struct { + Start *time.Time + End *time.Time + Limit int64 + OrderParam string + Order string + Offset int64 +} + +type CountErrorsParams struct { Start *time.Time End *time.Time } type GetErrorParams struct { - ErrorType string - ErrorID string - ServiceName string + GroupID string + ErrorID string + Timestamp *time.Time } type FilterItem struct { - Key string `json:"key"` - Value interface{} `json:"value"` - Operation string `json:"op"` + Key string `json:"key"` + Value interface{} `json:"value"` + Operator string `json:"op"` } type FilterSet struct { - Operation string `json:"op,omitempty"` - Items []FilterItem `json:"items"` -} - -type RemoveTTLParams struct { - Type string - RemoveAllTTL bool + Operator string `json:"op,omitempty"` + Items []FilterItem `json:"items"` } diff --git a/pkg/query-service/model/response.go b/pkg/query-service/model/response.go index 523ad7e96e..0bdaf02dc7 100644 --- a/pkg/query-service/model/response.go +++ b/pkg/query-service/model/response.go @@ -118,18 +118,20 @@ type SearchSpansResult struct { } type GetFilterSpansResponseItem struct { - Timestamp time.Time `ch:"timestamp" json:"timestamp"` - SpanID string `ch:"spanID" json:"spanID"` - TraceID string `ch:"traceID" json:"traceID"` - ServiceName string `ch:"serviceName" json:"serviceName"` - Operation string `ch:"name" json:"operation"` - DurationNano uint64 `ch:"durationNano" json:"durationNano"` - HttpCode string `ch:"httpCode"` - HttpMethod string `ch:"httpMethod"` - GRPCode string `ch:"gRPCCode"` - GRPMethod string `ch:"gRPCMethod"` - StatusCode string `json:"statusCode"` - Method string `json:"method"` + Timestamp time.Time `ch:"timestamp" json:"timestamp"` + SpanID string `ch:"spanID" json:"spanID"` + TraceID string `ch:"traceID" json:"traceID"` + ServiceName string `ch:"serviceName" json:"serviceName"` + Operation string `ch:"name" json:"operation"` + DurationNano uint64 `ch:"durationNano" json:"durationNano"` + HttpCode string `ch:"httpCode"` + HttpMethod string `ch:"httpMethod"` + GRPCode string `ch:"gRPCCode"` + GRPMethod string `ch:"gRPCMethod"` + StatusCode string `json:"statusCode"` + Method string `json:"method"` + ResponseStatusCode string `ch:"responseStatusCode"` + RPCMethod string `ch:"rpcMethod"` } type GetFilterSpansResponse struct { @@ -303,6 +305,16 @@ type DBResponseHttpMethod struct { Count uint64 `ch:"count"` } +type DBResponseStatusCodeMethod struct { + ResponseStatusCode string `ch:"responseStatusCode"` + Count uint64 `ch:"count"` +} + +type DBResponseRPCMethod struct { + RPCMethod string `ch:"rpcMethod"` + Count uint64 `ch:"count"` +} + type DBResponseHttpHost struct { HttpHost string `ch:"httpHost"` Count uint64 `ch:"count"` @@ -323,16 +335,18 @@ type DBResponseTotal struct { } type 
SpanFiltersResponse struct { - ServiceName map[string]uint64 `json:"serviceName"` - Status map[string]uint64 `json:"status"` - Duration map[string]uint64 `json:"duration"` - Operation map[string]uint64 `json:"operation"` - HttpCode map[string]uint64 `json:"httpCode"` - HttpUrl map[string]uint64 `json:"httpUrl"` - HttpMethod map[string]uint64 `json:"httpMethod"` - HttpRoute map[string]uint64 `json:"httpRoute"` - HttpHost map[string]uint64 `json:"httpHost"` - Component map[string]uint64 `json:"component"` + ServiceName map[string]uint64 `json:"serviceName"` + Status map[string]uint64 `json:"status"` + Duration map[string]uint64 `json:"duration"` + Operation map[string]uint64 `json:"operation"` + HttpCode map[string]uint64 `json:"httpCode"` + ResponseStatusCode map[string]uint64 `json:"responseStatusCode"` + RPCMethod map[string]uint64 `json:"rpcMethod"` + HttpUrl map[string]uint64 `json:"httpUrl"` + HttpMethod map[string]uint64 `json:"httpMethod"` + HttpRoute map[string]uint64 `json:"httpRoute"` + HttpHost map[string]uint64 `json:"httpHost"` + Component map[string]uint64 `json:"component"` } type Error struct { ExceptionType string `json:"exceptionType" ch:"exceptionType"` @@ -341,20 +355,36 @@ type Error struct { LastSeen time.Time `json:"lastSeen" ch:"lastSeen"` FirstSeen time.Time `json:"firstSeen" ch:"firstSeen"` ServiceName string `json:"serviceName" ch:"serviceName"` + GroupID string `json:"groupID" ch:"groupID"` } type ErrorWithSpan struct { ErrorID string `json:"errorId" ch:"errorID"` ExceptionType string `json:"exceptionType" ch:"exceptionType"` ExceptionStacktrace string `json:"exceptionStacktrace" ch:"exceptionStacktrace"` - ExceptionEscaped string `json:"exceptionEscaped" ch:"exceptionEscaped"` + ExceptionEscaped bool `json:"exceptionEscaped" ch:"exceptionEscaped"` ExceptionMsg string `json:"exceptionMessage" ch:"exceptionMessage"` Timestamp time.Time `json:"timestamp" ch:"timestamp"` SpanID string `json:"spanID" ch:"spanID"` TraceID string `json:"traceID" ch:"traceID"` ServiceName string `json:"serviceName" ch:"serviceName"` - NewerErrorID string `json:"newerErrorId" ch:"newerErrorId"` - OlderErrorID string `json:"olderErrorId" ch:"olderErrorId"` + GroupID string `json:"groupID" ch:"groupID"` +} + +type NextPrevErrorIDsDBResponse struct { + NextErrorID string `ch:"nextErrorID"` + NextTimestamp time.Time `ch:"nextTimestamp"` + PrevErrorID string `ch:"prevErrorID"` + PrevTimestamp time.Time `ch:"prevTimestamp"` + Timestamp time.Time `ch:"timestamp"` +} + +type NextPrevErrorIDs struct { + NextErrorID string `json:"nextErrorID"` + NextTimestamp time.Time `json:"nextTimestamp"` + PrevErrorID string `json:"prevErrorID"` + PrevTimestamp time.Time `json:"prevTimestamp"` + GroupID string `json:"groupID"` } type Series struct { diff --git a/pkg/query-service/pqlEngine/engine.go b/pkg/query-service/pqlEngine/engine.go new file mode 100644 index 0000000000..e9a45ad542 --- /dev/null +++ b/pkg/query-service/pqlEngine/engine.go @@ -0,0 +1,85 @@ +package promql + +import ( + "context" + "fmt" + "github.com/go-kit/log" + pmodel "github.com/prometheus/common/model" + plog "github.com/prometheus/common/promlog" + pconfig "github.com/prometheus/prometheus/config" + plabels "github.com/prometheus/prometheus/pkg/labels" + pql "github.com/prometheus/prometheus/promql" + pstorage "github.com/prometheus/prometheus/storage" + premote "github.com/prometheus/prometheus/storage/remote" + "time" +) + +type PqlEngine struct { + engine *pql.Engine + fanoutStorage pstorage.Storage +} + +func 
FromConfigPath(promConfigPath string) (*PqlEngine, error) { + // load the prometheus config from the given path + c, err := pconfig.LoadFile(promConfigPath) + if err != nil { + return nil, fmt.Errorf("couldn't load configuration (--config.file=%q): %v", promConfigPath, err) + } + + return NewPqlEngine(c) +} + +func NewPqlEngine(config *pconfig.Config) (*PqlEngine, error) { + + logLevel := plog.AllowedLevel{} + logLevel.Set("debug") + logger := plog.New(logLevel) + + opts := pql.EngineOpts{ + Logger: log.With(logger, "component", "promql evaluator"), + Reg: nil, + MaxConcurrent: 20, + MaxSamples: 50000000, + Timeout: time.Duration(2 * time.Minute), + } + + e := pql.NewEngine(opts) + startTime := func() (int64, error) { + return int64(pmodel.Latest), nil + } + + remoteStorage := premote.NewStorage(log.With(logger, "component", "remote"), startTime, time.Duration(1*time.Minute)) + fanoutStorage := pstorage.NewFanout(logger, remoteStorage) + + remoteStorage.ApplyConfig(config) + + return &PqlEngine{ + engine: e, + fanoutStorage: fanoutStorage, + }, nil +} + +func (p *PqlEngine) RunAlertQuery(ctx context.Context, qs string, t time.Time) (pql.Vector, error) { + q, err := p.engine.NewInstantQuery(p.fanoutStorage, qs, t) + if err != nil { + return nil, err + } + + res := q.Exec(ctx) + + if res.Err != nil { + return nil, res.Err + } + + switch v := res.Value.(type) { + case pql.Vector: + return v, nil + case pql.Scalar: + return pql.Vector{pql.Sample{ + Point: pql.Point(v), + Metric: plabels.Labels{}, + }}, nil + default: + return nil, fmt.Errorf("rule result is not a vector or scalar") + } +} diff --git a/pkg/query-service/rules/alerting.go b/pkg/query-service/rules/alerting.go new file mode 100644 index 0000000000..a4768b4036 --- /dev/null +++ b/pkg/query-service/rules/alerting.go @@ -0,0 +1,200 @@ +package rules + +import ( + "encoding/json" + "github.com/pkg/errors" + "go.signoz.io/query-service/model" + "go.signoz.io/query-service/utils/labels" + "time" +) + +// how long a resolved alert is retained (and continues to be sent) before being dropped +const resolvedRetention = 15 * time.Minute + +const ( + // AlertMetricName is the metric name for synthetic alert timeseries. + alertMetricName = "ALERTS" + + // AlertForStateMetricName is the metric name for 'for' state of alert. + alertForStateMetricName = "ALERTS_FOR_STATE" +) + +type RuleType string + +const ( + RuleTypeThreshold = "threshold_rule" + RuleTypeProm = "promql_rule" +) + +type RuleHealth string + +const ( + HealthUnknown RuleHealth = "unknown" + HealthGood RuleHealth = "ok" + HealthBad RuleHealth = "err" +) + +// AlertState denotes the state of an active alert.
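+// Following the Prometheus convention these states mirror, an alert starts out +// StateInactive, becomes StatePending while its condition holds but the evaluation +// window has not yet elapsed, and becomes StateFiring once it has.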
+type AlertState int + +const ( + StateInactive AlertState = iota + StatePending + StateFiring +) + +func (s AlertState) String() string { + switch s { + case StateInactive: + return "inactive" + case StatePending: + return "pending" + case StateFiring: + return "firing" + } + panic(errors.Errorf("unknown alert state: %d", s)) +} + +type Alert struct { + State AlertState + + Labels labels.BaseLabels + Annotations labels.BaseLabels + + GeneratorURL string + + Value float64 + ActiveAt time.Time + FiredAt time.Time + ResolvedAt time.Time + LastSentAt time.Time + ValidUntil time.Time +} + +// todo(amol): need to review this with ankit +func (a *Alert) needsSending(ts time.Time, resendDelay time.Duration) bool { + if a.State == StatePending { + return false + } + + // if an alert has been resolved since the last send, resend it + if a.ResolvedAt.After(a.LastSentAt) { + return true + } + + return a.LastSentAt.Add(resendDelay).Before(ts) +} + +type NamedAlert struct { + Name string + *Alert +} + +type CompareOp string + +const ( + CompareOpNone CompareOp = "0" + ValueIsAbove CompareOp = "1" + ValueIsBelow CompareOp = "2" + ValueIsEq CompareOp = "3" + ValueIsNotEq CompareOp = "4" +) + +func ResolveCompareOp(cop CompareOp) string { + switch cop { + case ValueIsAbove: + return ">" + case ValueIsBelow: + return "<" + case ValueIsEq: + return "==" + case ValueIsNotEq: + return "!=" + } + return "" +} + +type MatchType string + +const ( + MatchTypeNone MatchType = "0" + AtleastOnce MatchType = "1" + AllTheTimes MatchType = "2" + OnAverage MatchType = "3" + InTotal MatchType = "4" +) + +type RuleCondition struct { + CompositeMetricQuery *model.CompositeMetricQuery `json:"compositeMetricQuery,omitempty" yaml:"compositeMetricQuery,omitempty"` + CompareOp CompareOp `yaml:"op,omitempty" json:"op,omitempty"` + Target *float64 `yaml:"target,omitempty" json:"target,omitempty"` + MatchType `json:"matchType,omitempty"` +} + +func (rc *RuleCondition) IsValid() bool { + + if rc.CompositeMetricQuery == nil { + return false + } + + if rc.QueryType() == model.QUERY_BUILDER { + if rc.Target == nil { + return false + } + if rc.CompareOp == "" { + return false + } + } + if rc.QueryType() == model.PROM { + + if len(rc.CompositeMetricQuery.PromQueries) == 0 { + return false + } + } + return true +} + +// QueryType is a short hand method to get query type +func (rc *RuleCondition) QueryType() model.QueryType { + if rc.CompositeMetricQuery != nil { + return rc.CompositeMetricQuery.QueryType + } + return 0 +} + +// String is useful in printing rule condition in logs +func (rc *RuleCondition) String() string { + if rc == nil { + return "" + } + data, _ := json.Marshal(*rc) + return string(data) +} + +type Duration time.Duration + +func (d Duration) MarshalJSON() ([]byte, error) { + return json.Marshal(time.Duration(d).String()) +} + +func (d *Duration) UnmarshalJSON(b []byte) error { + var v interface{} + if err := json.Unmarshal(b, &v); err != nil { + return err + } + switch value := v.(type) { + case float64: + *d = Duration(time.Duration(value)) + return nil + case string: + tmp, err := time.ParseDuration(value) + if err != nil { + return err + } + *d = Duration(tmp) + + return nil + default: + return errors.New("invalid duration") + } +} diff --git a/pkg/query-service/rules/apiParams.go b/pkg/query-service/rules/apiParams.go new file mode 100644 index 0000000000..6f3b466d11 --- /dev/null +++ b/pkg/query-service/rules/apiParams.go @@ -0,0 +1,230 @@ +package rules + +import ( + "context" + "encoding/json" + "fmt" + 
"github.com/pkg/errors" + "go.signoz.io/query-service/model" + "go.uber.org/zap" + "time" + "unicode/utf8" + + "go.signoz.io/query-service/utils/times" + "go.signoz.io/query-service/utils/timestamp" + yaml "gopkg.in/yaml.v2" +) + +// this file contains api request and responses to be +// served over http + +// PostableRule is used to create alerting rule from HTTP api +type PostableRule struct { + Alert string `yaml:"alert,omitempty" json:"alert,omitempty"` + Description string `yaml:"description,omitempty" json:"description,omitempty"` + RuleType RuleType `yaml:"ruleType,omitempty" json:"ruleType,omitempty"` + EvalWindow Duration `yaml:"evalWindow,omitempty" json:"evalWindow,omitempty"` + Frequency Duration `yaml:"frequency,omitempty" json:"frequency,omitempty"` + + RuleCondition *RuleCondition `yaml:"condition,omitempty" json:"condition,omitempty"` + Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"` + Annotations map[string]string `yaml:"annotations,omitempty" json:"annotations,omitempty"` + + // Source captures the source url where rule has been created + Source string `json:"source,omitempty"` + + // legacy + Expr string `yaml:"expr,omitempty" json:"expr,omitempty"` + OldYaml string `json:"yaml,omitempty"` +} + +func ParsePostableRule(content []byte) (*PostableRule, []error) { + return parsePostableRule(content, "json") +} + +func parsePostableRule(content []byte, kind string) (*PostableRule, []error) { + rule := PostableRule{} + + var err error + if kind == "json" { + if err = json.Unmarshal(content, &rule); err != nil { + zap.S().Debugf("postable rule content", string(content), "\t kind:", kind) + return nil, []error{fmt.Errorf("failed to load json")} + } + } else if kind == "yaml" { + if err = yaml.Unmarshal(content, &rule); err != nil { + zap.S().Debugf("postable rule content", string(content), "\t kind:", kind) + return nil, []error{fmt.Errorf("failed to load yaml")} + } + } else { + return nil, []error{fmt.Errorf("invalid data type")} + } + zap.S().Debugf("postable rule(parsed):", rule) + + if rule.RuleCondition == nil && rule.Expr != "" { + // account for legacy rules + rule.RuleType = RuleTypeProm + rule.EvalWindow = Duration(5 * time.Minute) + rule.Frequency = Duration(1 * time.Minute) + rule.RuleCondition = &RuleCondition{ + CompositeMetricQuery: &model.CompositeMetricQuery{ + QueryType: model.PROM, + PromQueries: map[string]*model.PromQuery{ + "A": &model.PromQuery{ + Query: rule.Expr, + }, + }, + }, + } + } + + if rule.EvalWindow == 0 { + rule.EvalWindow = Duration(5 * time.Minute) + } + + if rule.Frequency == 0 { + rule.Frequency = Duration(1 * time.Minute) + } + + if rule.RuleCondition != nil { + if rule.RuleCondition.CompositeMetricQuery.QueryType == model.QUERY_BUILDER { + rule.RuleType = RuleTypeThreshold + } else if rule.RuleCondition.CompositeMetricQuery.QueryType == model.PROM { + rule.RuleType = RuleTypeProm + } + + for qLabel, q := range rule.RuleCondition.CompositeMetricQuery.BuilderQueries { + if q.MetricName != "" && q.Expression == "" { + q.Expression = qLabel + } + } + } + + zap.S().Debugf("postable rule:", rule, "\t condition", rule.RuleCondition.String()) + + if errs := rule.Validate(); len(errs) > 0 { + return nil, errs + } + return &rule, []error{} +} + +func isValidLabelName(ln string) bool { + if len(ln) == 0 { + return false + } + for i, b := range ln { + if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) { + return false + } + } + return true +} + +func isValidLabelValue(v 
string) bool { + return utf8.ValidString(v) +} + +func (r *PostableRule) Validate() (errs []error) { + + if r.RuleCondition == nil { + errs = append(errs, errors.Errorf("rule condition is required")) + } else { + if r.RuleCondition.CompositeMetricQuery == nil { + errs = append(errs, errors.Errorf("composite metric query is required")) + } + } + + if r.RuleCondition != nil && r.RuleType == RuleTypeThreshold { + if r.RuleCondition.Target == nil { + errs = append(errs, errors.Errorf("rule condition missing the threshold")) + } + if r.RuleCondition.CompareOp == "" { + errs = append(errs, errors.Errorf("rule condition missing the compare op")) + } + if r.RuleCondition.MatchType == "" { + errs = append(errs, errors.Errorf("rule condition missing the match option")) + } + } + + for k, v := range r.Labels { + if !isValidLabelName(k) { + errs = append(errs, errors.Errorf("invalid label name: %s", k)) + } + + if !isValidLabelValue(v) { + errs = append(errs, errors.Errorf("invalid label value: %s", v)) + } + } + + for k := range r.Annotations { + if !isValidLabelName(k) { + errs = append(errs, errors.Errorf("invalid annotation name: %s", k)) + } + } + + errs = append(errs, testTemplateParsing(r)...) + return errs +} + +func testTemplateParsing(rl *PostableRule) (errs []error) { + if rl.Alert == "" { + // Not an alerting rule. + return errs + } + + // Trying to parse templates. + tmplData := AlertTemplateData(make(map[string]string), 0) + defs := "{{$labels := .Labels}}{{$value := .Value}}" + parseTest := func(text string) error { + tmpl := NewTemplateExpander( + context.TODO(), + defs+text, + "__alert_"+rl.Alert, + tmplData, + times.Time(timestamp.FromTime(time.Now())), + nil, + ) + return tmpl.ParseTest() + } + + // Parsing Labels. + for _, val := range rl.Labels { + err := parseTest(val) + if err != nil { + errs = append(errs, fmt.Errorf("msg=%s", err.Error())) + } + } + + // Parsing Annotations. + for _, val := range rl.Annotations { + err := parseTest(val) + if err != nil { + errs = append(errs, fmt.Errorf("msg=%s", err.Error())) + } + } + + return errs +} + +// GettableRules has info for all stored rules. +type GettableRules struct { + Rules []*GettableRule `json:"rules"` +} + +// GettableRule has info for an alerting rule.
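+// +// An illustrative serialized form (all field values hypothetical): +// +// { +// "id": "1", +// "alert": "High error rate", +// "state": "firing", +// "ruleType": "threshold_rule", +// "evalWindow": "5m0s", +// "frequency": "1m0s" +// }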
+type GettableRule struct { + Labels map[string]string `json:"labels"` + Annotations map[string]string `json:"annotations"` + State string `json:"state"` + Alert string `json:"alert"` + // Description string `yaml:"description,omitempty" json:"description,omitempty"` + + Id string `json:"id"` + RuleType RuleType `yaml:"ruleType,omitempty" json:"ruleType,omitempty"` + EvalWindow Duration `yaml:"evalWindow,omitempty" json:"evalWindow,omitempty"` + Frequency Duration `yaml:"frequency,omitempty" json:"frequency,omitempty"` + RuleCondition RuleCondition `yaml:"condition,omitempty" json:"condition,omitempty"` + + // ActiveAt *time.Time `json:"activeAt,omitempty"` + // Value float64 `json:"value"` +} diff --git a/pkg/query-service/rules/db.go b/pkg/query-service/rules/db.go new file mode 100644 index 0000000000..7070f23346 --- /dev/null +++ b/pkg/query-service/rules/db.go @@ -0,0 +1,187 @@ +package rules + +import ( + "fmt" + "github.com/jmoiron/sqlx" + "go.uber.org/zap" + "strconv" + "time" +) + +// Data store to capture user alert rule settings +type RuleDB interface { + // CreateRuleTx stores rule in the db and returns tx and group name (on success) + CreateRuleTx(rule string) (string, Tx, error) + + // EditRuleTx updates the given rule in the db and returns tx and group name (on success) + EditRuleTx(rule string, id string) (string, Tx, error) + + // DeleteRuleTx deletes the given rule in the db and returns tx and group name (on success) + DeleteRuleTx(id string) (string, Tx, error) + + // GetStoredRules fetches the rule definitions from db + GetStoredRules() ([]StoredRule, error) + + // GetStoredRule for a given ID from DB + GetStoredRule(id string) (*StoredRule, error) +} + +type StoredRule struct { + Id int `json:"id" db:"id"` + UpdatedAt time.Time `json:"updated_at" db:"updated_at"` + Data string `json:"data" db:"data"` +} + +type Tx interface { + Commit() error + Rollback() error +} + +type ruleDB struct { + *sqlx.DB +} + +// todo: move init methods for creating tables + +func newRuleDB(db *sqlx.DB) RuleDB { + return &ruleDB{ + db, + } +} + +// CreateRuleTx stores a given rule in db and returns task name, +// sql tx and error (if any) +func (r *ruleDB) CreateRuleTx(rule string) (string, Tx, error) { + + var groupName string + var lastInsertId int64 + + tx, err := r.Begin() + if err != nil { + return groupName, nil, err + } + + stmt, err := tx.Prepare(`INSERT into rules (updated_at, data) VALUES($1,$2);`) + if err != nil { + zap.S().Errorf("Error in preparing statement for INSERT to rules\n", err) + tx.Rollback() + return groupName, nil, err + } + + defer stmt.Close() + + result, err := stmt.Exec(time.Now(), rule) + if err != nil { + zap.S().Errorf("Error in Executing prepared statement for INSERT to rules\n", err) + tx.Rollback() // return an error too, we may want to wrap them + return groupName, nil, err + } + + lastInsertId, _ = result.LastInsertId() + + groupName = prepareTaskName(lastInsertId) + + return groupName, tx, nil + +} + +// EditRuleTx stores a given rule string in database and returns +// task name, sql tx and error (if any) +func (r *ruleDB) EditRuleTx(rule string, id string) (string, Tx, error) { + + var groupName string + idInt, _ := strconv.Atoi(id) + if idInt == 0 { + return groupName, nil, fmt.Errorf("failed to read alert id from parameters") + } + + groupName = prepareTaskName(int64(idInt)) + + // todo(amol): resolve this error - database locked when using + // edit transaction with sqlx + // tx, err := r.Begin() + //if err != nil { + // return groupName, tx, err + 
//} + stmt, err := r.Prepare(`UPDATE rules SET updated_at=$1, data=$2 WHERE id=$3;`) + if err != nil { + zap.S().Errorf("Error in preparing statement for UPDATE to rules\n", err) + // tx.Rollback() + return groupName, nil, err + } + defer stmt.Close() + + if _, err := stmt.Exec(time.Now(), rule, idInt); err != nil { + zap.S().Errorf("Error in Executing prepared statement for UPDATE to rules\n", err) + // tx.Rollback() // return an error too, we may want to wrap them + return groupName, nil, err + } + return groupName, nil, nil +} + +// DeleteRuleTx deletes a given rule with id and returns +// taskname, sql tx and error (if any) +func (r *ruleDB) DeleteRuleTx(id string) (string, Tx, error) { + + idInt, _ := strconv.Atoi(id) + groupName := prepareTaskName(int64(idInt)) + + // commented as this causes db locked error + // tx, err := r.Begin() + // if err != nil { + // return groupName, tx, err + // } + + stmt, err := r.Prepare(`DELETE FROM rules WHERE id=$1;`) + + if err != nil { + return groupName, nil, err + } + + defer stmt.Close() + + if _, err := stmt.Exec(idInt); err != nil { + zap.S().Errorf("Error in Executing prepared statement for DELETE to rules\n", err) + // tx.Rollback() + return groupName, nil, err + } + + return groupName, nil, nil +} + +func (r *ruleDB) GetStoredRules() ([]StoredRule, error) { + + rules := []StoredRule{} + + query := fmt.Sprintf("SELECT id, updated_at, data FROM rules") + + err := r.Select(&rules, query) + + if err != nil { + zap.S().Debug("Error in processing sql query: ", err) + return nil, err + } + + return rules, nil +} + +func (r *ruleDB) GetStoredRule(id string) (*StoredRule, error) { + intId, err := strconv.Atoi(id) + if err != nil { + return nil, fmt.Errorf("invalid id parameter") + } + + rule := &StoredRule{} + + query := fmt.Sprintf("SELECT id, updated_at, data FROM rules WHERE id=%d", intId) + err = r.Get(rule, query) + + // zap.S().Info(query) + + if err != nil { + zap.S().Error("Error in processing sql query: ", err) + return nil, err + } + + return rule, nil +} diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go new file mode 100644 index 0000000000..9a040fdf74 --- /dev/null +++ b/pkg/query-service/rules/manager.go @@ -0,0 +1,595 @@ +package rules + +import ( + "context" + "encoding/json" + "fmt" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/go-kit/log" + + "go.uber.org/zap" + + "github.com/jmoiron/sqlx" + "github.com/pkg/errors" + + // opentracing "github.com/opentracing/opentracing-go" + am "go.signoz.io/query-service/integrations/alertManager" +) + +// namespace for prom metrics +const namespace = "signoz" +const taskNamesuffix = "webAppEditor" + +func ruleIdFromTaskName(n string) string { + return strings.Split(n, "-groupname")[0] +} + +func prepareTaskName(ruleId int64) string { + return fmt.Sprintf("%d-groupname", ruleId) +} + +// ManagerOptions bundles options for the Manager. +type ManagerOptions struct { + NotifierOpts am.NotifierOptions + Queriers *Queriers + + // RepoURL is used to generate a backlink in sent alert messages + RepoURL string + + // rule db conn + DBConn *sqlx.DB + + Context context.Context + Logger log.Logger + ResendDelay time.Duration + DisableRules bool +} + +// The Manager manages recording and alerting rules. 
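+// A Manager keeps one Task per stored rule, keyed by task name, plus the
+// parsed Rule keyed by rule id; the block channel is closed by run() to
+// release the task goroutines, and the notifier ships firing alerts to
+// alertmanager.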
+type Manager struct { + opts *ManagerOptions + tasks map[string]Task + rules map[string]Rule + mtx sync.RWMutex + block chan struct{} + // Notifier sends messages through alert manager + notifier *am.Notifier + + // datastore to store alert definitions + ruleDB RuleDB + + // pause all rule tasks + pause bool + logger log.Logger +} + +func defaultOptions(o *ManagerOptions) *ManagerOptions { + if o.NotifierOpts.QueueCapacity == 0 { + o.NotifierOpts.QueueCapacity = 10000 + } + if o.NotifierOpts.Timeout == 0 { + o.NotifierOpts.Timeout = 10 * time.Second + } + if o.ResendDelay == time.Duration(0) { + o.ResendDelay = 1 * time.Minute + } + return o +} + +// NewManager returns an implementation of Manager, ready to be started +// by calling the Run method. +func NewManager(o *ManagerOptions) (*Manager, error) { + + o = defaultOptions(o) + // here we just initiate notifier, it will be started + // in run() + notifier, err := am.NewNotifier(&o.NotifierOpts, nil) + if err != nil { + // todo(amol): rethink on this, the query service + // should not be down because alert manager is not available + return nil, err + } + + db := newRuleDB(o.DBConn) + + m := &Manager{ + tasks: map[string]Task{}, + rules: map[string]Rule{}, + notifier: notifier, + ruleDB: db, + opts: o, + block: make(chan struct{}), + logger: o.Logger, + } + return m, nil +} + +func (m *Manager) Start() { + if err := m.initiate(); err != nil { + zap.S().Errorf("failed to initialize alerting rules manager: %v", err) + } + m.run() +} + +func (m *Manager) Pause(b bool) { + m.mtx.Lock() + defer m.mtx.Unlock() + for _, t := range m.tasks { + t.Pause(b) + } +} + +func (m *Manager) initiate() error { + storedRules, err := m.ruleDB.GetStoredRules() + if err != nil { + return err + } + if len(storedRules) == 0 { + return nil + } + var loadErrors []error + + for _, rec := range storedRules { + taskName := fmt.Sprintf("%d-groupname", rec.Id) + parsedRule, errs := ParsePostableRule([]byte(rec.Data)) + + if len(errs) > 0 { + if errs[0].Error() == "failed to load json" { + zap.S().Info("failed to load rule in json format, trying yaml now:", rec.Data) + + // see if rule is stored in yaml format + parsedRule, errs = parsePostableRule([]byte(rec.Data), "yaml") + + if parsedRule == nil { + zap.S().Errorf("failed to parse and initialize yaml rule:", errs) + // just one rule is being parsed so expect just one error + loadErrors = append(loadErrors, errs[0]) + continue + } else { + // rule stored in yaml, so migrate it to json + zap.S().Info("msg:", "migrating rule from JSON to yaml", "\t rule:", rec.Data, "\t parsed rule:", parsedRule) + ruleJSON, err := json.Marshal(parsedRule) + if err == nil { + taskName, _, err := m.ruleDB.EditRuleTx(string(ruleJSON), fmt.Sprintf("%d", rec.Id)) + if err != nil { + zap.S().Errorf("msg: failed to migrate rule ", "/t error:", err) + } else { + zap.S().Info("msg:", "migrated rule from yaml to json", "/t rule:", taskName) + } + } + } + } else { + zap.S().Errorf("failed to parse and initialize rule:", errs) + // just one rule is being parsed so expect just one error + loadErrors = append(loadErrors, errs[0]) + continue + } + } + + err := m.addTask(parsedRule, taskName) + if err != nil { + zap.S().Errorf("failed to load the rule definition (%s): %v", taskName, err) + } + } + + return nil +} + +// Run starts processing of the rule manager. +func (m *Manager) run() { + // initiate notifier + go m.notifier.Run() + + // initiate blocked tasks + close(m.block) +} + +// Stop the rule manager's rule evaluation cycles. 
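+// It blocks until each task has finished its in-flight evaluation
+// (Task.Stop waits on the task's terminated channel).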
+func (m *Manager) Stop() {
+	m.mtx.Lock()
+	defer m.mtx.Unlock()
+
+	zap.S().Info("msg: ", "Stopping rule manager...")
+
+	for _, t := range m.tasks {
+		t.Stop()
+	}
+
+	zap.S().Info("msg: ", "Rule manager stopped")
+}
+
+// EditRule writes the rule definition to the
+// datastore and also updates the rule executor
+func (m *Manager) EditRule(ruleStr string, id string) error {
+	// todo(amol): fetch recent rule from db first
+	parsedRule, errs := ParsePostableRule([]byte(ruleStr))
+
+	if len(errs) > 0 {
+		zap.S().Errorf("failed to parse rules: %v", errs)
+		// just one rule is being parsed so expect just one error
+		return errs[0]
+	}
+
+	taskName, _, err := m.ruleDB.EditRuleTx(ruleStr, id)
+	if err != nil {
+		return err
+	}
+
+	if !m.opts.DisableRules {
+		err = m.editTask(parsedRule, taskName)
+		if err != nil {
+			// todo(amol): using a tx with sqlite3 yields a
+			// database-locked error; needs research to resolve
+			//tx.Rollback()
+			return err
+		}
+	}
+
+	// return tx.Commit()
+	return nil
+}
+
+func (m *Manager) editTask(rule *PostableRule, taskName string) error {
+	m.mtx.Lock()
+	defer m.mtx.Unlock()
+
+	newTask, err := m.prepareTask(false, rule, taskName)
+
+	if err != nil {
+		zap.S().Errorf("msg:", "loading tasks failed", "\t err:", err)
+		return errors.New("error preparing rule with given parameters, previous rule set restored")
+	}
+
+	// If there is an old task with the same identifier, stop it and wait for
+	// it to finish the current iteration. Then copy its state into the new task.
+	oldTask, ok := m.tasks[taskName]
+	if !ok {
+		zap.S().Errorf("msg:", "rule task not found, edit task failed", "\t task name:", taskName)
+		return errors.New("rule task not found, edit task failed")
+	}
+
+	delete(m.tasks, taskName)
+
+	// ok is necessarily true at this point
+	oldTask.Stop()
+	newTask.CopyState(oldTask)
+
+	go func() {
+		// Wait with starting evaluation until the rule manager
+		// is told to run. This is necessary to avoid running
+		// queries against a bootstrapping storage.
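+		// m.block is closed exactly once by run(), releasing all
+		// waiting task goroutines together once the manager starts.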
+ <-m.block + newTask.Run(m.opts.Context) + }() + + m.tasks[taskName] = newTask + return nil +} + +func (m *Manager) DeleteRule(id string) error { + + idInt, err := strconv.Atoi(id) + if err != nil { + zap.S().Errorf("msg: ", "delete rule received an rule id in invalid format, must be a number", "\t ruleid:", id) + return fmt.Errorf("delete rule received an rule id in invalid format, must be a number") + } + + taskName := prepareTaskName(int64(idInt)) + if !m.opts.DisableRules { + if err := m.deleteTask(taskName); err != nil { + zap.S().Errorf("msg: ", "failed to unload the rule task from memory, please retry", "\t ruleid: ", id) + return err + } + } + + if _, _, err := m.ruleDB.DeleteRuleTx(id); err != nil { + zap.S().Errorf("msg: ", "failed to delete the rule from rule db", "\t ruleid: ", id) + return err + } + + return nil +} + +func (m *Manager) deleteTask(taskName string) error { + m.mtx.Lock() + defer m.mtx.Unlock() + + oldg, ok := m.tasks[taskName] + if ok { + oldg.Stop() + delete(m.tasks, taskName) + delete(m.rules, ruleIdFromTaskName(taskName)) + } else { + zap.S().Errorf("msg:", "rule not found for deletion", "\t name:", taskName) + return fmt.Errorf("rule not found") + } + + return nil +} + +// CreateRule stores rule def into db and also +// starts an executor for the rule +func (m *Manager) CreateRule(ruleStr string) error { + parsedRule, errs := ParsePostableRule([]byte(ruleStr)) + + if len(errs) > 0 { + zap.S().Errorf("failed to parse rules:", errs) + // just one rule is being parsed so expect just one error + return errs[0] + } + + taskName, tx, err := m.ruleDB.CreateRuleTx(ruleStr) + if err != nil { + return err + } + if !m.opts.DisableRules { + if err := m.addTask(parsedRule, taskName); err != nil { + tx.Rollback() + return err + } + } + return tx.Commit() +} + +func (m *Manager) addTask(rule *PostableRule, taskName string) error { + m.mtx.Lock() + defer m.mtx.Unlock() + + newTask, err := m.prepareTask(false, rule, taskName) + + if err != nil { + zap.S().Errorf("msg:", "creating rule task failed", "\t name:", taskName, "\t err", err) + return errors.New("error loading rules, previous rule set restored") + } + + // If there is an another task with the same identifier, raise an error + _, ok := m.tasks[taskName] + if ok { + return fmt.Errorf("a rule with the same name already exists") + } + + go func() { + // Wait with starting evaluation until the rule manager + // is told to run. This is necessary to avoid running + // queries against a bootstrapping storage. 
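+		// a receive on the already-closed block channel returns
+		// immediately, so tasks added after startup run right away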
+ <-m.block + newTask.Run(m.opts.Context) + }() + + m.tasks[taskName] = newTask + return nil +} + +// prepareTask prepares a rule task from postable rule +func (m *Manager) prepareTask(acquireLock bool, r *PostableRule, taskName string) (Task, error) { + + if acquireLock { + m.mtx.Lock() + defer m.mtx.Unlock() + } + + rules := make([]Rule, 0) + var task Task + + if r.Alert == "" { + zap.S().Errorf("msg:", "task load failed, at least one rule must be set", "\t task name:", taskName) + return task, fmt.Errorf("task load failed, at least one rule must be set") + } + + ruleId := ruleIdFromTaskName(taskName) + if r.RuleType == RuleTypeThreshold { + // create a threshold rule + tr, err := NewThresholdRule( + ruleId, + r.Alert, + r.RuleCondition, + time.Duration(r.EvalWindow), + r.Labels, + r.Annotations, + r.Source, + ) + + if err != nil { + return task, err + } + + rules = append(rules, tr) + + // create ch rule task for evalution + task = newTask(TaskTypeCh, taskName, taskNamesuffix, time.Duration(r.Frequency), rules, m.opts, m.prepareNotifyFunc()) + + // add rule to memory + m.rules[ruleId] = tr + + } else if r.RuleType == RuleTypeProm { + + // create promql rule + pr, err := NewPromRule( + ruleId, + r.Alert, + r.RuleCondition, + time.Duration(r.EvalWindow), + r.Labels, + r.Annotations, + // required as promql engine works with logger and not zap + log.With(m.logger, "alert", r.Alert), + r.Source, + ) + + if err != nil { + return task, err + } + + rules = append(rules, pr) + + // create promql rule task for evalution + task = newTask(TaskTypeProm, taskName, taskNamesuffix, time.Duration(r.Frequency), rules, m.opts, m.prepareNotifyFunc()) + + // add rule to memory + m.rules[ruleId] = pr + + } else { + return nil, fmt.Errorf(fmt.Sprintf("unsupported rule type. Supported types: %s, %s", RuleTypeProm, RuleTypeThreshold)) + } + + return task, nil +} + +// RuleTasks returns the list of manager's rule tasks. +func (m *Manager) RuleTasks() []Task { + m.mtx.RLock() + defer m.mtx.RUnlock() + + rgs := make([]Task, 0, len(m.tasks)) + for _, g := range m.tasks { + rgs = append(rgs, g) + } + + sort.Slice(rgs, func(i, j int) bool { + return rgs[i].Name() < rgs[j].Name() + }) + + return rgs +} + +// RuleTasks returns the list of manager's rule tasks. +func (m *Manager) RuleTasksWithoutLock() []Task { + + rgs := make([]Task, 0, len(m.tasks)) + for _, g := range m.tasks { + rgs = append(rgs, g) + } + + sort.Slice(rgs, func(i, j int) bool { + return rgs[i].Name() < rgs[j].Name() + }) + + return rgs +} + +// Rules returns the list of the manager's rules. +func (m *Manager) Rules() []Rule { + m.mtx.RLock() + defer m.mtx.RUnlock() + + rules := []Rule{} + for _, r := range m.rules { + rules = append(rules, r) + } + + return rules +} + +// TriggeredAlerts returns the list of the manager's rules. +func (m *Manager) TriggeredAlerts() []*NamedAlert { + // m.mtx.RLock() + // defer m.mtx.RUnlock() + + namedAlerts := []*NamedAlert{} + + for _, r := range m.rules { + active := r.ActiveAlerts() + + for _, a := range active { + awn := &NamedAlert{ + Alert: a, + Name: r.Name(), + } + namedAlerts = append(namedAlerts, awn) + } + } + + return namedAlerts +} + +// NotifyFunc sends notifications about a set of alerts generated by the given expression. +type NotifyFunc func(ctx context.Context, expr string, alerts ...*Alert) + +// prepareNotifyFunc implements the NotifyFunc for a Notifier. 
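+// It converts internal alerts to the alertmanager payload: EndsAt is set to
+// the resolution time for resolved alerts and to ValidUntil otherwise, and an
+// empty GeneratorURL falls back to the configured RepoURL backlink.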
+func (m *Manager) prepareNotifyFunc() NotifyFunc { + return func(ctx context.Context, expr string, alerts ...*Alert) { + var res []*am.Alert + + for _, alert := range alerts { + generatorURL := alert.GeneratorURL + if generatorURL == "" { + generatorURL = m.opts.RepoURL + } + + a := &am.Alert{ + StartsAt: alert.FiredAt, + Labels: alert.Labels, + Annotations: alert.Annotations, + GeneratorURL: generatorURL, + } + if !alert.ResolvedAt.IsZero() { + a.EndsAt = alert.ResolvedAt + } else { + a.EndsAt = alert.ValidUntil + } + res = append(res, a) + } + + if len(alerts) > 0 { + m.notifier.Send(res...) + } + } +} + +func (m *Manager) ListActiveRules() ([]Rule, error) { + ruleList := []Rule{} + + for _, r := range m.rules { + ruleList = append(ruleList, r) + } + + return ruleList, nil +} + +func (m *Manager) ListRuleStates() (*GettableRules, error) { + + // fetch rules from DB + storedRules, err := m.ruleDB.GetStoredRules() + + // initiate response object + resp := make([]*GettableRule, 0) + + for _, s := range storedRules { + + ruleResponse := &GettableRule{} + if err := json.Unmarshal([]byte(s.Data), ruleResponse); err != nil { // Parse []byte to go struct pointer + zap.S().Errorf("msg:", "invalid rule data", "\t err:", err) + continue + } + + ruleResponse.Id = fmt.Sprintf("%d", s.Id) + + // fetch state of rule from memory + if rm, ok := m.rules[ruleResponse.Id]; !ok { + zap.S().Warnf("msg:", "invalid rule id found while fetching list of rules", "\t err:", err, "\t rule_id:", ruleResponse.Id) + } else { + ruleResponse.State = rm.State().String() + } + resp = append(resp, ruleResponse) + } + + return &GettableRules{Rules: resp}, nil +} + +func (m *Manager) GetRule(id string) (*GettableRule, error) { + s, err := m.ruleDB.GetStoredRule(id) + if err != nil { + return nil, err + } + r := &GettableRule{} + if err := json.Unmarshal([]byte(s.Data), r); err != nil { + return nil, err + } + r.Id = fmt.Sprintf("%d", s.Id) + return r, nil +} diff --git a/pkg/query-service/rules/manager_test.go b/pkg/query-service/rules/manager_test.go new file mode 100644 index 0000000000..e7b059dda9 --- /dev/null +++ b/pkg/query-service/rules/manager_test.go @@ -0,0 +1,155 @@ +package rules + +import ( + "context" + "fmt" + "os" + "os/signal" + "syscall" + + "github.com/jmoiron/sqlx" + _ "github.com/mattn/go-sqlite3" + "go.signoz.io/query-service/app/clickhouseReader" + am "go.signoz.io/query-service/integrations/alertManager" + "go.signoz.io/query-service/model" + pqle "go.signoz.io/query-service/pqlEngine" + "go.signoz.io/query-service/utils/value" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "net/url" + "testing" + "time" +) + +func initZapLog() *zap.Logger { + config := zap.NewDevelopmentConfig() + config.EncoderConfig.EncodeLevel = zapcore.CapitalColorLevelEncoder + config.EncoderConfig.TimeKey = "timestamp" + config.EncoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder + logger, _ := config.Build() + return logger +} + +func TestRules(t *testing.T) { + fmt.Println("starting test TestRules..") + loggerMgr := initZapLog() + zap.ReplaceGlobals(loggerMgr) + defer loggerMgr.Sync() // flushes buffer, if any + + logger := loggerMgr.Sugar() + + configFile := "../config/prometheus.yml" + // create engine + pqle, err := pqle.FromConfigPath(configFile) + if err != nil { + fmt.Println("failed to create pql:", err) + t.Errorf("failed to create pql engine : %v", err) + } + + // create db conn + db, err := sqlx.Open("sqlite3", "../signoz.db") + if err != nil { + fmt.Println("failed to create db conn:", err) + t.Errorf("failed 
to create db conn: %v", err)
+	}
+
+	// create ch reader
+	ch := clickhouseReader.NewReader(db, configFile)
+
+	// notifier opts
+	notifierOpts := am.NotifierOptions{
+		QueueCapacity:    10000,
+		Timeout:          1 * time.Second,
+		AlertManagerURLs: []string{"http://localhost:9093/api/"},
+	}
+
+	externalURL, _ := url.Parse("http://signoz.io")
+
+	// create manager opts; RepoURL and DBConn match the fields
+	// defined on ManagerOptions in manager.go
+	managerOpts := &ManagerOptions{
+		NotifierOpts: notifierOpts,
+		Queriers: &Queriers{
+			PqlEngine: pqle,
+			Ch:        ch,
+		},
+		RepoURL: externalURL.String(),
+		DBConn:  db,
+		Context: context.Background(),
+		Logger:  nil,
+	}
+
+	// create Manager
+	manager, err := NewManager(managerOpts)
+	if err != nil {
+		fmt.Println("manager error:", err)
+		t.Errorf("manager error: %v", err)
+	}
+	fmt.Println("manager is ready:", manager)
+
+	manager.run()
+
+	// test rules
+	// create promql rule
+	/* promql rule
+	postableRule := PostableRule{
+		Alert:      "test alert 1 - promql",
+		RuleType:   RuleTypeProm,
+		EvalWindow: 5 * time.Minute,
+		Frequency:  30 * time.Second,
+		RuleCondition: RuleCondition{
+			CompositeMetricQuery: &model.CompositeMetricQuery{
+				QueryType: model.PROM,
+				PromQueries: map[string]*model.PromQuery{
+					"A": &model.PromQuery{Query: `sum(signoz_latency_count{span_kind="SPAN_KIND_SERVER"}) by (service_name) > 100`},
+				},
+			},
+		},
+		Labels:      map[string]string{},
+		Annotations: map[string]string{},
+	}*/
+	// create builder rule
+	metricQuery := &model.MetricQuery{
+		QueryName:  "A",
+		MetricName: "signoz_latency_count",
+		TagFilters: &model.FilterSet{Operation: "AND", Items: []model.FilterItem{
+			{Key: "span_kind", Value: "SPAN_KIND_SERVER", Operation: "neq"},
+		}},
+		GroupingTags:      []string{"service_name"},
+		AggregateOperator: model.RATE_SUM,
+		Expression:        "A",
+	}
+
+	postableRule := PostableRule{
+		Alert:      "test alert 2 - builder",
+		RuleType:   RuleTypeThreshold,
+		EvalWindow: Duration(5 * time.Minute),
+		Frequency:  Duration(30 * time.Second),
+		RuleCondition: RuleCondition{
+			Target:    value.Float64(500),
+			CompareOp: TargetIsMore,
+			CompositeMetricQuery: &model.CompositeMetricQuery{
+				QueryType: model.QUERY_BUILDER,
+				BuilderQueries: map[string]*model.MetricQuery{
+					"A": metricQuery,
+				},
+			},
+		},
+		Labels:      map[string]string{"host": "server1"},
+		Annotations: map[string]string{},
+	}
+	err = manager.addTask(&postableRule, postableRule.Alert)
+	if err != nil {
+		fmt.Println("failed to add rule: ", err)
+		t.Errorf("failed to add rule")
+	}
+
+	signalsChannel := make(chan os.Signal, 1)
+	signal.Notify(signalsChannel, os.Interrupt, syscall.SIGTERM)
+
+	for {
+		select {
+		case <-signalsChannel:
+			logger.Fatal("Received OS Interrupt Signal ... 
") + } + } +} diff --git a/pkg/query-service/rules/promRule.go b/pkg/query-service/rules/promRule.go new file mode 100644 index 0000000000..669d6e3845 --- /dev/null +++ b/pkg/query-service/rules/promRule.go @@ -0,0 +1,445 @@ +package rules + +import ( + "context" + "fmt" + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "go.uber.org/zap" + "sync" + "time" + + plabels "github.com/prometheus/prometheus/pkg/labels" + pql "github.com/prometheus/prometheus/promql" + "go.signoz.io/query-service/model" + qslabels "go.signoz.io/query-service/utils/labels" + "go.signoz.io/query-service/utils/times" + "go.signoz.io/query-service/utils/timestamp" + yaml "gopkg.in/yaml.v2" +) + +type PromRule struct { + id string + name string + source string + ruleCondition *RuleCondition + + evalWindow time.Duration + holdDuration time.Duration + labels plabels.Labels + annotations plabels.Labels + + mtx sync.Mutex + evaluationDuration time.Duration + evaluationTimestamp time.Time + + health RuleHealth + + lastError error + + // map of active alerts + active map[uint64]*Alert + + logger log.Logger +} + +func NewPromRule( + id string, + name string, + ruleCondition *RuleCondition, + evalWindow time.Duration, + labels, annotations map[string]string, + logger log.Logger, + source string, +) (*PromRule, error) { + + if int64(evalWindow) == 0 { + evalWindow = 5 * time.Minute + } + + if ruleCondition == nil { + return nil, fmt.Errorf("no rule condition") + } else if !ruleCondition.IsValid() { + return nil, fmt.Errorf("invalid rule condition") + } + + zap.S().Info("msg:", "creating new alerting rule", "\t name:", name, "\t condition:", ruleCondition.String()) + + return &PromRule{ + id: id, + name: name, + source: source, + ruleCondition: ruleCondition, + evalWindow: evalWindow, + labels: plabels.FromMap(labels), + annotations: plabels.FromMap(annotations), + health: HealthUnknown, + active: map[uint64]*Alert{}, + logger: logger, + }, nil +} + +func (r *PromRule) Name() string { + return r.name +} + +func (r *PromRule) ID() string { + return r.id +} + +func (r *PromRule) Condition() *RuleCondition { + return r.ruleCondition +} + +func (r *PromRule) Type() RuleType { + return RuleTypeProm +} + +func (r *PromRule) GeneratorURL() string { + return r.source +} + +func (r *PromRule) SetLastError(err error) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.lastError = err +} + +func (r *PromRule) LastError() error { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.lastError +} + +func (r *PromRule) SetHealth(health RuleHealth) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.health = health +} + +func (r *PromRule) Health() RuleHealth { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.health +} + +// SetEvaluationDuration updates evaluationDuration to the duration it took to evaluate the rule on its last evaluation. +func (r *PromRule) SetEvaluationDuration(dur time.Duration) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.evaluationDuration = dur +} + +func (r *PromRule) HoldDuration() time.Duration { + return r.holdDuration +} + +func (r *PromRule) EvalWindow() time.Duration { + return r.evalWindow +} + +// Labels returns the labels of the alerting rule. +func (r *PromRule) Labels() qslabels.BaseLabels { + return r.labels +} + +// Annotations returns the annotations of the alerting rule. 
+func (r *PromRule) Annotations() qslabels.BaseLabels { + return r.annotations +} + +func (r *PromRule) sample(alert *Alert, ts time.Time) pql.Sample { + lb := plabels.NewBuilder(r.labels) + + alertLabels := alert.Labels.(plabels.Labels) + for _, l := range alertLabels { + lb.Set(l.Name, l.Value) + } + + lb.Set(qslabels.MetricNameLabel, alertMetricName) + lb.Set(qslabels.AlertNameLabel, r.name) + lb.Set(qslabels.AlertStateLabel, alert.State.String()) + + s := pql.Sample{ + Metric: lb.Labels(), + Point: pql.Point{T: timestamp.FromTime(ts), V: 1}, + } + return s +} + +// forStateSample returns the sample for ALERTS_FOR_STATE. +func (r *PromRule) forStateSample(alert *Alert, ts time.Time, v float64) pql.Sample { + lb := plabels.NewBuilder(r.labels) + alertLabels := alert.Labels.(plabels.Labels) + for _, l := range alertLabels { + lb.Set(l.Name, l.Value) + } + + lb.Set(plabels.MetricName, alertForStateMetricName) + lb.Set(plabels.AlertName, r.name) + + s := pql.Sample{ + Metric: lb.Labels(), + Point: pql.Point{T: timestamp.FromTime(ts), V: v}, + } + return s +} + +// GetEvaluationDuration returns the time in seconds it took to evaluate the alerting rule. +func (r *PromRule) GetEvaluationDuration() time.Duration { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.evaluationDuration +} + +// SetEvaluationTimestamp updates evaluationTimestamp to the timestamp of when the rule was last evaluated. +func (r *PromRule) SetEvaluationTimestamp(ts time.Time) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.evaluationTimestamp = ts +} + +// GetEvaluationTimestamp returns the time the evaluation took place. +func (r *PromRule) GetEvaluationTimestamp() time.Time { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.evaluationTimestamp +} + +// State returns the maximum state of alert instances for this rule. +// StateFiring > StatePending > StateInactive +func (r *PromRule) State() AlertState { + r.mtx.Lock() + defer r.mtx.Unlock() + + maxState := StateInactive + for _, a := range r.active { + if a.State > maxState { + maxState = a.State + } + } + return maxState +} + +func (r *PromRule) currentAlerts() []*Alert { + r.mtx.Lock() + defer r.mtx.Unlock() + + alerts := make([]*Alert, 0, len(r.active)) + + for _, a := range r.active { + anew := *a + alerts = append(alerts, &anew) + } + return alerts +} + +func (r *PromRule) ActiveAlerts() []*Alert { + var res []*Alert + for _, a := range r.currentAlerts() { + if a.ResolvedAt.IsZero() { + res = append(res, a) + } + } + return res +} + +// ForEachActiveAlert runs the given function on each alert. +// This should be used when you want to use the actual alerts from the ThresholdRule +// and not on its copy. +// If you want to run on a copy of alerts then don't use this, get the alerts from 'ActiveAlerts()'. +func (r *PromRule) ForEachActiveAlert(f func(*Alert)) { + r.mtx.Lock() + defer r.mtx.Unlock() + + for _, a := range r.active { + f(a) + } +} + +func (r *PromRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) { + alerts := []*Alert{} + r.ForEachActiveAlert(func(alert *Alert) { + if alert.needsSending(ts, resendDelay) { + alert.LastSentAt = ts + // Allow for two Eval or Alertmanager send failures. + delta := resendDelay + if interval > resendDelay { + delta = interval + } + alert.ValidUntil = ts.Add(4 * delta) + anew := *alert + alerts = append(alerts, &anew) + } + }) + notifyFunc(ctx, "", alerts...) 
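+	// the NotifyFunc built by prepareNotifyFunc forwards this batch to
+	// the alertmanager notifier; empty batches are dropped there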
+} + +func (r *PromRule) getPqlQuery() (string, error) { + + if r.ruleCondition.CompositeMetricQuery.QueryType == model.PROM { + if len(r.ruleCondition.CompositeMetricQuery.PromQueries) > 0 { + if promQuery, ok := r.ruleCondition.CompositeMetricQuery.PromQueries["A"]; ok { + query := promQuery.Query + if query == "" { + return query, fmt.Errorf("a promquery needs to be set for this rule to function") + } + + if r.ruleCondition.Target != nil && r.ruleCondition.CompareOp != CompareOpNone { + query = fmt.Sprintf("%s %s %f", query, ResolveCompareOp(r.ruleCondition.CompareOp), *r.ruleCondition.Target) + return query, nil + } else { + return query, nil + } + } + } + } + + return "", fmt.Errorf("invalid promql rule query") +} + +func (r *PromRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) (interface{}, error) { + + q, err := r.getPqlQuery() + if err != nil { + return nil, err + } + zap.S().Info("rule:", r.Name(), "\t evaluating promql query: ", q) + res, err := queriers.PqlEngine.RunAlertQuery(ctx, q, ts) + if err != nil { + r.SetHealth(HealthBad) + r.SetLastError(err) + return nil, err + } + + r.mtx.Lock() + defer r.mtx.Unlock() + + resultFPs := map[uint64]struct{}{} + var vec pql.Vector + var alerts = make(map[uint64]*Alert, len(res)) + + for _, smpl := range res { + l := make(map[string]string, len(smpl.Metric)) + for _, lbl := range smpl.Metric { + l[lbl.Name] = lbl.Value + } + + tmplData := AlertTemplateData(l, smpl.V) + // Inject some convenience variables that are easier to remember for users + // who are not used to Go's templating system. + defs := "{{$labels := .Labels}}{{$value := .Value}}" + + expand := func(text string) string { + + tmpl := NewTemplateExpander( + ctx, + defs+text, + "__alert_"+r.Name(), + tmplData, + times.Time(timestamp.FromTime(ts)), + nil, + ) + result, err := tmpl.Expand() + if err != nil { + result = fmt.Sprintf("", err) + level.Warn(r.logger).Log("msg", "Expanding alert template failed", "err", err, "data", tmplData) + } + return result + } + + lb := plabels.NewBuilder(smpl.Metric).Del(plabels.MetricName) + + for _, l := range r.labels { + lb.Set(l.Name, expand(l.Value)) + } + lb.Set(qslabels.AlertNameLabel, r.Name()) + lb.Set(qslabels.AlertRuleIdLabel, r.ID()) + lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL()) + + annotations := make(plabels.Labels, 0, len(r.annotations)) + for _, a := range r.annotations { + annotations = append(annotations, plabels.Label{Name: a.Name, Value: expand(a.Value)}) + } + + lbs := lb.Labels() + h := lbs.Hash() + resultFPs[h] = struct{}{} + + if _, ok := alerts[h]; ok { + err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels") + // We have already acquired the lock above hence using SetHealth and + // SetLastError will deadlock. + r.health = HealthBad + r.lastError = err + return nil, err + } + + alerts[h] = &Alert{ + Labels: lbs, + Annotations: annotations, + ActiveAt: ts, + State: StatePending, + Value: smpl.V, + GeneratorURL: r.GeneratorURL(), + } + } + + // alerts[h] is ready, add or update active list now + for h, a := range alerts { + // Check whether we already have alerting state for the identifying label set. + // Update the last value and annotations if so, create a new alert entry otherwise. + if alert, ok := r.active[h]; ok && alert.State != StateInactive { + alert.Value = a.Value + alert.Annotations = a.Annotations + continue + } + + r.active[h] = a + + } + + // Check if any pending alerts should be removed or fire now. Write out alert timeseries. 
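+	// An alert absent from this eval's result set resolves (and is kept for
+	// resolvedRetention so the resolution reaches alertmanager), while a
+	// pending alert older than holdDuration is promoted to firing.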
+ for fp, a := range r.active { + if _, ok := resultFPs[fp]; !ok { + // If the alert was previously firing, keep it around for a given + // retention time so it is reported as resolved to the AlertManager. + if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) { + delete(r.active, fp) + } + if a.State != StateInactive { + a.State = StateInactive + a.ResolvedAt = ts + } + continue + } + + if a.State == StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration { + a.State = StateFiring + a.FiredAt = ts + } + + } + r.health = HealthGood + r.lastError = err + return vec, nil + +} + +func (r *PromRule) String() string { + + ar := PostableRule{ + Alert: r.name, + RuleCondition: r.ruleCondition, + EvalWindow: Duration(r.evalWindow), + Labels: r.labels.Map(), + Annotations: r.annotations.Map(), + } + + byt, err := yaml.Marshal(ar) + if err != nil { + return fmt.Sprintf("error marshaling alerting rule: %s", err.Error()) + } + + return string(byt) +} diff --git a/pkg/query-service/rules/promRuleTask.go b/pkg/query-service/rules/promRuleTask.go new file mode 100644 index 0000000000..c06d3e1135 --- /dev/null +++ b/pkg/query-service/rules/promRuleTask.go @@ -0,0 +1,370 @@ +package rules + +import ( + "context" + "fmt" + "github.com/go-kit/log" + opentracing "github.com/opentracing/opentracing-go" + plabels "github.com/prometheus/prometheus/pkg/labels" + pql "github.com/prometheus/prometheus/promql" + "go.uber.org/zap" + "sort" + "sync" + "time" +) + +// PromRuleTask is a promql rule executor +type PromRuleTask struct { + name string + file string + frequency time.Duration + rules []Rule + seriesInPreviousEval []map[string]plabels.Labels // One per Rule. + staleSeries []plabels.Labels + opts *ManagerOptions + mtx sync.Mutex + evaluationDuration time.Duration + evaluationTime time.Duration + lastEvaluation time.Time + + markStale bool + done chan struct{} + terminated chan struct{} + managerDone chan struct{} + + pause bool + logger log.Logger + notify NotifyFunc +} + +// newPromRuleTask holds rules that have promql condition +// and evalutes the rule at a given frequency +func newPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *PromRuleTask { + zap.S().Info("Initiating a new rule group:", name, "\t frequency:", frequency) + + if time.Now() == time.Now().Add(frequency) { + frequency = DefaultFrequency + } + + return &PromRuleTask{ + name: name, + file: file, + pause: false, + frequency: frequency, + rules: rules, + opts: opts, + seriesInPreviousEval: make([]map[string]plabels.Labels, len(rules)), + done: make(chan struct{}), + terminated: make(chan struct{}), + notify: notify, + logger: log.With(opts.Logger, "group", name), + } +} + +// Name returns the group name. +func (g *PromRuleTask) Name() string { return g.name } + +// Key returns the group key +func (g *PromRuleTask) Key() string { + return g.name + ";" + g.file +} + +func (g *PromRuleTask) Type() TaskType { return TaskTypeProm } + +// Rules returns the group's rules. +func (g *PromRuleTask) Rules() []Rule { return g.rules } + +// Interval returns the group's interval. +func (g *PromRuleTask) Interval() time.Duration { return g.frequency } + +func (g *PromRuleTask) Pause(b bool) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.pause = b +} + +func (g *PromRuleTask) Run(ctx context.Context) { + defer close(g.terminated) + + // Wait an initial amount to have consistently slotted intervals. 
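+	// EvalTimestamp slots evaluations at hash(name) mod frequency past each
+	// frequency boundary, spreading concurrent rule tasks over time.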
+ evalTimestamp := g.EvalTimestamp(time.Now().UnixNano()).Add(g.frequency) + select { + case <-time.After(time.Until(evalTimestamp)): + case <-g.done: + return + } + + ctx = NewQueryOriginContext(ctx, map[string]interface{}{ + "ruleGroup": map[string]string{ + "name": g.Name(), + }, + }) + + iter := func() { + + start := time.Now() + g.Eval(ctx, evalTimestamp) + timeSinceStart := time.Since(start) + + g.setEvaluationTime(timeSinceStart) + g.setLastEvaluation(start) + } + + // The assumption here is that since the ticker was started after having + // waited for `evalTimestamp` to pass, the ticks will trigger soon + // after each `evalTimestamp + N * g.frequency` occurrence. + tick := time.NewTicker(g.frequency) + defer tick.Stop() + + // defer cleanup + defer func() { + if !g.markStale { + return + } + go func(now time.Time) { + for _, rule := range g.seriesInPreviousEval { + for _, r := range rule { + g.staleSeries = append(g.staleSeries, r) + } + } + // That can be garbage collected at this point. + g.seriesInPreviousEval = nil + + }(time.Now()) + + }() + + iter() + + // let the group iterate and run + for { + select { + case <-g.done: + return + default: + select { + case <-g.done: + return + case <-tick.C: + missed := (time.Since(evalTimestamp) / g.frequency) - 1 + evalTimestamp = evalTimestamp.Add((missed + 1) * g.frequency) + iter() + } + } + } +} + +func (g *PromRuleTask) Stop() { + close(g.done) + <-g.terminated +} + +func (g *PromRuleTask) hash() uint64 { + l := plabels.New( + plabels.Label{Name: "name", Value: g.name}, + ) + return l.Hash() +} + +// PromRules returns the list of the group's promql rules. +func (g *PromRuleTask) PromRules() []*PromRule { + g.mtx.Lock() + defer g.mtx.Unlock() + var alerts []*PromRule + for _, rule := range g.rules { + if tr, ok := rule.(*PromRule); ok { + alerts = append(alerts, tr) + } + } + sort.Slice(alerts, func(i, j int) bool { + return alerts[i].State() > alerts[j].State() || + (alerts[i].State() == alerts[j].State() && + alerts[i].Name() < alerts[j].Name()) + }) + return alerts +} + +// HasAlertingRules returns true if the group contains at least one AlertingRule. +func (g *PromRuleTask) HasAlertingRules() bool { + g.mtx.Lock() + defer g.mtx.Unlock() + + for _, rule := range g.rules { + if _, ok := rule.(*ThresholdRule); ok { + return true + } + } + return false +} + +// GetEvaluationDuration returns the time in seconds it took to evaluate the rule group. +func (g *PromRuleTask) GetEvaluationDuration() time.Duration { + g.mtx.Lock() + defer g.mtx.Unlock() + return g.evaluationDuration +} + +// SetEvaluationDuration sets the time in seconds the last evaluation took. +func (g *PromRuleTask) SetEvaluationDuration(dur time.Duration) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.evaluationDuration = dur +} + +// GetEvaluationTime returns the time in seconds it took to evaluate the rule group. +func (g *PromRuleTask) GetEvaluationTime() time.Duration { + g.mtx.Lock() + defer g.mtx.Unlock() + return g.evaluationTime +} + +// setEvaluationTime sets the time in seconds the last evaluation took. +func (g *PromRuleTask) setEvaluationTime(dur time.Duration) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.evaluationTime = dur +} + +// GetLastEvaluation returns the time the last evaluation of the rule group took place. +func (g *PromRuleTask) GetLastEvaluation() time.Time { + g.mtx.Lock() + defer g.mtx.Unlock() + return g.lastEvaluation +} + +// setLastEvaluation updates evaluationTimestamp to the timestamp of when the rule group was last evaluated. 
+func (g *PromRuleTask) setLastEvaluation(ts time.Time) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.lastEvaluation = ts +} + +// EvalTimestamp returns the immediately preceding consistently slotted evaluation time. +func (g *PromRuleTask) EvalTimestamp(startTime int64) time.Time { + var ( + offset = int64(g.hash() % uint64(g.frequency)) + adjNow = startTime - offset + base = adjNow - (adjNow % int64(g.frequency)) + ) + + return time.Unix(0, base+offset).UTC() +} + +// CopyState copies the alerting rule and staleness related state from the given group. +// +// Rules are matched based on their name and labels. If there are duplicates, the +// first is matched with the first, second with the second etc. +func (g *PromRuleTask) CopyState(fromTask Task) error { + + from, ok := fromTask.(*PromRuleTask) + if !ok { + return fmt.Errorf("you can only copy rule groups with same type") + } + + g.evaluationTime = from.evaluationTime + g.lastEvaluation = from.lastEvaluation + + ruleMap := make(map[string][]int, len(from.rules)) + + for fi, fromRule := range from.rules { + nameAndLabels := nameAndLabels(fromRule) + l := ruleMap[nameAndLabels] + ruleMap[nameAndLabels] = append(l, fi) + } + + for i, rule := range g.rules { + nameAndLabels := nameAndLabels(rule) + indexes := ruleMap[nameAndLabels] + if len(indexes) == 0 { + continue + } + fi := indexes[0] + g.seriesInPreviousEval[i] = from.seriesInPreviousEval[fi] + ruleMap[nameAndLabels] = indexes[1:] + + ar, ok := rule.(*ThresholdRule) + if !ok { + continue + } + far, ok := from.rules[fi].(*ThresholdRule) + if !ok { + continue + } + + for fp, a := range far.active { + ar.active[fp] = a + } + } + + // Handle deleted and unmatched duplicate rules. + g.staleSeries = from.staleSeries + for fi, fromRule := range from.rules { + nameAndLabels := nameAndLabels(fromRule) + l := ruleMap[nameAndLabels] + if len(l) != 0 { + for _, series := range from.seriesInPreviousEval[fi] { + g.staleSeries = append(g.staleSeries, series) + } + } + } + return nil +} + +// Eval runs a single evaluation cycle in which all rules are evaluated sequentially. +func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) { + zap.S().Info("promql rule task:", g.name, "\t eval started at:", ts) + var samplesTotal float64 + for i, rule := range g.rules { + if rule == nil { + continue + } + select { + case <-g.done: + return + default: + } + + func(i int, rule Rule) { + sp, ctx := opentracing.StartSpanFromContext(ctx, "rule") + + sp.SetTag("name", rule.Name()) + defer func(t time.Time) { + sp.Finish() + + since := time.Since(t) + rule.SetEvaluationDuration(since) + rule.SetEvaluationTimestamp(t) + }(time.Now()) + + data, err := rule.Eval(ctx, ts, g.opts.Queriers) + if err != nil { + rule.SetHealth(HealthBad) + rule.SetLastError(err) + + zap.S().Warn("msg", "Evaluating rule failed", "rule", rule, "err", err) + + // Canceled queries are intentional termination of queries. This normally + // happens on shutdown and thus we skip logging of any errors here. + //! 
if _, ok := err.(promql.ErrQueryCanceled); !ok { + // level.Warn(g.logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err) + //} + return + } + vector := data.(pql.Vector) + samplesTotal += float64(len(vector)) + + rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify) + + seriesReturned := make(map[string]plabels.Labels, len(g.seriesInPreviousEval[i])) + + defer func() { + g.seriesInPreviousEval[i] = seriesReturned + }() + + for _, s := range vector { + seriesReturned[s.Metric.String()] = s.Metric + } + + }(i, rule) + } +} diff --git a/pkg/query-service/rules/queriers.go b/pkg/query-service/rules/queriers.go new file mode 100644 index 0000000000..c2444cff7a --- /dev/null +++ b/pkg/query-service/rules/queriers.go @@ -0,0 +1,21 @@ +package rules + +import ( + "github.com/ClickHouse/clickhouse-go/v2" + pqle "go.signoz.io/query-service/pqlEngine" +) + +// Queriers register the options for querying metrics or event sources +// which return a condition that results in a alert. Currently we support +// promql engine and clickhouse queries but in future we may include +// api readers for Machine Learning (ML) use cases. +// Note: each rule will pick up the querier it is interested in +// and use it. This allows rules to have flexibility in choosing +// the query engines. +type Queriers struct { + // promql engine + PqlEngine *pqle.PqlEngine + + // metric querier + Ch clickhouse.Conn +} diff --git a/pkg/query-service/rules/resultTypes.go b/pkg/query-service/rules/resultTypes.go new file mode 100644 index 0000000000..9a36a9759f --- /dev/null +++ b/pkg/query-service/rules/resultTypes.go @@ -0,0 +1,50 @@ +package rules + +import ( + "encoding/json" + "fmt" + "strconv" + + "go.signoz.io/query-service/utils/labels" +) + +// common result format of query + +type Vector []Sample + +type Sample struct { + Point + + Metric labels.Labels +} + +func (s Sample) String() string { + return fmt.Sprintf("%s => %s", s.Metric, s.Point) +} + +func (s Sample) MarshalJSON() ([]byte, error) { + v := struct { + M labels.Labels `json:"metric"` + V Point `json:"value"` + }{ + M: s.Metric, + V: s.Point, + } + return json.Marshal(v) +} + +type Point struct { + T int64 + V float64 +} + +func (p Point) String() string { + v := strconv.FormatFloat(p.V, 'f', -1, 64) + return fmt.Sprintf("%v @[%v]", v, p.T) +} + +// MarshalJSON implements json.Marshaler. +func (p Point) MarshalJSON() ([]byte, error) { + v := strconv.FormatFloat(p.V, 'f', -1, 64) + return json.Marshal([...]interface{}{float64(p.T) / 1000, v}) +} diff --git a/pkg/query-service/rules/rule.go b/pkg/query-service/rules/rule.go new file mode 100644 index 0000000000..ba5c934172 --- /dev/null +++ b/pkg/query-service/rules/rule.go @@ -0,0 +1,35 @@ +package rules + +import ( + "context" + "go.signoz.io/query-service/utils/labels" + "time" +) + +// A Rule encapsulates a vector expression which is evaluated at a specified +// interval and acted upon (currently used for alerting). 
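+// ThresholdRule (evaluated via the clickhouse querier) and PromRule (promql)
+// implement it; Eval returns the result vector and updates the rule's active
+// alerts as a side effect.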
+type Rule interface { + ID() string + Name() string + Type() RuleType + + Labels() labels.BaseLabels + Annotations() labels.BaseLabels + Condition() *RuleCondition + State() AlertState + ActiveAlerts() []*Alert + + Eval(context.Context, time.Time, *Queriers) (interface{}, error) + String() string + // Query() string + SetLastError(error) + LastError() error + SetHealth(RuleHealth) + Health() RuleHealth + SetEvaluationDuration(time.Duration) + GetEvaluationDuration() time.Duration + SetEvaluationTimestamp(time.Time) + GetEvaluationTimestamp() time.Time + + SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) +} diff --git a/pkg/query-service/rules/ruleTask.go b/pkg/query-service/rules/ruleTask.go new file mode 100644 index 0000000000..59b25f05e0 --- /dev/null +++ b/pkg/query-service/rules/ruleTask.go @@ -0,0 +1,385 @@ +package rules + +import ( + "context" + "fmt" + opentracing "github.com/opentracing/opentracing-go" + "go.signoz.io/query-service/utils/labels" + "go.uber.org/zap" + "sort" + "sync" + "time" +) + +// RuleTask holds a rule (with composite queries) +// and evaluates the rule at a given frequency +type RuleTask struct { + name string + file string + frequency time.Duration + rules []Rule + seriesInPreviousEval []map[string]labels.Labels // One per Rule. + staleSeries []labels.Labels + opts *ManagerOptions + mtx sync.Mutex + evaluationDuration time.Duration + evaluationTime time.Duration + lastEvaluation time.Time + + markStale bool + done chan struct{} + terminated chan struct{} + managerDone chan struct{} + + pause bool + notify NotifyFunc +} + +const DefaultFrequency = 1 * time.Minute + +// newRuleTask makes a new RuleTask with the given name, options, and rules. +func newRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *RuleTask { + + if time.Now() == time.Now().Add(frequency) { + frequency = DefaultFrequency + } + zap.S().Info("msg:", "initiating a new rule task", "\t name:", name, "\t frequency:", frequency) + + return &RuleTask{ + name: name, + file: file, + pause: false, + frequency: frequency, + rules: rules, + opts: opts, + seriesInPreviousEval: make([]map[string]labels.Labels, len(rules)), + done: make(chan struct{}), + terminated: make(chan struct{}), + notify: notify, + } +} + +// Name returns the group name. +func (g *RuleTask) Name() string { return g.name } + +// Key returns the group key +func (g *RuleTask) Key() string { + return g.name + ";" + g.file +} + +// Name returns the group name. +func (g *RuleTask) Type() TaskType { return TaskTypeCh } + +// Rules returns the group's rules. +func (g *RuleTask) Rules() []Rule { return g.rules } + +// Interval returns the group's interval. +func (g *RuleTask) Interval() time.Duration { return g.frequency } + +func (g *RuleTask) Pause(b bool) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.pause = b +} + +type QueryOrigin struct{} + +func NewQueryOriginContext(ctx context.Context, data map[string]interface{}) context.Context { + return context.WithValue(ctx, QueryOrigin{}, data) +} + +func (g *RuleTask) Run(ctx context.Context) { + defer close(g.terminated) + + // Wait an initial amount to have consistently slotted intervals. 
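+	// unlike PromRuleTask, iter() below skips evaluation while paused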
+ evalTimestamp := g.EvalTimestamp(time.Now().UnixNano()).Add(g.frequency) + zap.S().Debugf("group:", g.name, "\t group run to begin at: ", evalTimestamp) + select { + case <-time.After(time.Until(evalTimestamp)): + case <-g.done: + return + } + + ctx = NewQueryOriginContext(ctx, map[string]interface{}{ + "ruleRuleTask": map[string]string{ + "name": g.Name(), + }, + }) + + iter := func() { + if g.pause { + // todo(amol): remove in memory active alerts + // and last series state + return + } + start := time.Now() + g.Eval(ctx, evalTimestamp) + timeSinceStart := time.Since(start) + + g.setEvaluationTime(timeSinceStart) + g.setLastEvaluation(start) + } + + // The assumption here is that since the ticker was started after having + // waited for `evalTimestamp` to pass, the ticks will trigger soon + // after each `evalTimestamp + N * g.frequency` occurrence. + tick := time.NewTicker(g.frequency) + defer tick.Stop() + + // defer cleanup + defer func() { + if !g.markStale { + return + } + go func(now time.Time) { + for _, rule := range g.seriesInPreviousEval { + for _, r := range rule { + g.staleSeries = append(g.staleSeries, r) + } + } + // That can be garbage collected at this point. + g.seriesInPreviousEval = nil + + }(time.Now()) + + }() + + iter() + + // let the group iterate and run + for { + select { + case <-g.done: + return + default: + select { + case <-g.done: + return + case <-tick.C: + missed := (time.Since(evalTimestamp) / g.frequency) - 1 + evalTimestamp = evalTimestamp.Add((missed + 1) * g.frequency) + iter() + } + } + } +} + +func (g *RuleTask) Stop() { + close(g.done) + <-g.terminated +} + +func (g *RuleTask) hash() uint64 { + l := labels.New( + labels.Label{Name: "name", Value: g.name}, + ) + return l.Hash() +} + +// ThresholdRules returns the list of the group's threshold rules. +func (g *RuleTask) ThresholdRules() []*ThresholdRule { + g.mtx.Lock() + defer g.mtx.Unlock() + var alerts []*ThresholdRule + for _, rule := range g.rules { + if tr, ok := rule.(*ThresholdRule); ok { + alerts = append(alerts, tr) + } + } + sort.Slice(alerts, func(i, j int) bool { + return alerts[i].State() > alerts[j].State() || + (alerts[i].State() == alerts[j].State() && + alerts[i].Name() < alerts[j].Name()) + }) + return alerts +} + +// HasAlertingRules returns true if the group contains at least one AlertingRule. +func (g *RuleTask) HasAlertingRules() bool { + g.mtx.Lock() + defer g.mtx.Unlock() + + for _, rule := range g.rules { + if _, ok := rule.(*ThresholdRule); ok { + return true + } + } + return false +} + +// GetEvaluationDuration returns the time in seconds it took to evaluate the rule group. +func (g *RuleTask) GetEvaluationDuration() time.Duration { + g.mtx.Lock() + defer g.mtx.Unlock() + return g.evaluationDuration +} + +// SetEvaluationDuration sets the time in seconds the last evaluation took. +func (g *RuleTask) SetEvaluationDuration(dur time.Duration) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.evaluationDuration = dur +} + +// GetEvaluationTime returns the time in seconds it took to evaluate the rule group. +func (g *RuleTask) GetEvaluationTime() time.Duration { + g.mtx.Lock() + defer g.mtx.Unlock() + return g.evaluationTime +} + +// setEvaluationTime sets the time in seconds the last evaluation took. +func (g *RuleTask) setEvaluationTime(dur time.Duration) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.evaluationTime = dur +} + +// GetLastEvaluation returns the time the last evaluation of the rule group took place. 
+func (g *RuleTask) GetLastEvaluation() time.Time { + g.mtx.Lock() + defer g.mtx.Unlock() + return g.lastEvaluation +} + +// setLastEvaluation updates evaluationTimestamp to the timestamp of when the rule group was last evaluated. +func (g *RuleTask) setLastEvaluation(ts time.Time) { + g.mtx.Lock() + defer g.mtx.Unlock() + g.lastEvaluation = ts +} + +// EvalTimestamp returns the immediately preceding consistently slotted evaluation time. +func (g *RuleTask) EvalTimestamp(startTime int64) time.Time { + var ( + offset = int64(g.hash() % uint64(g.frequency)) + adjNow = startTime - offset + base = adjNow - (adjNow % int64(g.frequency)) + ) + + return time.Unix(0, base+offset).UTC() +} + +func nameAndLabels(rule Rule) string { + return rule.Name() + rule.Labels().String() +} + +// CopyState copies the alerting rule and staleness related state from the given group. +// +// Rules are matched based on their name and labels. If there are duplicates, the +// first is matched with the first, second with the second etc. +func (g *RuleTask) CopyState(fromTask Task) error { + + from, ok := fromTask.(*RuleTask) + if !ok { + return fmt.Errorf("invalid from task for copy") + } + g.evaluationTime = from.evaluationTime + g.lastEvaluation = from.lastEvaluation + + ruleMap := make(map[string][]int, len(from.rules)) + + for fi, fromRule := range from.rules { + nameAndLabels := nameAndLabels(fromRule) + l := ruleMap[nameAndLabels] + ruleMap[nameAndLabels] = append(l, fi) + } + + for i, rule := range g.rules { + nameAndLabels := nameAndLabels(rule) + indexes := ruleMap[nameAndLabels] + if len(indexes) == 0 { + continue + } + fi := indexes[0] + g.seriesInPreviousEval[i] = from.seriesInPreviousEval[fi] + ruleMap[nameAndLabels] = indexes[1:] + + // todo(amol): support other rules too here + ar, ok := rule.(*ThresholdRule) + if !ok { + continue + } + far, ok := from.rules[fi].(*ThresholdRule) + if !ok { + continue + } + + for fp, a := range far.active { + ar.active[fp] = a + } + } + + // Handle deleted and unmatched duplicate rules. + // todo(amol): possibly not needed any more + g.staleSeries = from.staleSeries + for fi, fromRule := range from.rules { + nameAndLabels := nameAndLabels(fromRule) + l := ruleMap[nameAndLabels] + if len(l) != 0 { + for _, series := range from.seriesInPreviousEval[fi] { + g.staleSeries = append(g.staleSeries, series) + } + } + } + return nil +} + +// Eval runs a single evaluation cycle in which all rules are evaluated sequentially. +func (g *RuleTask) Eval(ctx context.Context, ts time.Time) { + + zap.S().Debugf("msg:", "rule task eval started", "\t name:", g.name, "\t start time:", ts) + + var samplesTotal float64 + for i, rule := range g.rules { + if rule == nil { + continue + } + select { + case <-g.done: + return + default: + } + + func(i int, rule Rule) { + sp, ctx := opentracing.StartSpanFromContext(ctx, "rule") + + sp.SetTag("name", rule.Name()) + defer func(t time.Time) { + sp.Finish() + + since := time.Since(t) + rule.SetEvaluationDuration(since) + rule.SetEvaluationTimestamp(t) + }(time.Now()) + + data, err := rule.Eval(ctx, ts, g.opts.Queriers) + if err != nil { + rule.SetHealth(HealthBad) + rule.SetLastError(err) + + zap.S().Warn("msg:", "Evaluating rule failed", "\t rule:", rule, "\t err: ", err) + + // Canceled queries are intentional termination of queries. This normally + // happens on shutdown and thus we skip logging of any errors here. + //! 
if _, ok := err.(promql.ErrQueryCanceled); !ok { + // level.Warn(g.logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err) + //} + return + } + + vector := data.(Vector) + samplesTotal += float64(len(vector)) + + rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify) + + seriesReturned := make(map[string]labels.Labels, len(g.seriesInPreviousEval[i])) + + for _, s := range vector { + seriesReturned[s.Metric.String()] = s.Metric + } + + g.seriesInPreviousEval[i] = seriesReturned + }(i, rule) + } +} diff --git a/pkg/query-service/rules/task.go b/pkg/query-service/rules/task.go new file mode 100644 index 0000000000..bec4ff1c13 --- /dev/null +++ b/pkg/query-service/rules/task.go @@ -0,0 +1,37 @@ +package rules + +import ( + "context" + "time" +) + +type TaskType string + +const ( + TaskTypeProm = "promql_ruletask" + TaskTypeCh = "ch_ruletask" +) + +type Task interface { + Name() string + + // Key returns the group key + Key() string + + Type() TaskType + CopyState(from Task) error + Eval(ctx context.Context, ts time.Time) + Run(ctx context.Context) + Rules() []Rule + Stop() + Pause(b bool) +} + +// newTask returns an appropriate group for +// rule type +func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) Task { + if taskType == TaskTypeCh { + return newRuleTask(name, file, frequency, rules, opts, notify) + } + return newPromRuleTask(name, file, frequency, rules, opts, notify) +} diff --git a/pkg/query-service/rules/templates.go b/pkg/query-service/rules/templates.go new file mode 100644 index 0000000000..4789780ffc --- /dev/null +++ b/pkg/query-service/rules/templates.go @@ -0,0 +1,290 @@ +package rules + +import ( + "bytes" + "context" + "errors" + "fmt" + "math" + "net/url" + "regexp" + "sort" + "strings" + + html_template "html/template" + text_template "text/template" + + "go.signoz.io/query-service/utils/times" +) + +type tmplQueryRecord struct { + Labels map[string]string + Value float64 +} +type tmplQueryResults []*tmplQueryRecord + +type tmplQueryResultsByLabelSorter struct { + results tmplQueryResults + by string +} + +func (q tmplQueryResultsByLabelSorter) Len() int { + return len(q.results) +} + +func (q tmplQueryResultsByLabelSorter) Less(i, j int) bool { + return q.results[i].Labels[q.by] < q.results[j].Labels[q.by] +} + +func (q tmplQueryResultsByLabelSorter) Swap(i, j int) { + q.results[i], q.results[j] = q.results[j], q.results[i] +} + +// Expander executes templates in text or HTML mode with a common set of Prometheus template functions. +type TemplateExpander struct { + text string + name string + data interface{} + funcMap text_template.FuncMap +} + +// NewTemplateExpander returns a template expander ready to use. 
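+//
+// The function map mirrors the Prometheus template helpers (humanize,
+// humanizeDuration, reReplaceAll, sortByLabel, ...). Rule evaluation injects
+// $labels and $value, so an annotation template could look like this
+// (illustrative values only):
+//
+//	high latency on {{$labels.service_name}}: {{humanize $value}}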
+
+// NewTemplateExpander returns a template expander ready to use.
+func NewTemplateExpander(
+    ctx context.Context,
+    text string,
+    name string,
+    data interface{},
+    timestamp times.Time,
+    externalURL *url.URL,
+) *TemplateExpander {
+    return &TemplateExpander{
+        text: text,
+        name: name,
+        data: data,
+        funcMap: text_template.FuncMap{
+            "first": func(v tmplQueryResults) (*tmplQueryRecord, error) {
+                if len(v) > 0 {
+                    return v[0], nil
+                }
+                return nil, errors.New("first() called on vector with no elements")
+            },
+            "label": func(label string, s *tmplQueryRecord) string {
+                return s.Labels[label]
+            },
+            "value": func(s *tmplQueryRecord) float64 {
+                return s.Value
+            },
+            "strvalue": func(s *tmplQueryRecord) string {
+                return s.Labels["__value__"]
+            },
+            "args": func(args ...interface{}) map[string]interface{} {
+                result := make(map[string]interface{})
+                for i, a := range args {
+                    result[fmt.Sprintf("arg%d", i)] = a
+                }
+                return result
+            },
+            "reReplaceAll": func(pattern, repl, text string) string {
+                re := regexp.MustCompile(pattern)
+                return re.ReplaceAllString(text, repl)
+            },
+            "safeHtml": func(text string) html_template.HTML {
+                return html_template.HTML(text)
+            },
+            "match":   regexp.MatchString,
+            "title":   strings.Title,
+            "toUpper": strings.ToUpper,
+            "toLower": strings.ToLower,
+            "sortByLabel": func(label string, v tmplQueryResults) tmplQueryResults {
+                sorter := tmplQueryResultsByLabelSorter{v[:], label}
+                sort.Stable(sorter)
+                return v
+            },
+            "humanize": func(v float64) string {
+                if v == 0 || math.IsNaN(v) || math.IsInf(v, 0) {
+                    return fmt.Sprintf("%.4g", v)
+                }
+                if math.Abs(v) >= 1 {
+                    prefix := ""
+                    for _, p := range []string{"k", "M", "G", "T", "P", "E", "Z", "Y"} {
+                        if math.Abs(v) < 1000 {
+                            break
+                        }
+                        prefix = p
+                        v /= 1000
+                    }
+                    return fmt.Sprintf("%.4g%s", v, prefix)
+                }
+                prefix := ""
+                for _, p := range []string{"m", "u", "n", "p", "f", "a", "z", "y"} {
+                    if math.Abs(v) >= 1 {
+                        break
+                    }
+                    prefix = p
+                    v *= 1000
+                }
+                return fmt.Sprintf("%.4g%s", v, prefix)
+            },
+            "humanize1024": func(v float64) string {
+                if math.Abs(v) <= 1 || math.IsNaN(v) || math.IsInf(v, 0) {
+                    return fmt.Sprintf("%.4g", v)
+                }
+                prefix := ""
+                for _, p := range []string{"ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"} {
+                    if math.Abs(v) < 1024 {
+                        break
+                    }
+                    prefix = p
+                    v /= 1024
+                }
+                return fmt.Sprintf("%.4g%s", v, prefix)
+            },
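+            // Editorial note (worked examples, not part of the original
+            // patch): humanize(12345678) scales down twice (k, then M) and
+            // prints "12.35M"; humanize(0.00912) scales up once and prints
+            // "9.12m"; humanize1024(1536) divides by 1024 once and prints
+            // "1.5ki".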
+            "humanizeDuration": func(v float64) string {
+                if math.IsNaN(v) || math.IsInf(v, 0) {
+                    return fmt.Sprintf("%.4g", v)
+                }
+                if v == 0 {
+                    return fmt.Sprintf("%.4gs", v)
+                }
+                if math.Abs(v) >= 1 {
+                    sign := ""
+                    if v < 0 {
+                        sign = "-"
+                        v = -v
+                    }
+                    seconds := int64(v) % 60
+                    minutes := (int64(v) / 60) % 60
+                    hours := (int64(v) / 60 / 60) % 24
+                    days := (int64(v) / 60 / 60 / 24)
+                    // For days to minutes, we display seconds as an integer.
+                    if days != 0 {
+                        return fmt.Sprintf("%s%dd %dh %dm %ds", sign, days, hours, minutes, seconds)
+                    }
+                    if hours != 0 {
+                        return fmt.Sprintf("%s%dh %dm %ds", sign, hours, minutes, seconds)
+                    }
+                    if minutes != 0 {
+                        return fmt.Sprintf("%s%dm %ds", sign, minutes, seconds)
+                    }
+                    // For seconds, we display 4 significant digits.
+                    return fmt.Sprintf("%s%.4gs", sign, v)
+                }
+                prefix := ""
+                for _, p := range []string{"m", "u", "n", "p", "f", "a", "z", "y"} {
+                    if math.Abs(v) >= 1 {
+                        break
+                    }
+                    prefix = p
+                    v *= 1000
+                }
+                return fmt.Sprintf("%.4g%ss", v, prefix)
+            },
+            "humanizeTimestamp": func(v float64) string {
+                if math.IsNaN(v) || math.IsInf(v, 0) {
+                    return fmt.Sprintf("%.4g", v)
+                }
+                t := times.TimeFromUnixNano(int64(v * 1e9)).Time().UTC()
+                return fmt.Sprint(t)
+            },
+            "pathPrefix": func() string {
+                return externalURL.Path
+            },
+            "externalURL": func() string {
+                return externalURL.String()
+            },
+        },
+    }
+}
+
+// AlertTemplateData returns the interface to be used in expanding the template.
+func AlertTemplateData(labels map[string]string, value float64) interface{} {
+    return struct {
+        Labels map[string]string
+        Value  float64
+    }{
+        Labels: labels,
+        Value:  value,
+    }
+}
+
+// Funcs adds the functions in fm to the TemplateExpander's function map.
+// Existing functions will be overwritten in case of conflict.
+func (te TemplateExpander) Funcs(fm text_template.FuncMap) {
+    for k, v := range fm {
+        te.funcMap[k] = v
+    }
+}
+
+// Expand expands a template in text (non-HTML) mode.
+func (te TemplateExpander) Expand() (result string, resultErr error) {
+    // It'd be better to have no alert description than to kill the whole process
+    // if there's a bug in the template.
+    defer func() {
+        if r := recover(); r != nil {
+            var ok bool
+            resultErr, ok = r.(error)
+            if !ok {
+                resultErr = fmt.Errorf("panic expanding template %v: %v", te.name, r)
+            }
+        }
+    }()
+
+    tmpl, err := text_template.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text)
+    if err != nil {
+        return "", fmt.Errorf("error parsing template %v: %v", te.name, err)
+    }
+    var buffer bytes.Buffer
+    err = tmpl.Execute(&buffer, te.data)
+    if err != nil {
+        return "", fmt.Errorf("error executing template %v: %v", te.name, err)
+    }
+    return buffer.String(), nil
+}
+
+// ExpandHTML expands a template with HTML escaping, with templates read from the given files.
+func (te TemplateExpander) ExpandHTML(templateFiles []string) (result string, resultErr error) {
+    defer func() {
+        if r := recover(); r != nil {
+            var ok bool
+            resultErr, ok = r.(error)
+            if !ok {
+                resultErr = fmt.Errorf("panic expanding template %v: %v", te.name, r)
+            }
+        }
+    }()
+
+    tmpl := html_template.New(te.name).Funcs(html_template.FuncMap(te.funcMap))
+    tmpl.Option("missingkey=zero")
+    tmpl.Funcs(html_template.FuncMap{
+        "tmpl": func(name string, data interface{}) (html_template.HTML, error) {
+            var buffer bytes.Buffer
+            err := tmpl.ExecuteTemplate(&buffer, name, data)
+            return html_template.HTML(buffer.String()), err
+        },
+    })
+    tmpl, err := tmpl.Parse(te.text)
+    if err != nil {
+        return "", fmt.Errorf("error parsing template %v: %v", te.name, err)
+    }
+    if len(templateFiles) > 0 {
+        _, err = tmpl.ParseFiles(templateFiles...)
+        if err != nil {
+            return "", fmt.Errorf("error parsing template files for %v: %v", te.name, err)
+        }
+    }
+    var buffer bytes.Buffer
+    err = tmpl.Execute(&buffer, te.data)
+    if err != nil {
+        return "", fmt.Errorf("error executing template %v: %v", te.name, err)
+    }
+    return buffer.String(), nil
+}
+
+// ParseTest parses the templates and returns the error if any.
+func (te TemplateExpander) ParseTest() error { + _, err := text_template.New(te.name).Funcs(te.funcMap).Option("missingkey=zero").Parse(te.text) + if err != nil { + return err + } + return nil +} diff --git a/pkg/query-service/rules/thresholdRule.go b/pkg/query-service/rules/thresholdRule.go new file mode 100644 index 0000000000..8f734c113d --- /dev/null +++ b/pkg/query-service/rules/thresholdRule.go @@ -0,0 +1,679 @@ +package rules + +import ( + "context" + "fmt" + "go.uber.org/zap" + "math" + "reflect" + "sort" + "sync" + "time" + + "github.com/ClickHouse/clickhouse-go/v2" + "go.signoz.io/query-service/app/metrics" + "go.signoz.io/query-service/constants" + qsmodel "go.signoz.io/query-service/model" + "go.signoz.io/query-service/utils/labels" + "go.signoz.io/query-service/utils/times" + "go.signoz.io/query-service/utils/timestamp" + "go.signoz.io/query-service/utils/value" + + yaml "gopkg.in/yaml.v2" +) + +type ThresholdRule struct { + id string + name string + source string + ruleCondition *RuleCondition + evalWindow time.Duration + holdDuration time.Duration + labels labels.Labels + annotations labels.Labels + + mtx sync.Mutex + evaluationDuration time.Duration + evaluationTimestamp time.Time + + health RuleHealth + + lastError error + + // map of active alerts + active map[uint64]*Alert +} + +func NewThresholdRule( + id string, + name string, + ruleCondition *RuleCondition, + evalWindow time.Duration, + l, a map[string]string, + source string, +) (*ThresholdRule, error) { + + if int64(evalWindow) == 0 { + evalWindow = 5 * time.Minute + } + + if ruleCondition == nil { + return nil, fmt.Errorf("no rule condition") + } else if !ruleCondition.IsValid() { + return nil, fmt.Errorf("invalid rule condition") + } + + zap.S().Info("msg:", "creating new alerting rule", "\t name:", name, "\t condition:", ruleCondition.String()) + + return &ThresholdRule{ + id: id, + name: name, + source: source, + ruleCondition: ruleCondition, + evalWindow: evalWindow, + labels: labels.FromMap(l), + annotations: labels.FromMap(a), + + health: HealthUnknown, + active: map[uint64]*Alert{}, + }, nil +} + +func (r *ThresholdRule) Name() string { + return r.name +} + +func (r *ThresholdRule) ID() string { + return r.id +} + +func (r *ThresholdRule) Condition() *RuleCondition { + return r.ruleCondition +} + +func (r *ThresholdRule) GeneratorURL() string { + return r.source +} + +func (r *ThresholdRule) target() *float64 { + if r.ruleCondition == nil { + return nil + } + return r.ruleCondition.Target +} + +func (r *ThresholdRule) matchType() MatchType { + if r.ruleCondition == nil { + return AtleastOnce + } + return r.ruleCondition.MatchType +} + +func (r *ThresholdRule) compareOp() CompareOp { + if r.ruleCondition == nil { + return ValueIsEq + } + return r.ruleCondition.CompareOp +} + +func (r *ThresholdRule) Type() RuleType { + return RuleTypeThreshold +} + +func (r *ThresholdRule) SetLastError(err error) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.lastError = err +} + +func (r *ThresholdRule) LastError() error { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.lastError +} + +func (r *ThresholdRule) SetHealth(health RuleHealth) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.health = health +} + +func (r *ThresholdRule) Health() RuleHealth { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.health +} + +// SetEvaluationDuration updates evaluationDuration to the duration it took to evaluate the rule on its last evaluation. 
+func (r *ThresholdRule) SetEvaluationDuration(dur time.Duration) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.evaluationDuration = dur +} + +func (r *ThresholdRule) HoldDuration() time.Duration { + return r.holdDuration +} + +func (r *ThresholdRule) EvalWindow() time.Duration { + return r.evalWindow +} + +// Labels returns the labels of the alerting rule. +func (r *ThresholdRule) Labels() labels.BaseLabels { + return r.labels +} + +// Annotations returns the annotations of the alerting rule. +func (r *ThresholdRule) Annotations() labels.BaseLabels { + return r.annotations +} + +func (r *ThresholdRule) sample(alert *Alert, ts time.Time) Sample { + lb := labels.NewBuilder(r.labels) + alertLabels := alert.Labels.(labels.Labels) + for _, l := range alertLabels { + lb.Set(l.Name, l.Value) + } + + lb.Set(labels.MetricNameLabel, alertMetricName) + lb.Set(labels.AlertNameLabel, r.name) + lb.Set(labels.AlertRuleIdLabel, r.ID()) + lb.Set(labels.AlertStateLabel, alert.State.String()) + + s := Sample{ + Metric: lb.Labels(), + Point: Point{T: timestamp.FromTime(ts), V: 1}, + } + return s +} + +// forStateSample returns the sample for ALERTS_FOR_STATE. +func (r *ThresholdRule) forStateSample(alert *Alert, ts time.Time, v float64) Sample { + lb := labels.NewBuilder(r.labels) + + alertLabels := alert.Labels.(labels.Labels) + for _, l := range alertLabels { + lb.Set(l.Name, l.Value) + } + + lb.Set(labels.MetricNameLabel, alertForStateMetricName) + lb.Set(labels.AlertNameLabel, r.name) + + s := Sample{ + Metric: lb.Labels(), + Point: Point{T: timestamp.FromTime(ts), V: v}, + } + return s +} + +// GetEvaluationDuration returns the time in seconds it took to evaluate the alerting rule. +func (r *ThresholdRule) GetEvaluationDuration() time.Duration { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.evaluationDuration +} + +// SetEvaluationTimestamp updates evaluationTimestamp to the timestamp of when the rule was last evaluated. +func (r *ThresholdRule) SetEvaluationTimestamp(ts time.Time) { + r.mtx.Lock() + defer r.mtx.Unlock() + r.evaluationTimestamp = ts +} + +// GetEvaluationTimestamp returns the time the evaluation took place. +func (r *ThresholdRule) GetEvaluationTimestamp() time.Time { + r.mtx.Lock() + defer r.mtx.Unlock() + return r.evaluationTimestamp +} + +// State returns the maximum state of alert instances for this rule. +// StateFiring > StatePending > StateInactive +func (r *ThresholdRule) State() AlertState { + r.mtx.Lock() + defer r.mtx.Unlock() + + maxState := StateInactive + for _, a := range r.active { + if a.State > maxState { + maxState = a.State + } + } + return maxState +} + +func (r *ThresholdRule) currentAlerts() []*Alert { + r.mtx.Lock() + defer r.mtx.Unlock() + + alerts := make([]*Alert, 0, len(r.active)) + + for _, a := range r.active { + anew := *a + alerts = append(alerts, &anew) + } + return alerts +} + +func (r *ThresholdRule) ActiveAlerts() []*Alert { + var res []*Alert + for _, a := range r.currentAlerts() { + if a.ResolvedAt.IsZero() { + res = append(res, a) + } + } + return res +} + +// ForEachActiveAlert runs the given function on each alert. +// This should be used when you want to use the actual alerts from the ThresholdRule +// and not on its copy. +// If you want to run on a copy of alerts then don't use this, get the alerts from 'ActiveAlerts()'. 
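+// (Editorial note, not part of the original patch: SendAlerts below relies on
+// this in-place access to stamp LastSentAt and ValidUntil on the live alert
+// objects before copying them out for delivery.)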
+func (r *ThresholdRule) ForEachActiveAlert(f func(*Alert)) {
+    r.mtx.Lock()
+    defer r.mtx.Unlock()
+
+    for _, a := range r.active {
+        f(a)
+    }
+}
+
+func (r *ThresholdRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay time.Duration, interval time.Duration, notifyFunc NotifyFunc) {
+    zap.S().Info("msg:", "initiating send alerts (if any)", "\t rule:", r.Name())
+    alerts := []*Alert{}
+    r.ForEachActiveAlert(func(alert *Alert) {
+        if alert.needsSending(ts, resendDelay) {
+            alert.LastSentAt = ts
+            // Allow for two Eval or Alertmanager send failures.
+            delta := resendDelay
+            if interval > resendDelay {
+                delta = interval
+            }
+            alert.ValidUntil = ts.Add(4 * delta)
+            anew := *alert
+            alerts = append(alerts, &anew)
+        } else {
+            zap.S().Debugf("msg: skipping send alert due to resend delay", "\t rule: ", r.Name(), "\t alert:", alert.Labels)
+        }
+    })
+    notifyFunc(ctx, "", alerts...)
+}
+
+func (r *ThresholdRule) CheckCondition(v float64) bool {
+
+    if value.IsNaN(v) {
+        zap.S().Debugf("msg:", "found NaN in rule condition", "\t rule name:", r.Name())
+        return false
+    }
+
+    if r.ruleCondition.Target == nil {
+        zap.S().Debugf("msg:", "found null target in rule condition", "\t rule name:", r.Name())
+        return false
+    }
+
+    switch r.ruleCondition.CompareOp {
+    case ValueIsEq:
+        return v == *r.ruleCondition.Target
+    case ValueIsNotEq:
+        return v != *r.ruleCondition.Target
+    case ValueIsBelow:
+        return v < *r.ruleCondition.Target
+    case ValueIsAbove:
+        return v > *r.ruleCondition.Target
+    default:
+        return false
+    }
+}
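+
+// Editorial note (illustrative, not part of the original patch): given a
+// condition with CompareOp = ValueIsAbove and Target pointing at 0.8,
+//
+//     r.CheckCondition(0.95)       // true  -> sample becomes an alert candidate
+//     r.CheckCondition(0.42)       // false -> sample is dropped
+//     r.CheckCondition(math.NaN()) // false -> NaN never satisfies a condition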
+
+func (r *ThresholdRule) prepareQueryRange(ts time.Time) *qsmodel.QueryRangeParamsV2 {
+    // todo(amol): add 30 seconds to evalWindow for rate calc
+    tsEnd := ts.UnixNano() / int64(time.Millisecond)
+    tsStart := ts.Add(-time.Duration(r.evalWindow)).UnixNano() / int64(time.Millisecond)
+
+    // for k, v := range r.ruleCondition.CompositeMetricQuery.BuilderQueries {
+    //     v.ReduceTo = qsmodel.RMAX
+    //     r.ruleCondition.CompositeMetricQuery.BuilderQueries[k] = v
+    // }
+
+    return &qsmodel.QueryRangeParamsV2{
+        Start:                tsStart,
+        End:                  tsEnd,
+        Step:                 30,
+        CompositeMetricQuery: r.ruleCondition.CompositeMetricQuery,
+    }
+}
+
+// runChQuery runs the actual query against ClickHouse
+func (r *ThresholdRule) runChQuery(ctx context.Context, db clickhouse.Conn, query string) (Vector, error) {
+    rows, err := db.Query(ctx, query)
+    if err != nil {
+        zap.S().Errorf("rule:", r.Name(), "\t failed to get alert query result")
+        return nil, err
+    }
+
+    columnTypes := rows.ColumnTypes()
+    if err != nil {
+        return nil, err
+    }
+    columnNames := rows.Columns()
+    if err != nil {
+        return nil, err
+    }
+    vars := make([]interface{}, len(columnTypes))
+
+    for i := range columnTypes {
+        vars[i] = reflect.New(columnTypes[i].ScanType()).Interface()
+    }
+
+    // []sample list
+    var result Vector
+
+    // map[fingerprint]sample
+    resultMap := make(map[uint64]Sample, 0)
+
+    // for rates we want to skip the first record
+    // but we don't know when the rates are being used
+    // so we always pick timeframe - 30 seconds interval
+    // and skip the first record for a given label combo
+    skipFirstRecord := make(map[uint64]bool, 0)
+
+    defer rows.Close()
+    for rows.Next() {
+
+        if err := rows.Scan(vars...); err != nil {
+            return nil, err
+        }
+
+        sample := Sample{}
+        lbls := labels.NewBuilder(labels.Labels{})
+
+        for i, v := range vars {
+
+            colName := columnNames[i]
+
+            switch v := v.(type) {
+            case *string:
+                lbls.Set(colName, *v)
+            case *time.Time:
+                timval := *v
+
+                if colName == "ts" {
+                    sample.Point.T = timval.Unix()
+                } else {
+                    lbls.Set(colName, timval.Format("2006-01-02 15:04:05"))
+                }
+
+            case *float64:
+                if colName == "res" || colName == "value" {
+                    sample.Point.V = *v
+                } else {
+                    lbls.Set(colName, fmt.Sprintf("%f", *v))
+                }
+            case *uint64:
+                intv := *v
+                if colName == "res" || colName == "value" {
+                    sample.Point.V = float64(intv)
+                } else {
+                    lbls.Set(colName, fmt.Sprintf("%d", intv))
+                }
+            case *uint8:
+                intv := *v
+                if colName == "res" || colName == "value" {
+                    sample.Point.V = float64(intv)
+                } else {
+                    lbls.Set(colName, fmt.Sprintf("%d", intv))
+                }
+            default:
+                zap.S().Errorf("ruleId:", r.ID(), "\t error: invalid var found in query result", v, columnNames[i])
+            }
+        }
+
+        if value.IsNaN(sample.Point.V) {
+            continue
+        }
+
+        // capture labels in result
+        sample.Metric = lbls.Labels()
+
+        labelHash := lbls.Labels().Hash()
+
+        // here we walk through values of time series
+        // and calculate the final value used to compare
+        // with rule target
+        if existing, ok := resultMap[labelHash]; ok {
+
+            switch r.matchType() {
+            case AllTheTimes:
+                if r.compareOp() == ValueIsAbove {
+                    sample.Point.V = math.Min(existing.Point.V, sample.Point.V)
+                    resultMap[labelHash] = sample
+                } else if r.compareOp() == ValueIsBelow {
+                    sample.Point.V = math.Max(existing.Point.V, sample.Point.V)
+                    resultMap[labelHash] = sample
+                }
+            case AtleastOnce:
+                if r.compareOp() == ValueIsAbove {
+                    sample.Point.V = math.Max(existing.Point.V, sample.Point.V)
+                    resultMap[labelHash] = sample
+                } else if r.compareOp() == ValueIsBelow {
+                    sample.Point.V = math.Min(existing.Point.V, sample.Point.V)
+                    resultMap[labelHash] = sample
+                }
+            case OnAverage:
+                sample.Point.V = (existing.Point.V + sample.Point.V) / 2
+                resultMap[labelHash] = sample
+            case InTotal:
+                sample.Point.V = (existing.Point.V + sample.Point.V)
+                resultMap[labelHash] = sample
+            }
+
+        } else {
+            if exists, _ := skipFirstRecord[labelHash]; exists {
+                resultMap[labelHash] = sample
+            } else {
+                // looks like the first record for this label combo, skip it
+                skipFirstRecord[labelHash] = true
+            }
+        }
+    }
+
+    for _, sample := range resultMap {
+        // check alert rule condition before dumping results
+        if r.CheckCondition(sample.Point.V) {
+            result = append(result, sample)
+        }
+    }
+
+    return result, nil
+}
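+
+// Editorial note (worked example, not part of the original patch): suppose
+// one label set yields the points 2, 9, 4 within the eval window. The first
+// point (2) is consumed by skipFirstRecord, so the value handed to
+// CheckCondition is built from 9 and 4: max -> 9 for AtleastOnce with
+// ValueIsAbove, min -> 4 for AllTheTimes with ValueIsAbove, running pairwise
+// average -> 6.5 for OnAverage (note this is not a true mean once more than
+// two points survive), and sum -> 13 for InTotal.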
query label:", queryLabel, "\t queries:", runQueries.Queries) + return nil, fmt.Errorf("this is unexpected, invalid query label") +} + +func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time, queriers *Queriers) (interface{}, error) { + + res, err := r.buildAndRunQuery(ctx, ts, queriers.Ch) + + if err != nil { + r.SetHealth(HealthBad) + r.SetLastError(err) + zap.S().Debugf("ruleid:", r.ID(), "\t failure in buildAndRunQuery:", err) + return nil, err + } + + r.mtx.Lock() + defer r.mtx.Unlock() + + resultFPs := map[uint64]struct{}{} + var vec Vector + var alerts = make(map[uint64]*Alert, len(res)) + + for _, smpl := range res { + l := make(map[string]string, len(smpl.Metric)) + for _, lbl := range smpl.Metric { + l[lbl.Name] = lbl.Value + } + + tmplData := AlertTemplateData(l, smpl.V) + // Inject some convenience variables that are easier to remember for users + // who are not used to Go's templating system. + defs := "{{$labels := .Labels}}{{$value := .Value}}" + + expand := func(text string) string { + + tmpl := NewTemplateExpander( + ctx, + defs+text, + "__alert_"+r.Name(), + tmplData, + times.Time(timestamp.FromTime(ts)), + nil, + ) + result, err := tmpl.Expand() + if err != nil { + result = fmt.Sprintf("", err) + zap.S().Errorf("msg:", "Expanding alert template failed", "\t err", err, "\t data", tmplData) + } + return result + } + + lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel) + + for _, l := range r.labels { + lb.Set(l.Name, expand(l.Value)) + } + + lb.Set(labels.AlertNameLabel, r.Name()) + lb.Set(labels.AlertRuleIdLabel, r.ID()) + lb.Set(labels.RuleSourceLabel, r.GeneratorURL()) + + annotations := make(labels.Labels, 0, len(r.annotations)) + for _, a := range r.annotations { + annotations = append(annotations, labels.Label{Name: a.Name, Value: expand(a.Value)}) + } + + lbs := lb.Labels() + h := lbs.Hash() + resultFPs[h] = struct{}{} + + if _, ok := alerts[h]; ok { + zap.S().Errorf("ruleId: ", r.ID(), "\t msg:", "the alert query returns duplicate records:", alerts[h]) + err = fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels") + // We have already acquired the lock above hence using SetHealth and + // SetLastError will deadlock. + r.health = HealthBad + r.lastError = err + return nil, err + } + + alerts[h] = &Alert{ + Labels: lbs, + Annotations: annotations, + ActiveAt: ts, + State: StatePending, + Value: smpl.V, + GeneratorURL: r.GeneratorURL(), + } + } + + zap.S().Info("rule:", r.Name(), "\t alerts found: ", len(alerts)) + + // alerts[h] is ready, add or update active list now + for h, a := range alerts { + // Check whether we already have alerting state for the identifying label set. + // Update the last value and annotations if so, create a new alert entry otherwise. + if alert, ok := r.active[h]; ok && alert.State != StateInactive { + + alert.Value = a.Value + alert.Annotations = a.Annotations + continue + } + + r.active[h] = a + + } + + // Check if any pending alerts should be removed or fire now. Write out alert timeseries. + for fp, a := range r.active { + if _, ok := resultFPs[fp]; !ok { + // If the alert was previously firing, keep it around for a given + // retention time so it is reported as resolved to the AlertManager. 
+ if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) { + delete(r.active, fp) + } + if a.State != StateInactive { + a.State = StateInactive + a.ResolvedAt = ts + } + continue + } + + if a.State == StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration { + a.State = StateFiring + a.FiredAt = ts + } + + } + r.health = HealthGood + r.lastError = err + return vec, nil + +} + +func (r *ThresholdRule) String() string { + + ar := PostableRule{ + Alert: r.name, + RuleCondition: r.ruleCondition, + EvalWindow: Duration(r.evalWindow), + Labels: r.labels.Map(), + Annotations: r.annotations.Map(), + } + + byt, err := yaml.Marshal(ar) + if err != nil { + return fmt.Sprintf("error marshaling alerting rule: %s", err.Error()) + } + + return string(byt) +} diff --git a/pkg/query-service/tests/test-deploy/clickhouse-config.xml b/pkg/query-service/tests/test-deploy/clickhouse-config.xml index 3bb26a3a36..4a6a82b8af 100644 --- a/pkg/query-service/tests/test-deploy/clickhouse-config.xml +++ b/pkg/query-service/tests/test-deploy/clickhouse-config.xml @@ -22,7 +22,7 @@ [1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114 --> - trace + information /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.err.log