chore: add signozspanmetrics delta temporality pipeline (#3776)

This commit is contained in:
Srikanth Chekuri 2024-01-10 01:16:24 +05:30 committed by GitHub
parent 361efd3b52
commit 722a38491e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 51 additions and 392 deletions

View File

@ -150,7 +150,7 @@ services:
command: command:
[ [
"-config=/root/config/prometheus.yml", "-config=/root/config/prometheus.yml",
"--prefer-delta=true" # "--prefer-delta=true"
] ]
# ports: # ports:
# - "6060:6060" # pprof port # - "6060:6060" # pprof port
@ -249,25 +249,6 @@ services:
# - clickhouse-2 # - clickhouse-2
# - clickhouse-3 # - clickhouse-3
# Service entry for the separate metrics collector. It starts the collector
# image with the config file that is bind-mounted under `volumes`.
otel-collector-metrics:
  image: signoz/signoz-otel-collector:0.88.6
  command:
    [
      "--config=/etc/otel-collector-metrics-config.yaml",
      # NOTE(review): the leading "-" presumably turns this feature gate off;
      # confirm against the collector's feature-gate documentation.
      "--feature-gates=-pkg.translator.prometheus.NormalizeName"
    ]
  volumes:
    # Local config file -> path passed to --config above.
    - ./otel-collector-metrics-config.yaml:/etc/otel-collector-metrics-config.yaml
  # Host port mappings are disabled; uncomment the ones you need.
  # ports:
  #   - "1777:1777" # pprof extension
  #   - "8888:8888" # OtelCollector internal metrics
  #   - "13133:13133" # Health check extension
  #   - "55679:55679" # zPages extension
  deploy:
    restart_policy:
      condition: on-failure
  # Merges the shared settings from the `db-depend` anchor, which is defined
  # outside this hunk.
  <<: *db-depend
logspout: logspout:
image: "gliderlabs/logspout:v3.2.14" image: "gliderlabs/logspout:v3.2.14"
volumes: volumes:

View File

@ -15,13 +15,9 @@ receivers:
# please remove names from below if you want to collect logs from them # please remove names from below if you want to collect logs from them
- type: filter - type: filter
id: signoz_logs_filter id: signoz_logs_filter
expr: 'attributes.container_name matches "^signoz_(logspout|frontend|alertmanager|query-service|otel-collector|otel-collector-metrics|clickhouse|zookeeper)"' expr: 'attributes.container_name matches "^signoz_(logspout|frontend|alertmanager|query-service|otel-collector|clickhouse|zookeeper)"'
opencensus: opencensus:
endpoint: 0.0.0.0:55678 endpoint: 0.0.0.0:55678
otlp/spanmetrics:
protocols:
grpc:
endpoint: localhost:12345
otlp: otlp:
protocols: protocols:
grpc: grpc:
@ -69,8 +65,8 @@ processors:
# Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels. # Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
detectors: [env, system] # include ec2 for AWS, gcp for GCP and azure for Azure. detectors: [env, system] # include ec2 for AWS, gcp for GCP and azure for Azure.
timeout: 2s timeout: 2s
signozspanmetrics/prometheus: signozspanmetrics/cumulative:
metrics_exporter: prometheus metrics_exporter: clickhousemetricswrite
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ] latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
dimensions_cache_size: 100000 dimensions_cache_size: 100000
dimensions: dimensions:
@ -97,6 +93,20 @@ processors:
# num_workers: 4 # num_workers: 4
# queue_size: 100 # queue_size: 100
# retry_on_failure: true # retry_on_failure: true
# Span-metrics processor instance configured for delta aggregation
# temporality; its output is handed to the `clickhousemetricswrite` exporter.
signozspanmetrics/delta:
  metrics_exporter: clickhousemetricswrite
  latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
  dimensions_cache_size: 100000
  aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
  # Additional dimensions attached to the generated metrics.
  # NOTE(review): `default` is presumably the value used when the attribute
  # is absent on a span; confirm against the processor documentation.
  dimensions:
    - name: service.namespace
      default: default
    - name: deployment.environment
      default: default
    # This is added to ensure the uniqueness of the timeseries
    # Otherwise, identical timeseries produced by multiple replicas of
    # collectors result in incorrect APM metrics
    - name: signoz.collector.id
exporters: exporters:
clickhousetraces: clickhousetraces:
@ -109,8 +119,6 @@ exporters:
enabled: true enabled: true
clickhousemetricswrite/prometheus: clickhousemetricswrite/prometheus:
endpoint: tcp://clickhouse:9000/?database=signoz_metrics endpoint: tcp://clickhouse:9000/?database=signoz_metrics
prometheus:
endpoint: 0.0.0.0:8889
# logging: {} # logging: {}
clickhouselogsexporter: clickhouselogsexporter:
dsn: tcp://clickhouse:9000/ dsn: tcp://clickhouse:9000/
@ -140,7 +148,7 @@ service:
pipelines: pipelines:
traces: traces:
receivers: [jaeger, otlp] receivers: [jaeger, otlp]
processors: [signozspanmetrics/prometheus, batch] processors: [signozspanmetrics/cumulative, signozspanmetrics/delta, batch]
exporters: [clickhousetraces] exporters: [clickhousetraces]
metrics: metrics:
receivers: [otlp] receivers: [otlp]
@ -154,9 +162,6 @@ service:
receivers: [prometheus] receivers: [prometheus]
processors: [batch] processors: [batch]
exporters: [clickhousemetricswrite/prometheus] exporters: [clickhousemetricswrite/prometheus]
metrics/spanmetrics:
receivers: [otlp/spanmetrics]
exporters: [prometheus]
logs: logs:
receivers: [otlp, tcplog/docker] receivers: [otlp, tcplog/docker]
processors: [batch] processors: [batch]

View File

@ -1,64 +0,0 @@
# Collector config with a single metrics pipeline: scrape Prometheus
# endpoints, batch the samples, and write them to ClickHouse.
receivers:
  prometheus:
    config:
      scrape_configs:
        # otel-collector-metrics internal metrics
        # (port 8888 matches service.telemetry.metrics.address below)
        - job_name: otel-collector-metrics
          scrape_interval: 60s
          static_configs:
            - targets:
                - localhost:8888
              labels:
                job_name: otel-collector-metrics
        # SigNoz span metrics
        # Targets come from the DNS A records of `tasks.otel-collector` and
        # are scraped on port 8889.
        - job_name: signozspanmetrics-collector
          scrape_interval: 60s
          dns_sd_configs:
            - names:
                - tasks.otel-collector
              type: A
              port: 8889
processors:
  batch:
    send_batch_size: 10000
    send_batch_max_size: 11000
    timeout: 10s
  # Optional processors, left disabled.
  # memory_limiter:
  #   # 80% of maximum memory up to 2G
  #   limit_mib: 1500
  #   # 25% of limit up to 2G
  #   spike_limit_mib: 512
  #   check_interval: 5s
  #
  #   # 50% of the maximum memory
  #   limit_percentage: 50
  #   # 20% of max memory usage spike expected
  #   spike_limit_percentage: 20
  # queued_retry:
  #   num_workers: 4
  #   queue_size: 100
  #   retry_on_failure: true
exporters:
  clickhousemetricswrite:
    endpoint: tcp://clickhouse:9000/?database=signoz_metrics
extensions:
  health_check:
    endpoint: 0.0.0.0:13133
  zpages:
    endpoint: 0.0.0.0:55679
  pprof:
    endpoint: 0.0.0.0:1777
service:
  telemetry:
    metrics:
      # Scraped by the `otel-collector-metrics` job above.
      address: 0.0.0.0:8888
  extensions: [health_check, zpages, pprof]
  pipelines:
    metrics:
      receivers: [prometheus]
      processors: [batch]
      exporters: [clickhousemetricswrite]

View File

@ -116,28 +116,6 @@ services:
query-service: query-service:
condition: service_healthy condition: service_healthy
# Service entry for the separate metrics collector. It starts the collector
# image with the config file that is bind-mounted under `volumes`.
otel-collector-metrics:
  container_name: signoz-otel-collector-metrics
  image: signoz/signoz-otel-collector:0.88.6
  command:
    [
      "--config=/etc/otel-collector-metrics-config.yaml",
      # NOTE(review): the leading "-" presumably turns this feature gate off;
      # confirm against the collector's feature-gate documentation.
      "--feature-gates=-pkg.translator.prometheus.NormalizeName"
    ]
  volumes:
    # Local config file -> path passed to --config above.
    - ./otel-collector-metrics-config.yaml:/etc/otel-collector-metrics-config.yaml
  # Host port mappings are disabled; uncomment the ones you need.
  # ports:
  #   - "1777:1777" # pprof extension
  #   - "8888:8888" # OtelCollector internal metrics
  #   - "13133:13133" # Health check extension
  #   - "55679:55679" # zPages extension
  restart: on-failure
  # Start only after ClickHouse is healthy and the migrator has completed
  # successfully.
  depends_on:
    clickhouse:
      condition: service_healthy
    otel-collector-migrator:
      condition: service_completed_successfully
logspout: logspout:
image: "gliderlabs/logspout:v3.2.14" image: "gliderlabs/logspout:v3.2.14"
container_name: signoz-logspout container_name: signoz-logspout

View File

@ -25,7 +25,7 @@ services:
command: command:
[ [
"-config=/root/config/prometheus.yml", "-config=/root/config/prometheus.yml",
"--prefer-delta=true" # "--prefer-delta=true"
] ]
ports: ports:
- "6060:6060" - "6060:6060"

View File

@ -169,7 +169,7 @@ services:
command: command:
[ [
"-config=/root/config/prometheus.yml", "-config=/root/config/prometheus.yml",
"--prefer-delta=true" # "--prefer-delta=true"
] ]
# ports: # ports:
# - "6060:6060" # pprof port # - "6060:6060" # pprof port
@ -268,24 +268,6 @@ services:
query-service: query-service:
condition: service_healthy condition: service_healthy
# Service entry for the separate metrics collector. It starts the collector
# image with the config file that is bind-mounted under `volumes`.
otel-collector-metrics:
  # Image tag comes from OTELCOL_TAG and falls back to 0.88.6 when unset.
  image: signoz/signoz-otel-collector:${OTELCOL_TAG:-0.88.6}
  container_name: signoz-otel-collector-metrics
  command:
    [
      "--config=/etc/otel-collector-metrics-config.yaml",
      # NOTE(review): the leading "-" presumably turns this feature gate off;
      # confirm against the collector's feature-gate documentation.
      "--feature-gates=-pkg.translator.prometheus.NormalizeName"
    ]
  volumes:
    # Local config file -> path passed to --config above.
    - ./otel-collector-metrics-config.yaml:/etc/otel-collector-metrics-config.yaml
  # Host port mappings are disabled; uncomment the ones you need.
  # ports:
  #   - "1777:1777" # pprof extension
  #   - "8888:8888" # OtelCollector internal metrics
  #   - "13133:13133" # Health check extension
  #   - "55679:55679" # zPages extension
  restart: on-failure
  # Merges the shared settings from the `db-depend` anchor, which is defined
  # outside this hunk.
  <<: *db-depend
logspout: logspout:
image: "gliderlabs/logspout:v3.2.14" image: "gliderlabs/logspout:v3.2.14"
container_name: signoz-logspout container_name: signoz-logspout

View File

@ -15,13 +15,9 @@ receivers:
# please remove names from below if you want to collect logs from them # please remove names from below if you want to collect logs from them
- type: filter - type: filter
id: signoz_logs_filter id: signoz_logs_filter
expr: 'attributes.container_name matches "^signoz-(logspout|frontend|alertmanager|query-service|otel-collector|otel-collector-metrics|clickhouse|zookeeper)"' expr: 'attributes.container_name matches "^signoz-(logspout|frontend|alertmanager|query-service|otel-collector|clickhouse|zookeeper)"'
opencensus: opencensus:
endpoint: 0.0.0.0:55678 endpoint: 0.0.0.0:55678
otlp/spanmetrics:
protocols:
grpc:
endpoint: localhost:12345
otlp: otlp:
protocols: protocols:
grpc: grpc:
@ -66,8 +62,9 @@ processors:
send_batch_size: 10000 send_batch_size: 10000
send_batch_max_size: 11000 send_batch_max_size: 11000
timeout: 10s timeout: 10s
signozspanmetrics/prometheus: signozspanmetrics/cumulative:
metrics_exporter: prometheus metrics_exporter: clickhousemetricswrite
metrics_flush_interval: 60s
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ] latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
dimensions_cache_size: 100000 dimensions_cache_size: 100000
dimensions: dimensions:
@ -98,6 +95,21 @@ processors:
# Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels. # Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
detectors: [env, system] # include ec2 for AWS, gcp for GCP and azure for Azure. detectors: [env, system] # include ec2 for AWS, gcp for GCP and azure for Azure.
timeout: 2s timeout: 2s
# Span-metrics processor instance configured for delta aggregation
# temporality; its output is handed to the `clickhousemetricswrite` exporter.
signozspanmetrics/delta:
  metrics_exporter: clickhousemetricswrite
  metrics_flush_interval: 60s
  latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
  dimensions_cache_size: 100000
  aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
  # Additional dimensions attached to the generated metrics.
  # NOTE(review): `default` is presumably the value used when the attribute
  # is absent on a span; confirm against the processor documentation.
  dimensions:
    - name: service.namespace
      default: default
    - name: deployment.environment
      default: default
    # This is added to ensure the uniqueness of the timeseries
    # Otherwise, identical timeseries produced by multiple replicas of
    # collectors result in incorrect APM metrics
    - name: signoz.collector.id
extensions: extensions:
health_check: health_check:
@ -118,8 +130,6 @@ exporters:
enabled: true enabled: true
clickhousemetricswrite/prometheus: clickhousemetricswrite/prometheus:
endpoint: tcp://clickhouse:9000/?database=signoz_metrics endpoint: tcp://clickhouse:9000/?database=signoz_metrics
prometheus:
endpoint: 0.0.0.0:8889
# logging: {} # logging: {}
clickhouselogsexporter: clickhouselogsexporter:
@ -145,7 +155,7 @@ service:
pipelines: pipelines:
traces: traces:
receivers: [jaeger, otlp] receivers: [jaeger, otlp]
processors: [signozspanmetrics/prometheus, batch] processors: [signozspanmetrics/cumulative, signozspanmetrics/delta, batch]
exporters: [clickhousetraces] exporters: [clickhousetraces]
metrics: metrics:
receivers: [otlp] receivers: [otlp]
@ -159,9 +169,6 @@ service:
receivers: [prometheus] receivers: [prometheus]
processors: [batch] processors: [batch]
exporters: [clickhousemetricswrite/prometheus] exporters: [clickhousemetricswrite/prometheus]
metrics/spanmetrics:
receivers: [otlp/spanmetrics]
exporters: [prometheus]
logs: logs:
receivers: [otlp, tcplog/docker] receivers: [otlp, tcplog/docker]
processors: [batch] processors: [batch]

View File

@ -1,69 +0,0 @@
# Collector config with a single metrics pipeline: scrape Prometheus
# endpoints, batch the samples, and write them to ClickHouse.
receivers:
  # NOTE(review): `otlp` is declared here but no pipeline in this file
  # references it.
  otlp:
    protocols:
      grpc:
      http:
  prometheus:
    config:
      scrape_configs:
        # otel-collector-metrics internal metrics
        # (port 8888 matches service.telemetry.metrics.address below)
        - job_name: otel-collector-metrics
          scrape_interval: 60s
          static_configs:
            - targets:
                - localhost:8888
              labels:
                job_name: otel-collector-metrics
        # SigNoz span metrics
        - job_name: signozspanmetrics-collector
          scrape_interval: 60s
          static_configs:
            - targets:
                - otel-collector:8889
processors:
  batch:
    send_batch_size: 10000
    send_batch_max_size: 11000
    timeout: 10s
  # Optional processors, left disabled.
  # memory_limiter:
  #   # 80% of maximum memory up to 2G
  #   limit_mib: 1500
  #   # 25% of limit up to 2G
  #   spike_limit_mib: 512
  #   check_interval: 5s
  #
  #   # 50% of the maximum memory
  #   limit_percentage: 50
  #   # 20% of max memory usage spike expected
  #   spike_limit_percentage: 20
  # queued_retry:
  #   num_workers: 4
  #   queue_size: 100
  #   retry_on_failure: true
extensions:
  health_check:
    endpoint: 0.0.0.0:13133
  zpages:
    endpoint: 0.0.0.0:55679
  pprof:
    endpoint: 0.0.0.0:1777
exporters:
  clickhousemetricswrite:
    endpoint: tcp://clickhouse:9000/?database=signoz_metrics
service:
  telemetry:
    metrics:
      # Scraped by the `otel-collector-metrics` job above.
      address: 0.0.0.0:8888
  extensions:
    - health_check
    - zpages
    - pprof
  pipelines:
    metrics:
      receivers: [prometheus]
      processors: [batch]
      exporters: [clickhousemetricswrite]

View File

@ -1,76 +0,0 @@
# Collector config with three pipelines: traces to ClickHouse, metrics to
# ClickHouse, and a span-metrics pipeline exposed through the Prometheus
# exporter.
receivers:
  # Feeds the `metrics/spanmetrics` pipeline below.
  otlp/spanmetrics:
    protocols:
      grpc:
        endpoint: "localhost:12345"
  otlp:
    protocols:
      grpc:
      http:
  jaeger:
    protocols:
      grpc:
      thrift_http:
  hostmetrics:
    collection_interval: 30s
    scrapers:
      cpu:
      load:
      memory:
      disk:
      filesystem:
      network:
processors:
  batch:
    send_batch_size: 1000
    timeout: 10s
  # Span-metrics processor; its output goes to the `prometheus` exporter.
  signozspanmetrics/prometheus:
    metrics_exporter: prometheus
    latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
    dimensions_cache_size: 10000
    dimensions:
      - name: service.namespace
        default: default
      - name: deployment.environment
        default: default
  # Optional processors, left disabled.
  # memory_limiter:
  #   # 80% of maximum memory up to 2G
  #   limit_mib: 1500
  #   # 25% of limit up to 2G
  #   spike_limit_mib: 512
  #   check_interval: 5s
  #
  #   # 50% of the maximum memory
  #   limit_percentage: 50
  #   # 20% of max memory usage spike expected
  #   spike_limit_percentage: 20
  # queued_retry:
  #   num_workers: 4
  #   queue_size: 100
  #   retry_on_failure: true
extensions:
  zpages: {}
exporters:
  clickhousetraces:
    datasource: tcp://localhost:9000/?database=signoz_traces
    migrations: exporter/clickhousetracesexporter/migrations
  clickhousemetricswrite:
    endpoint: tcp://localhost:9000/?database=signoz_metrics
    resource_to_telemetry_conversion:
      enabled: true
  prometheus:
    endpoint: "0.0.0.0:8889"
service:
  extensions: [zpages]
  pipelines:
    traces:
      receivers: [jaeger, otlp]
      processors: [signozspanmetrics/prometheus, batch]
      exporters: [clickhousetraces]
    metrics:
      receivers: [otlp, hostmetrics]
      processors: [batch]
      exporters: [clickhousemetricswrite]
    metrics/spanmetrics:
      receivers: [otlp/spanmetrics]
      exporters: [prometheus]

View File

@ -34,7 +34,7 @@ func TestServiceConfig(t *testing.T) {
"traces": map[string]interface{}{ "traces": map[string]interface{}{
"receivers": []interface{}{"jaeger", "otlp"}, "receivers": []interface{}{"jaeger", "otlp"},
"processors": []interface{}{ "processors": []interface{}{
"signozspanmetrics/prometheus", "batch", "signozspanmetrics/cumulative", "batch",
}, },
"exporters": []interface{}{ "exporters": []interface{}{
"clickhousetraces", "clickhousetraces",

View File

@ -24,8 +24,8 @@ processors:
batch: batch:
send_batch_size: 1000 send_batch_size: 1000
timeout: 10s timeout: 10s
signozspanmetrics/prometheus: signozspanmetrics/cumulative:
metrics_exporter: prometheus metrics_exporter: clickhousemetricswrite
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ] latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
dimensions_cache_size: 10000 dimensions_cache_size: 10000
dimensions: dimensions:
@ -65,7 +65,7 @@ service:
pipelines: pipelines:
traces: traces:
receivers: [jaeger, otlp] receivers: [jaeger, otlp]
processors: [signozspanmetrics/prometheus, batch] processors: [signozspanmetrics/cumulative, batch]
exporters: [clickhousetraces] exporters: [clickhousetraces]
metrics: metrics:
receivers: [otlp, hostmetrics] receivers: [otlp, hostmetrics]

View File

@ -3,7 +3,7 @@ service:
pipelines: pipelines:
traces: traces:
receivers: [jaeger, otlp] receivers: [jaeger, otlp]
processors: [signozspanmetrics/prometheus, batch] processors: [signozspanmetrics/cumulative, batch]
exporters: [clickhousetraces] exporters: [clickhousetraces]
metrics: metrics:
receivers: [otlp, hostmetrics] receivers: [otlp, hostmetrics]

View File

@ -158,7 +158,7 @@ services:
command: command:
[ [
"-config=/root/config/prometheus.yml", "-config=/root/config/prometheus.yml",
"--prefer-delta=true" # "--prefer-delta=true"
] ]
# ports: # ports:
# - "6060:6060" # pprof port # - "6060:6060" # pprof port
@ -244,24 +244,6 @@ services:
query-service: query-service:
condition: service_healthy condition: service_healthy
# Service entry for the separate metrics collector. It starts the collector
# image with the config file that is bind-mounted under `volumes`.
otel-collector-metrics:
  image: signoz/signoz-otel-collector:0.88.6
  container_name: signoz-otel-collector-metrics
  command:
    [
      "--config=/etc/otel-collector-metrics-config.yaml",
      # NOTE(review): the leading "-" presumably turns this feature gate off;
      # confirm against the collector's feature-gate documentation.
      "--feature-gates=-pkg.translator.prometheus.NormalizeName"
    ]
  volumes:
    # Local config file -> path passed to --config above.
    - ./otel-collector-metrics-config.yaml:/etc/otel-collector-metrics-config.yaml
  # Host port mappings are disabled; uncomment the ones you need.
  # ports:
  #   - "1777:1777" # pprof extension
  #   - "8888:8888" # OtelCollector internal metrics
  #   - "13133:13133" # Health check extension
  #   - "55679:55679" # zPages extension
  restart: on-failure
  # Merges the shared settings from the `db-depend` anchor, which is defined
  # outside this hunk.
  <<: *db-depend
logspout: logspout:
image: "gliderlabs/logspout:v3.2.14" image: "gliderlabs/logspout:v3.2.14"
container_name: signoz-logspout container_name: signoz-logspout

View File

@ -15,7 +15,7 @@ receivers:
# please remove names from below if you want to collect logs from them # please remove names from below if you want to collect logs from them
- type: filter - type: filter
id: signoz_logs_filter id: signoz_logs_filter
expr: 'attributes.container_name matches "^signoz-(logspout|frontend|alertmanager|query-service|otel-collector|otel-collector-metrics|clickhouse|zookeeper)"' expr: 'attributes.container_name matches "^signoz-(logspout|frontend|alertmanager|query-service|otel-collector|clickhouse|zookeeper)"'
opencensus: opencensus:
endpoint: 0.0.0.0:55678 endpoint: 0.0.0.0:55678
otlp/spanmetrics: otlp/spanmetrics:
@ -63,8 +63,8 @@ processors:
send_batch_size: 10000 send_batch_size: 10000
send_batch_max_size: 11000 send_batch_max_size: 11000
timeout: 10s timeout: 10s
signozspanmetrics/prometheus: signozspanmetrics/cumulative:
metrics_exporter: prometheus metrics_exporter: clickhousemetricswrite
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ] latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
dimensions_cache_size: 100000 dimensions_cache_size: 100000
dimensions: dimensions:
@ -135,7 +135,7 @@ service:
pipelines: pipelines:
traces: traces:
receivers: [jaeger, otlp] receivers: [jaeger, otlp]
processors: [signozspanmetrics/prometheus, batch] processors: [signozspanmetrics/cumulative, batch]
exporters: [clickhousetraces] exporters: [clickhousetraces]
metrics: metrics:
receivers: [otlp] receivers: [otlp]

View File

@ -1,67 +0,0 @@
# Collector config with a single metrics pipeline: scrape Prometheus
# endpoints, batch the samples, and write them to ClickHouse.
receivers:
  # NOTE(review): `otlp` is declared here but no pipeline in this file
  # references it.
  otlp:
    protocols:
      grpc:
      http:
  prometheus:
    config:
      scrape_configs:
        # otel-collector-metrics internal metrics
        # (port 8888 matches service.telemetry.metrics.address below)
        - job_name: otel-collector-metrics
          scrape_interval: 60s
          static_configs:
            - targets:
                - localhost:8888
        # SigNoz span metrics
        - job_name: signozspanmetrics-collector
          scrape_interval: 60s
          static_configs:
            - targets:
                - otel-collector:8889
processors:
  batch:
    send_batch_size: 10000
    send_batch_max_size: 11000
    timeout: 10s
  # Optional processors, left disabled.
  # memory_limiter:
  #   # 80% of maximum memory up to 2G
  #   limit_mib: 1500
  #   # 25% of limit up to 2G
  #   spike_limit_mib: 512
  #   check_interval: 5s
  #
  #   # 50% of the maximum memory
  #   limit_percentage: 50
  #   # 20% of max memory usage spike expected
  #   spike_limit_percentage: 20
  # queued_retry:
  #   num_workers: 4
  #   queue_size: 100
  #   retry_on_failure: true
extensions:
  health_check:
    endpoint: 0.0.0.0:13133
  zpages:
    endpoint: 0.0.0.0:55679
  pprof:
    endpoint: 0.0.0.0:1777
exporters:
  clickhousemetricswrite:
    endpoint: tcp://clickhouse:9000/?database=signoz_metrics
service:
  telemetry:
    metrics:
      # Scraped by the `otel-collector-metrics` job above.
      address: 0.0.0.0:8888
  extensions:
    - health_check
    - zpages
    - pprof
  pipelines:
    metrics:
      receivers: [prometheus]
      processors: [batch]
      exporters: [clickhousemetricswrite]