From 004dda200c13a3214346bd18d0b6a6fa66267063 Mon Sep 17 00:00:00 2001 From: Ankit Nayan Date: Tue, 28 Sep 2021 18:10:44 +0530 Subject: [PATCH] feat: signoz can now scale up in docker swarm (#309) * feat: signoz can now scale up in docker swarm * chore: adding empty folders for volume mount * chore: using image 0.4.0 * chore: adding folder to persist signoz.db --- .../clickhouse-setup/clickhouse-config.xml | 517 ++++++++++++++++++ .../clickhouse-setup/data/clickhouse/.gitkeep | 0 .../clickhouse-setup/data/signoz/.gitkeep | 0 .../clickhouse-setup/docker-compose.yaml | 113 ++++ .../docker-entrypoint-initdb.d/init-db.sql | 27 + .../otel-collector-config-hostmetrics.yaml | 72 +++ .../otel-collector-config.yaml | 47 ++ .../clickhouse-setup/prometheus.yml | 25 + .../common/locust-scripts/locustfile.py | 16 + deploy/docker-swarm/common/nginx-config.conf | 30 + deploy/docker-swarm/dashboards/.gitkeep | 0 11 files changed, 847 insertions(+) create mode 100644 deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml create mode 100644 deploy/docker-swarm/clickhouse-setup/data/clickhouse/.gitkeep create mode 100644 deploy/docker-swarm/clickhouse-setup/data/signoz/.gitkeep create mode 100644 deploy/docker-swarm/clickhouse-setup/docker-compose.yaml create mode 100644 deploy/docker-swarm/clickhouse-setup/docker-entrypoint-initdb.d/init-db.sql create mode 100644 deploy/docker-swarm/clickhouse-setup/otel-collector-config-hostmetrics.yaml create mode 100644 deploy/docker-swarm/clickhouse-setup/otel-collector-config.yaml create mode 100644 deploy/docker-swarm/clickhouse-setup/prometheus.yml create mode 100644 deploy/docker-swarm/common/locust-scripts/locustfile.py create mode 100644 deploy/docker-swarm/common/nginx-config.conf create mode 100644 deploy/docker-swarm/dashboards/.gitkeep diff --git a/deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml b/deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml new file mode 100644 index 0000000000..23898ef5e7 --- /dev/null 
+++ b/deploy/docker-swarm/clickhouse-setup/clickhouse-config.xml @@ -0,0 +1,517 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + 8123 + 9000 + + + + + + + + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + + /etc/clickhouse-server/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + + + + +9009 + + + + + +:: + + + + + + +4096 +3 + + +100 + + + + + + 8589934592 + + + 5368709120 + + + + /var/lib/clickhouse/ + + + /var/lib/clickhouse/tmp/ + + + users.xml + + + default + + + default + + + + + + + + + + + + + + localhost + 9000 + + + + + + + + + + + + + + + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + system + query_log
+ + + 7500 +
+ + + + + + + + + + + + + + + + *_dictionary.xml + + + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + ^carbon\. + any + + 0 + 60 + + + 7776000 + 3600 + + + 10368000 + 21600 + + + 34560000 + 43200 + + + 63072000 + 86400 + + + 94608000 + 604800 + + + + + ^collectd\. + any + + 0 + 10 + + + 43200 + 60 + + + 864000 + 900 + + + 1728000 + 1800 + + + 3456000 + 3600 + + + 10368000 + 21600 + + + 34560000 + 43200 + + + 63072000 + 86400 + + + 94608000 + 604800 + + + + + ^high\. + any + + 0 + 10 + + + 172800 + 60 + + + 864000 + 900 + + + 1728000 + 1800 + + + 3456000 + 3600 + + + 10368000 + 21600 + + + 34560000 + 43200 + + + 63072000 + 86400 + + + 94608000 + 604800 + + + + + ^medium\. + any + + 0 + 60 + + + 864000 + 900 + + + 1728000 + 1800 + + + 3456000 + 3600 + + + 10368000 + 21600 + + + 34560000 + 43200 + + + 63072000 + 86400 + + + 94608000 + 604800 + + + + + ^low\. + any + + 0 + 600 + + + 15552000 + 1800 + + + 31536000 + 3600 + + + 63072000 + 21600 + + + 126144000 + 43200 + + + 252288000 + 86400 + + + 315360000 + 604800 + + + + + any + + 0 + 60 + + + 864000 + 900 + + + 1728000 + 1800 + + + 3456000 + 3600 + + + 10368000 + 21600 + + + 34560000 + 43200 + + + 63072000 + 86400 + + + 94608000 + 604800 + + + + + + /var/lib/clickhouse/format_schemas/ +
diff --git a/deploy/docker-swarm/clickhouse-setup/data/clickhouse/.gitkeep b/deploy/docker-swarm/clickhouse-setup/data/clickhouse/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/deploy/docker-swarm/clickhouse-setup/data/signoz/.gitkeep b/deploy/docker-swarm/clickhouse-setup/data/signoz/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/deploy/docker-swarm/clickhouse-setup/docker-compose.yaml b/deploy/docker-swarm/clickhouse-setup/docker-compose.yaml new file mode 100644 index 0000000000..529c1b95f8 --- /dev/null +++ b/deploy/docker-swarm/clickhouse-setup/docker-compose.yaml @@ -0,0 +1,113 @@ +version: "3" + +services: + clickhouse: + image: yandex/clickhouse-server + expose: + - 8123 + - 9000 + ports: + - 9001:9000 + - 8123:8123 + volumes: + - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml + - ./docker-entrypoint-initdb.d/init-db.sql:/docker-entrypoint-initdb.d/init-db.sql + - ./data/clickhouse/:/var/lib/clickhouse/ + + healthcheck: + # "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'" + test: ["CMD", "wget", "--spider", "-q", "localhost:8123/ping"] + interval: 30s + timeout: 5s + retries: 3 + + query-service: + image: signoz/query-service:0.4.0 + container_name: query-service + restart: always + command: ["-config=/root/config/prometheus.yml"] + ports: + - "8080:8080" + volumes: + - ./prometheus.yml:/root/config/prometheus.yml + - ../dashboards:/root/config/dashboards + + environment: + - ClickHouseUrl=tcp://clickhouse:9000 + - STORAGE=clickhouse + - POSTHOG_API_KEY=H-htDCae7CR3RV57gUzmol6IAKtm5IMCvbcm_fwnL-w + - GODEBUG=netdns=go + depends_on: + - clickhouse + + + frontend: + image: signoz/frontend:0.4.0 + container_name: frontend + + depends_on: + - query-service + links: + - "query-service" + ports: + - "3000:3000" + volumes: + - ../common/nginx-config.conf:/etc/nginx/conf.d/default.conf + + + otel-collector: + image: signoz/otelcontribcol:0.4.0 + 
command: ["--config=/etc/otel-collector-config.yaml", "--mem-ballast-size-mib=2000"] + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + ports: + - "1777:1777" # pprof extension + - "8887:8888" # Prometheus metrics exposed by the agent + - "14268:14268" # Jaeger receiver + - "55678" # OpenCensus receiver + - "55680:55680" # OTLP HTTP/2.0 legacy port + - "55681:55681" # OTLP HTTP/1.0 receiver + - "4317:4317" # OTLP GRPC receiver + - "55679:55679" # zpages extension + - "13133" # health_check + deploy: + mode: replicated + replicas: 3 + depends_on: + - clickhouse + + otel-collector-hostmetrics: + image: signoz/otelcontribcol:0.4.0 + command: ["--config=/etc/otel-collector-config-hostmetrics.yaml", "--mem-ballast-size-mib=683"] + volumes: + - ./otel-collector-config-hostmetrics.yaml:/etc/otel-collector-config-hostmetrics.yaml + depends_on: + - clickhouse + + + hotrod: + image: jaegertracing/example-hotrod:latest + container_name: hotrod + ports: + - "9000:8080" + command: ["all"] + environment: + - JAEGER_ENDPOINT=http://otel-collector:14268/api/traces + + + load-hotrod: + image: "grubykarol/locust:1.2.3-python3.9-alpine3.12" + container_name: load-hotrod + hostname: load-hotrod + ports: + - "8089:8089" + environment: + ATTACKED_HOST: http://hotrod:8080 + LOCUST_MODE: standalone + NO_PROXY: standalone + TASK_DELAY_FROM: 5 + TASK_DELAY_TO: 30 + QUIET_MODE: "${QUIET_MODE:-false}" + LOCUST_OPTS: "--headless -u 10 -r 1" + volumes: + - ../common/locust-scripts:/locust \ No newline at end of file diff --git a/deploy/docker-swarm/clickhouse-setup/docker-entrypoint-initdb.d/init-db.sql b/deploy/docker-swarm/clickhouse-setup/docker-entrypoint-initdb.d/init-db.sql new file mode 100644 index 0000000000..5aa7c0f7b2 --- /dev/null +++ b/deploy/docker-swarm/clickhouse-setup/docker-entrypoint-initdb.d/init-db.sql @@ -0,0 +1,27 @@ +CREATE TABLE IF NOT EXISTS signoz_index ( + timestamp DateTime64(9) CODEC(Delta, ZSTD(1)), + traceID String CODEC(ZSTD(1)), + 
spanID String CODEC(ZSTD(1)), + parentSpanID String CODEC(ZSTD(1)), + serviceName LowCardinality(String) CODEC(ZSTD(1)), + name LowCardinality(String) CODEC(ZSTD(1)), + kind Int32 CODEC(ZSTD(1)), + durationNano UInt64 CODEC(ZSTD(1)), + tags Array(String) CODEC(ZSTD(1)), + tagsKeys Array(String) CODEC(ZSTD(1)), + tagsValues Array(String) CODEC(ZSTD(1)), + statusCode Int64 CODEC(ZSTD(1)), + references String CODEC(ZSTD(1)), + externalHttpMethod Nullable(String) CODEC(ZSTD(1)), + externalHttpUrl Nullable(String) CODEC(ZSTD(1)), + component Nullable(String) CODEC(ZSTD(1)), + dbSystem Nullable(String) CODEC(ZSTD(1)), + dbName Nullable(String) CODEC(ZSTD(1)), + dbOperation Nullable(String) CODEC(ZSTD(1)), + peerService Nullable(String) CODEC(ZSTD(1)), + INDEX idx_tagsKeys tagsKeys TYPE bloom_filter(0.01) GRANULARITY 64, + INDEX idx_tagsValues tagsValues TYPE bloom_filter(0.01) GRANULARITY 64, + INDEX idx_duration durationNano TYPE minmax GRANULARITY 1 +) ENGINE MergeTree() +PARTITION BY toDate(timestamp) +ORDER BY (serviceName, -toUnixTimestamp(timestamp)) \ No newline at end of file diff --git a/deploy/docker-swarm/clickhouse-setup/otel-collector-config-hostmetrics.yaml b/deploy/docker-swarm/clickhouse-setup/otel-collector-config-hostmetrics.yaml new file mode 100644 index 0000000000..28033e9f2e --- /dev/null +++ b/deploy/docker-swarm/clickhouse-setup/otel-collector-config-hostmetrics.yaml @@ -0,0 +1,72 @@ +receivers: + otlp: + protocols: + grpc: + http: + jaeger: + protocols: + grpc: + thrift_http: + + hostmetrics: + collection_interval: 60s + scrapers: + cpu: + load: + memory: + disk: + filesystem: + network: + + # Data sources: metrics + prometheus: + config: + scrape_configs: + - job_name: "otel-collector" + dns_sd_configs: + - names: + - 'tasks.signoz_otel-collector' + type: 'A' + port: 8888 + - job_name: "otel-collector-hostmetrics" + scrape_interval: 10s + static_configs: + - targets: ["otel-collector-hostmetrics:8888"] +processors: + batch: + send_batch_size: 
1000 + timeout: 10s + memory_limiter: + # Same as --mem-ballast-size-mib CLI argument + ballast_size_mib: 683 + # 80% of maximum memory up to 2G + limit_mib: 1500 + # 25% of limit up to 2G + spike_limit_mib: 512 + check_interval: 5s + # queued_retry: + # num_workers: 4 + # queue_size: 100 + # retry_on_failure: true +extensions: + health_check: {} + zpages: {} +exporters: + clickhouse: + datasource: tcp://clickhouse:9000 + clickhousemetricswrite: + endpoint: tcp://clickhouse:9000/?database=signoz_metrics + resource_to_telemetry_conversion: + enabled: true + +service: + extensions: [health_check, zpages] + pipelines: + traces: + receivers: [jaeger, otlp] + processors: [batch] + exporters: [clickhouse] + metrics: + receivers: [otlp, prometheus, hostmetrics] + processors: [batch] + exporters: [clickhousemetricswrite] \ No newline at end of file diff --git a/deploy/docker-swarm/clickhouse-setup/otel-collector-config.yaml b/deploy/docker-swarm/clickhouse-setup/otel-collector-config.yaml new file mode 100644 index 0000000000..7d41a4cb83 --- /dev/null +++ b/deploy/docker-swarm/clickhouse-setup/otel-collector-config.yaml @@ -0,0 +1,47 @@ +receivers: + otlp: + protocols: + grpc: + http: + jaeger: + protocols: + grpc: + thrift_http: +processors: + batch: + send_batch_size: 1000 + timeout: 10s + memory_limiter: + # Same as --mem-ballast-size-mib CLI argument + ballast_size_mib: 683 + # 80% of maximum memory up to 2G + limit_mib: 1500 + # 25% of limit up to 2G + spike_limit_mib: 512 + check_interval: 5s + # queued_retry: + # num_workers: 4 + # queue_size: 100 + # retry_on_failure: true +extensions: + health_check: {} + zpages: {} +exporters: + clickhouse: + datasource: tcp://clickhouse:9000 + clickhousemetricswrite: + endpoint: tcp://clickhouse:9000/?database=signoz_metrics + resource_to_telemetry_conversion: + enabled: true + +service: + extensions: [health_check, zpages] + pipelines: + traces: + receivers: [jaeger, otlp] + processors: [batch] + exporters: [clickhouse] + 
metrics: + receivers: [otlp] + processors: [batch] + exporters: [clickhousemetricswrite] \ No newline at end of file diff --git a/deploy/docker-swarm/clickhouse-setup/prometheus.yml b/deploy/docker-swarm/clickhouse-setup/prometheus.yml new file mode 100644 index 0000000000..7d04428a42 --- /dev/null +++ b/deploy/docker-swarm/clickhouse-setup/prometheus.yml @@ -0,0 +1,25 @@ +# my global config +global: + scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute. + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + # - alertmanager:9093 + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. 
+scrape_configs: + + +remote_read: + - url: tcp://clickhouse:9000/?database=signoz_metrics diff --git a/deploy/docker-swarm/common/locust-scripts/locustfile.py b/deploy/docker-swarm/common/locust-scripts/locustfile.py new file mode 100644 index 0000000000..0b518208cd --- /dev/null +++ b/deploy/docker-swarm/common/locust-scripts/locustfile.py @@ -0,0 +1,16 @@ +from locust import HttpUser, task, between +class UserTasks(HttpUser): + wait_time = between(5, 15) + + @task + def rachel(self): + self.client.get("/dispatch?customer=123&nonse=0.6308392664170006") + @task + def trom(self): + self.client.get("/dispatch?customer=392&nonse=0.015296363321630757") + @task + def japanese(self): + self.client.get("/dispatch?customer=731&nonse=0.8022286220408668") + @task + def coffee(self): + self.client.get("/dispatch?customer=567&nonse=0.0022220379420636593") diff --git a/deploy/docker-swarm/common/nginx-config.conf b/deploy/docker-swarm/common/nginx-config.conf new file mode 100644 index 0000000000..881cdc77cb --- /dev/null +++ b/deploy/docker-swarm/common/nginx-config.conf @@ -0,0 +1,30 @@ +server { + listen 3000; + server_name _; + + gzip on; + gzip_static on; + gzip_types text/plain text/css application/json application/x-javascript text/xml application/xml application/xml+rss text/javascript; + gzip_proxied any; + gzip_vary on; + gzip_comp_level 6; + gzip_buffers 16 8k; + gzip_http_version 1.1; + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + location /api { + proxy_pass http://query-service:8080/api; + + } + + # redirect server error pages to the static page /50x.html + # + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } +} \ No newline at end of file diff --git a/deploy/docker-swarm/dashboards/.gitkeep b/deploy/docker-swarm/dashboards/.gitkeep new file mode 100644 index 0000000000..e69de29bb2