chore(clickhouse): 🔧 include cluster.xml for distributed set up (#1810)

* chore(clickhouse): 🔧 include cluster.xml for distributed set up Signed-off-by: Prashant Shahi <prashant@signoz.io>
2025-08-12 03:39:02 +08:00 · 2022-12-05 17:26:13 +05:30 · 2022-12-05 17:26:13 +05:30 · 8bb3eefeb5
commit 8bb3eefeb5
parent a46f074e22
4 changed files with 225 additions and 250 deletions
--- a/deploy/docker-swarm/clickhouse-setup/clickhouse-cluster.xml
+++ b/deploy/docker-swarm/clickhouse-setup/clickhouse-cluster.xml
@ -0,0 +1,75 @@
+<?xml version="1.0"?>
+<clickhouse>
+    <!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
+         Optional. If you don't use replicated tables, you could omit that.
+
+         See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
+      -->
+    <zookeeper>
+        <node index="1">
+            <host>zookeeper-1</host>
+            <port>2181</port>
+        </node>
+        <!-- <node index="2">
+            <host>zookeeper-2</host>
+            <port>2181</port>
+        </node>
+        <node index="3">
+            <host>zookeeper-3</host>
+            <port>2181</port>
+        </node> -->
+    </zookeeper>
+
+    <!-- Configuration of clusters that could be used in Distributed tables.
+         https://clickhouse.com/docs/en/engines/table-engines/special/distributed/
+      -->
+    <remote_servers>
+        <cluster>
+            <!-- Inter-server per-cluster secret for Distributed queries
+                 default: no secret (no authentication will be performed)
+
+                 If set, then Distributed queries will be validated on shards, so at least:
+                 - such cluster should exist on the shard,
+                 - such cluster should have the same secret.
+
+                 And also (and which is more important), the initial_user will
+                 be used as current user for the query.
+
+                 Right now the protocol is pretty simple and it only takes into account:
+                 - cluster name
+                 - query
+
+                 It would also be nice if the following were implemented:
+                 - source hostname (see interserver_http_host), but then it will depend on DNS;
+                   it can use an IP address instead, but then you need to get it correct on the initiator node.
+                 - target hostname / ip address (same notes as for source hostname)
+                 - time-based security tokens
+            -->
+            <!-- <secret></secret> -->
+            <shard>
+                <!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
+                <!-- <internal_replication>false</internal_replication> -->
+                <!-- Optional. Shard weight when writing data. Default: 1. -->
+                <!-- <weight>1</weight> -->
+                <replica>
+                    <host>clickhouse</host>
+                    <port>9000</port>
+                    <!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
+                    <!-- <priority>1</priority> -->
+                </replica>
+            </shard>
+            <!-- <shard>
+                <replica>
+                    <host>clickhouse-2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <replica>
+                    <host>clickhouse-3</host>
+                    <port>9000</port>
+                </replica>
+            </shard> -->
+        </cluster>
+    </remote_servers>
+</clickhouse>
--- a/deploy/docker/clickhouse-setup/clickhouse-cluster.xml
+++ b/deploy/docker/clickhouse-setup/clickhouse-cluster.xml
@ -0,0 +1,75 @@
+<?xml version="1.0"?>
+<clickhouse>
+    <!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
+         Optional. If you don't use replicated tables, you could omit that.
+
+         See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
+      -->
+    <zookeeper>
+        <node index="1">
+            <host>zookeeper-1</host>
+            <port>2181</port>
+        </node>
+        <!-- <node index="2">
+            <host>zookeeper-2</host>
+            <port>2181</port>
+        </node>
+        <node index="3">
+            <host>zookeeper-3</host>
+            <port>2181</port>
+        </node> -->
+    </zookeeper>
+
+    <!-- Configuration of clusters that could be used in Distributed tables.
+         https://clickhouse.com/docs/en/engines/table-engines/special/distributed/
+      -->
+    <remote_servers>
+        <cluster>
+            <!-- Inter-server per-cluster secret for Distributed queries
+                 default: no secret (no authentication will be performed)
+
+                 If set, then Distributed queries will be validated on shards, so at least:
+                 - such cluster should exist on the shard,
+                 - such cluster should have the same secret.
+
+                 And also (and which is more important), the initial_user will
+                 be used as current user for the query.
+
+                 Right now the protocol is pretty simple and it only takes into account:
+                 - cluster name
+                 - query
+
+                 It would also be nice if the following were implemented:
+                 - source hostname (see interserver_http_host), but then it will depend on DNS;
+                   it can use an IP address instead, but then you need to get it correct on the initiator node.
+                 - target hostname / ip address (same notes as for source hostname)
+                 - time-based security tokens
+            -->
+            <!-- <secret></secret> -->
+            <shard>
+                <!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
+                <!-- <internal_replication>false</internal_replication> -->
+                <!-- Optional. Shard weight when writing data. Default: 1. -->
+                <!-- <weight>1</weight> -->
+                <replica>
+                    <host>clickhouse</host>
+                    <port>9000</port>
+                    <!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
+                    <!-- <priority>1</priority> -->
+                </replica>
+            </shard>
+            <!-- <shard>
+                <replica>
+                    <host>clickhouse-2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <replica>
+                    <host>clickhouse-3</host>
+                    <port>9000</port>
+                </replica>
+            </shard> -->
+        </cluster>
+    </remote_servers>
+</clickhouse>
--- a/deploy/docker/clickhouse-setup/docker-compose-distributed.yaml
+++ b/deploy/docker/clickhouse-setup/docker-compose-distributed.yaml
@ -1,250 +0,0 @@
-version: "2.4"
-
-x-clickhouse-defaults: &clickhouse-defaults
-  restart: on-failure
-  image: clickhouse/clickhouse-server:22.8.8-alpine
-  tty: true
-  depends_on:
-    - zookeeper-1
-    - zookeeper-2
-    - zookeeper-3
-  logging:
-    options:
-      max-size: 50m
-      max-file: "3"
-  healthcheck:
-    # "clickhouse", "client", "-u ${CLICKHOUSE_USER}", "--password ${CLICKHOUSE_PASSWORD}", "-q 'SELECT 1'"
-    test: ["CMD", "wget", "--spider", "-q", "localhost:8123/ping"]
-    interval: 30s
-    timeout: 5s
-    retries: 3
-  ulimits:
-    nproc: 65535
-    nofile:
-      soft: 262144
-      hard: 262144
-
-x-clickhouse-depend: &clickhouse-depend
-  depends_on:
-    clickhouse:
-      condition: service_healthy
-    clickhouse-2:
-      condition: service_healthy
-    clickhouse-3:
-      condition: service_healthy
-
-services:
-  
-  zookeeper-1:
-    image: bitnami/zookeeper:3.7.0
-    container_name: zookeeper-1
-    hostname: zookeeper-1
-    user: root
-    ports:
-      - "2181:2181"
-      - "2888:2888"
-      - "3888:3888"
-    volumes:
-      - ./data/zookeeper-1:/bitnami/zookeeper
-    environment:
-      - ZOO_SERVER_ID=1
-      - ZOO_SERVERS=0.0.0.0:2888:3888,zookeeper-2:2888:3888,zookeeper-3:2888:3888
-      - ALLOW_ANONYMOUS_LOGIN=yes
-      - ZOO_AUTOPURGE_INTERVAL=1
-
-  zookeeper-2:
-    image: bitnami/zookeeper:3.7.0
-    container_name: zookeeper-2
-    hostname: zookeeper-2
-    user: root
-    ports:
-      - "2182:2181"
-      - "2889:2888"
-      - "3889:3888"
-    volumes:
-      - ./data/zookeeper-2:/bitnami/zookeeper
-    environment:
-      - ZOO_SERVER_ID=2
-      - ZOO_SERVERS=zookeeper-1:2888:3888,0.0.0.0:2888:3888,zookeeper-3:2888:3888
-      - ALLOW_ANONYMOUS_LOGIN=yes
-      - ZOO_AUTOPURGE_INTERVAL=1
-
-  zookeeper-3:
-    image: bitnami/zookeeper:3.7.0
-    container_name: zookeeper-3
-    hostname: zookeeper-3
-    user: root
-    ports:
-      - "2183:2181"
-      - "2890:2888"
-      - "3890:3888"
-    volumes:
-      - ./data/zookeeper-3:/bitnami/zookeeper
-    environment:
-      - ZOO_SERVER_ID=3
-      - ZOO_SERVERS=zookeeper-1:2888:3888,zookeeper-2:2888:3888,0.0.0.0:2888:3888
-      - ALLOW_ANONYMOUS_LOGIN=yes
-      - ZOO_AUTOPURGE_INTERVAL=1
-
-  clickhouse:
-    <<: *clickhouse-defaults
-    container_name: clickhouse
-    hostname: clickhouse
-    ports:
-      - "9000:9000"
-      - "8123:8123"
-      - "9181:9181"
-    volumes:
-      - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
-      - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
-      - ./clickhouse-distributed.xml:/etc/clickhouse-server/config.d/distributed.xml
-      # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
-      - ./data/clickhouse/:/var/lib/clickhouse/
-
-
-  clickhouse-2:
-    <<: *clickhouse-defaults
-    container_name: clickhouse-2
-    hostname: clickhouse-2
-    ports:
-      - "9001:9000"
-      - "8124:8123"
-      - "9182:9181"
-    volumes:
-      - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
-      - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
-      - ./clickhouse-distributed.xml:/etc/clickhouse-server/config.d/distributed.xml
-      # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
-      - ./data/clickhouse-2/:/var/lib/clickhouse/
-
-  clickhouse-3:
-    <<: *clickhouse-defaults
-    container_name: clickhouse-3
-    hostname: clickhouse-3
-    ports:
-      - "9002:9000"
-      - "8125:8123"
-      - "9183:9181"
-    volumes:
-      - ./clickhouse-config.xml:/etc/clickhouse-server/config.xml
-      - ./clickhouse-users.xml:/etc/clickhouse-server/users.xml
-      - ./clickhouse-distributed.xml:/etc/clickhouse-server/config.d/distributed.xml
-      # - ./clickhouse-storage.xml:/etc/clickhouse-server/config.d/storage.xml
-      - ./data/clickhouse-3/:/var/lib/clickhouse/
-
-  alertmanager:
-    image: signoz/alertmanager:0.23.0-0.2
-    volumes:
-      - ./data/alertmanager:/data
-    depends_on:
-      query-service:
-        condition: service_healthy
-    restart: on-failure
-    command:
-      - --queryService.url=http://query-service:8085
-      - --storage.path=/data
-
-# Notes for Maintainers/Contributors who will change Line Numbers of Frontend & Query-Section. Please Update Line Numbers in `./scripts/commentLinesForSetup.sh` & `./CONTRIBUTING.md`
-
-  query-service:
-    image: signoz/query-service:distributed-test-1
-    container_name: query-service
-    command: ["-config=/root/config/prometheus.yml"]
-    # ports:
-    #   - "6060:6060"     # pprof port
-    #   - "8080:8080"     # query-service port
-    volumes:
-      - ./prometheus.yml:/root/config/prometheus.yml
-      - ../dashboards:/root/config/dashboards
-      - ./data/signoz/:/var/lib/signoz/
-    environment:
-      - ClickHouseUrl=tcp://clickhouse:9000/?database=signoz_traces
-      - ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/
-      - SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db
-      - DASHBOARDS_PATH=/root/config/dashboards
-      - STORAGE=clickhouse
-      - GODEBUG=netdns=go
-      - TELEMETRY_ENABLED=true
-      - DEPLOYMENT_TYPE=docker-standalone-amd
-    restart: on-failure
-    healthcheck:
-      test: ["CMD", "wget", "--spider", "-q", "localhost:8080/api/v1/version"]
-      interval: 30s
-      timeout: 5s
-      retries: 3
-    <<: *clickhouse-depend
-
-  frontend:
-    image: signoz/frontend:distributed-test-1
-    container_name: frontend
-    restart: on-failure
-    depends_on:
-      - alertmanager
-      - query-service
-    ports:
-      - "3301:3301"
-    volumes:
-      - ../common/nginx-config.conf:/etc/nginx/conf.d/default.conf
-
-  otel-collector:
-    image: signoz/signoz-otel-collector:distributed-test-1
-    command: ["--config=/etc/otel-collector-config.yaml"]
-  # required for reading docker container logs
-    volumes:
-      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
-      - /var/lib/docker/containers:/var/lib/docker/containers:ro
-    environment:
-      - OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux
-    ports:
-      # - "1777:1777"     # pprof extension
-      - "4317:4317"     # OTLP gRPC receiver
-      - "4318:4318"     # OTLP HTTP receiver
-      # - "8888:8888"     # OtelCollector internal metrics
-      # - "8889:8889"     # signoz spanmetrics exposed by the agent
-      # - "9411:9411"     # Zipkin port
-      # - "13133:13133"   # health check extension
-      # - "14250:14250"   # Jaeger gRPC
-      # - "14268:14268"   # Jaeger thrift HTTP
-      # - "55678:55678"   # OpenCensus receiver
-      # - "55679:55679"   # zPages extension
-    restart: on-failure
-    <<: *clickhouse-depend
-
-  otel-collector-metrics:
-    image: signoz/signoz-otel-collector:distributed-test-1
-    command: ["--config=/etc/otel-collector-metrics-config.yaml"]
-    volumes:
-      - ./otel-collector-metrics-config.yaml:/etc/otel-collector-metrics-config.yaml
-    # ports:
-    #   - "1777:1777"     # pprof extension
-    #   - "8888:8888"     # OtelCollector internal metrics
-    #   - "13133:13133"   # Health check extension
-    #   - "55679:55679"   # zPages extension
-    restart: on-failure
-    <<: *clickhouse-depend
-
-  hotrod:
-   image: jaegertracing/example-hotrod:1.30
-   container_name: hotrod
-   logging:
-     options:
-       max-size: 50m
-       max-file: "3"
-   command: ["all"]
-   environment:
-     - JAEGER_ENDPOINT=http://otel-collector:14268/api/traces
-
-  load-hotrod:
-    image: "grubykarol/locust:1.2.3-python3.9-alpine3.12"
-    container_name: load-hotrod
-    hostname: load-hotrod
-    environment:
-      ATTACKED_HOST: http://hotrod:8080
-      LOCUST_MODE: standalone
-      NO_PROXY: standalone
-      TASK_DELAY_FROM: 5
-      TASK_DELAY_TO: 30
-      QUIET_MODE: "${QUIET_MODE:-false}"
-      LOCUST_OPTS: "--headless -u 10 -r 1"
-    volumes:
-      - ../common/locust-scripts:/locust
--- a/pkg/query-service/tests/test-deploy/clickhouse-cluster.xml
+++ b/pkg/query-service/tests/test-deploy/clickhouse-cluster.xml
@ -0,0 +1,75 @@
+<?xml version="1.0"?>
+<clickhouse>
+    <!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
+         Optional. If you don't use replicated tables, you could omit that.
+
+         See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
+      -->
+    <zookeeper>
+        <node index="1">
+            <host>zookeeper-1</host>
+            <port>2181</port>
+        </node>
+        <!-- <node index="2">
+            <host>zookeeper-2</host>
+            <port>2181</port>
+        </node>
+        <node index="3">
+            <host>zookeeper-3</host>
+            <port>2181</port>
+        </node> -->
+    </zookeeper>
+
+    <!-- Configuration of clusters that could be used in Distributed tables.
+         https://clickhouse.com/docs/en/engines/table-engines/special/distributed/
+      -->
+    <remote_servers>
+        <cluster>
+            <!-- Inter-server per-cluster secret for Distributed queries
+                 default: no secret (no authentication will be performed)
+
+                 If set, then Distributed queries will be validated on shards, so at least:
+                 - such cluster should exist on the shard,
+                 - such cluster should have the same secret.
+
+                 And also (and which is more important), the initial_user will
+                 be used as current user for the query.
+
+                 Right now the protocol is pretty simple and it only takes into account:
+                 - cluster name
+                 - query
+
+                 It would also be nice if the following were implemented:
+                 - source hostname (see interserver_http_host), but then it will depend on DNS;
+                   it can use an IP address instead, but then you need to get it correct on the initiator node.
+                 - target hostname / ip address (same notes as for source hostname)
+                 - time-based security tokens
+            -->
+            <!-- <secret></secret> -->
+            <shard>
+                <!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
+                <!-- <internal_replication>false</internal_replication> -->
+                <!-- Optional. Shard weight when writing data. Default: 1. -->
+                <!-- <weight>1</weight> -->
+                <replica>
+                    <host>clickhouse</host>
+                    <port>9000</port>
+                    <!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
+                    <!-- <priority>1</priority> -->
+                </replica>
+            </shard>
+            <!-- <shard>
+                <replica>
+                    <host>clickhouse-2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            <shard>
+                <replica>
+                    <host>clickhouse-3</host>
+                    <port>9000</port>
+                </replica>
+            </shard> -->
+        </cluster>
+    </remote_servers>
+</clickhouse>