首次推送
This commit is contained in:
17
signoz/.env
Normal file
17
signoz/.env
Normal file
@@ -0,0 +1,17 @@
|
||||
# bitnami/zookeeper
|
||||
IMG_ZOOKEEPER=3.7.1
|
||||
# clickhouse/clickhouse-server
|
||||
IMG_CLICKHOUSE=24.1.2-alpine
|
||||
# signoz/alertmanager(告警)
|
||||
IMG_ALERTMANAGER=0.23.7
|
||||
# signoz/query-service(查询)
|
||||
IMG_QUERY_SERVICE=0.60.0
|
||||
# signoz/frontend(GUI)
|
||||
IMG_FRONTEND=0.60.0
|
||||
# signoz/locust(性能测试)
|
||||
IMG_LOCUST=1.2.3
|
||||
# signoz/signoz-otel-collector(otel 数据采集)
|
||||
IMG_OTEL_COLLECTOR=0.111.13
|
||||
# signoz/signoz-schema-migrator(数据库升级)
|
||||
IMG_MIGRATOR=0.111.13
|
||||
|
||||
0
signoz/README.md
Normal file
0
signoz/README.md
Normal file
75
signoz/clickhouse-setup/clickhouse-cluster.xml
Normal file
75
signoz/clickhouse-setup/clickhouse-cluster.xml
Normal file
@@ -0,0 +1,75 @@
|
||||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
|
||||
Optional. If you don't use replicated tables, you could omit that.
|
||||
|
||||
See https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/
|
||||
-->
|
||||
<zookeeper>
|
||||
<node index="1">
|
||||
<host>zookeeper</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<!-- <node index="2">
|
||||
<host>zookeeper-2</host>
|
||||
<port>2181</port>
|
||||
</node>
|
||||
<node index="3">
|
||||
<host>zookeeper-3</host>
|
||||
<port>2181</port>
|
||||
</node> -->
|
||||
</zookeeper>
|
||||
|
||||
<!-- Configuration of clusters that could be used in Distributed tables.
|
||||
https://clickhouse.com/docs/en/operations/table_engines/distributed/
|
||||
-->
|
||||
<remote_servers>
|
||||
<cluster>
|
||||
<!-- Inter-server per-cluster secret for Distributed queries
|
||||
default: no secret (no authentication will be performed)
|
||||
|
||||
If set, then Distributed queries will be validated on shards, so at least:
|
||||
- such cluster should exist on the shard,
|
||||
- such cluster should have the same secret.
|
||||
|
||||
And also (and which is more important), the initial_user will
|
||||
be used as current user for the query.
|
||||
|
||||
Right now the protocol is pretty simple and it only takes into account:
|
||||
- cluster name
|
||||
- query
|
||||
|
||||
Also it will be nice if the following will be implemented:
|
||||
- source hostname (see interserver_http_host), but then it will depends from DNS,
|
||||
it can use IP address instead, but then the you need to get correct on the initiator node.
|
||||
- target hostname / ip address (same notes as for source hostname)
|
||||
- time-based security tokens
|
||||
-->
|
||||
<!-- <secret></secret> -->
|
||||
<shard>
|
||||
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
|
||||
<!-- <internal_replication>false</internal_replication> -->
|
||||
<!-- Optional. Shard weight when writing data. Default: 1. -->
|
||||
<!-- <weight>1</weight> -->
|
||||
<replica>
|
||||
<host>clickhouse</host>
|
||||
<port>9000</port>
|
||||
<!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
|
||||
<!-- <priority>1</priority> -->
|
||||
</replica>
|
||||
</shard>
|
||||
<!-- <shard>
|
||||
<replica>
|
||||
<host>clickhouse-2</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>clickhouse-3</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard> -->
|
||||
</cluster>
|
||||
</remote_servers>
|
||||
</clickhouse>
|
||||
1143
signoz/clickhouse-setup/clickhouse-config.xml
Normal file
1143
signoz/clickhouse-setup/clickhouse-config.xml
Normal file
File diff suppressed because it is too large
Load Diff
123
signoz/clickhouse-setup/clickhouse-users.xml
Normal file
123
signoz/clickhouse-setup/clickhouse-users.xml
Normal file
@@ -0,0 +1,123 @@
|
||||
<?xml version="1.0"?>
|
||||
<clickhouse>
|
||||
<!-- See also the files in users.d directory where the settings can be overridden. -->
|
||||
|
||||
<!-- Profiles of settings. -->
|
||||
<profiles>
|
||||
<!-- Default settings. -->
|
||||
<default>
|
||||
<!-- Maximum memory usage for processing single query, in bytes. -->
|
||||
<max_memory_usage>10000000000</max_memory_usage>
|
||||
|
||||
<!-- How to choose between replicas during distributed query processing.
|
||||
random - choose random replica from set of replicas with minimum number of errors
|
||||
nearest_hostname - from set of replicas with minimum number of errors, choose replica
|
||||
with minimum number of different symbols between replica's hostname and local hostname
|
||||
(Hamming distance).
|
||||
in_order - first live replica is chosen in specified order.
|
||||
first_or_random - if first replica one has higher number of errors, pick a random one from replicas with minimum number of errors.
|
||||
-->
|
||||
<load_balancing>random</load_balancing>
|
||||
</default>
|
||||
|
||||
<!-- Profile that allows only read queries. -->
|
||||
<readonly>
|
||||
<readonly>1</readonly>
|
||||
</readonly>
|
||||
</profiles>
|
||||
|
||||
<!-- Users and ACL. -->
|
||||
<users>
|
||||
<!-- If user name was not specified, 'default' user is used. -->
|
||||
<default>
|
||||
<!-- See also the files in users.d directory where the password can be overridden.
|
||||
|
||||
Password could be specified in plaintext or in SHA256 (in hex format).
|
||||
|
||||
If you want to specify password in plaintext (not recommended), place it in 'password' element.
|
||||
Example: <password>qwerty</password>.
|
||||
Password could be empty.
|
||||
|
||||
If you want to specify SHA256, place it in 'password_sha256_hex' element.
|
||||
Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
|
||||
Restrictions of SHA256: impossibility to connect to ClickHouse using MySQL JS client (as of July 2019).
|
||||
|
||||
If you want to specify double SHA1, place it in 'password_double_sha1_hex' element.
|
||||
Example: <password_double_sha1_hex>e395796d6546b1b65db9d665cd43f0e858dd4303</password_double_sha1_hex>
|
||||
|
||||
If you want to specify a previously defined LDAP server (see 'ldap_servers' in the main config) for authentication,
|
||||
place its name in 'server' element inside 'ldap' element.
|
||||
Example: <ldap><server>my_ldap_server</server></ldap>
|
||||
|
||||
If you want to authenticate the user via Kerberos (assuming Kerberos is enabled, see 'kerberos' in the main config),
|
||||
place 'kerberos' element instead of 'password' (and similar) elements.
|
||||
The name part of the canonical principal name of the initiator must match the user name for authentication to succeed.
|
||||
You can also place 'realm' element inside 'kerberos' element to further restrict authentication to only those requests
|
||||
whose initiator's realm matches it.
|
||||
Example: <kerberos />
|
||||
Example: <kerberos><realm>EXAMPLE.COM</realm></kerberos>
|
||||
|
||||
How to generate decent password:
|
||||
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
|
||||
In first line will be password and in second - corresponding SHA256.
|
||||
|
||||
How to generate double SHA1:
|
||||
Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'
|
||||
In first line will be password and in second - corresponding double SHA1.
|
||||
-->
|
||||
<password></password>
|
||||
|
||||
<!-- List of networks with open access.
|
||||
|
||||
To open access from everywhere, specify:
|
||||
<ip>::/0</ip>
|
||||
|
||||
To open access only from localhost, specify:
|
||||
<ip>::1</ip>
|
||||
<ip>127.0.0.1</ip>
|
||||
|
||||
Each element of list has one of the following forms:
|
||||
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
|
||||
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
|
||||
<host> Hostname. Example: server01.clickhouse.com.
|
||||
To check access, DNS query is performed, and all received addresses compared to peer address.
|
||||
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
|
||||
To check access, DNS PTR query is performed for peer address and then regexp is applied.
|
||||
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
|
||||
Strongly recommended that regexp is ends with $
|
||||
All results of DNS requests are cached till server restart.
|
||||
-->
|
||||
<networks>
|
||||
<ip>::/0</ip>
|
||||
</networks>
|
||||
|
||||
<!-- Settings profile for user. -->
|
||||
<profile>default</profile>
|
||||
|
||||
<!-- Quota for user. -->
|
||||
<quota>default</quota>
|
||||
|
||||
<!-- User can create other users and grant rights to them. -->
|
||||
<!-- <access_management>1</access_management> -->
|
||||
</default>
|
||||
</users>
|
||||
|
||||
<!-- Quotas. -->
|
||||
<quotas>
|
||||
<!-- Name of quota. -->
|
||||
<default>
|
||||
<!-- Limits for time interval. You could specify many intervals with different limits. -->
|
||||
<interval>
|
||||
<!-- Length of interval. -->
|
||||
<duration>3600</duration>
|
||||
|
||||
<!-- No limits. Just calculate resource usage for time interval. -->
|
||||
<queries>0</queries>
|
||||
<errors>0</errors>
|
||||
<result_rows>0</result_rows>
|
||||
<read_rows>0</read_rows>
|
||||
<execution_time>0</execution_time>
|
||||
</interval>
|
||||
</default>
|
||||
</quotas>
|
||||
</clickhouse>
|
||||
21
signoz/clickhouse-setup/custom-function.xml
Normal file
21
signoz/clickhouse-setup/custom-function.xml
Normal file
@@ -0,0 +1,21 @@
|
||||
<functions>
|
||||
<function>
|
||||
<type>executable</type>
|
||||
<name>histogramQuantile</name>
|
||||
<return_type>Float64</return_type>
|
||||
<argument>
|
||||
<type>Array(Float64)</type>
|
||||
<name>buckets</name>
|
||||
</argument>
|
||||
<argument>
|
||||
<type>Array(Float64)</type>
|
||||
<name>counts</name>
|
||||
</argument>
|
||||
<argument>
|
||||
<type>Float64</type>
|
||||
<name>quantile</name>
|
||||
</argument>
|
||||
<format>CSV</format>
|
||||
<command>./histogramQuantile</command>
|
||||
</function>
|
||||
</functions>
|
||||
64
signoz/clickhouse-setup/keeper_config.xml
Normal file
64
signoz/clickhouse-setup/keeper_config.xml
Normal file
@@ -0,0 +1,64 @@
|
||||
<clickhouse>
|
||||
<logger>
|
||||
<!-- Possible levels [1]:
|
||||
|
||||
- none (turns off logging)
|
||||
- fatal
|
||||
- critical
|
||||
- error
|
||||
- warning
|
||||
- notice
|
||||
- information
|
||||
- debug
|
||||
- trace
|
||||
|
||||
[1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114
|
||||
-->
|
||||
<level>information</level>
|
||||
<log>/var/log/clickhouse-keeper/clickhouse-keeper.log</log>
|
||||
<errorlog>/var/log/clickhouse-keeper/clickhouse-keeper.err.log</errorlog>
|
||||
<!-- Rotation policy
|
||||
See https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/FileChannel.h#L54-L85
|
||||
-->
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
|
||||
</logger>
|
||||
|
||||
<listen_host>0.0.0.0</listen_host>
|
||||
<max_connections>4096</max_connections>
|
||||
|
||||
<keeper_server>
|
||||
<tcp_port>9181</tcp_port>
|
||||
|
||||
<!-- Must be unique among all keeper serves -->
|
||||
<server_id>1</server_id>
|
||||
|
||||
<log_storage_path>/var/lib/clickhouse/coordination/logs</log_storage_path>
|
||||
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
|
||||
|
||||
<coordination_settings>
|
||||
<operation_timeout_ms>10000</operation_timeout_ms>
|
||||
<min_session_timeout_ms>10000</min_session_timeout_ms>
|
||||
<session_timeout_ms>100000</session_timeout_ms>
|
||||
<raft_logs_level>information</raft_logs_level>
|
||||
<compress_logs>false</compress_logs>
|
||||
<!-- All settings listed in https://github.com/ClickHouse/ClickHouse/blob/master/src/Coordination/CoordinationSettings.h -->
|
||||
</coordination_settings>
|
||||
|
||||
<!-- enable sanity hostname checks for cluster configuration (e.g. if localhost is used with remote endpoints) -->
|
||||
<hostname_checks_enabled>true</hostname_checks_enabled>
|
||||
<raft_configuration>
|
||||
<server>
|
||||
<id>1</id>
|
||||
|
||||
<!-- Internal port and hostname -->
|
||||
<hostname>clickhouses-keeper-1</hostname>
|
||||
<port>9234</port>
|
||||
</server>
|
||||
|
||||
<!-- Add more servers here -->
|
||||
|
||||
</raft_configuration>
|
||||
</keeper_server>
|
||||
</clickhouse>
|
||||
116
signoz/clickhouse-setup/otel-collector-config.yaml
Normal file
116
signoz/clickhouse-setup/otel-collector-config.yaml
Normal file
@@ -0,0 +1,116 @@
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
hostmetrics:
|
||||
collection_interval: 30s
|
||||
scrapers:
|
||||
cpu: {}
|
||||
load: {}
|
||||
memory: {}
|
||||
disk: {}
|
||||
filesystem: {}
|
||||
network: {}
|
||||
|
||||
processors:
|
||||
batch:
|
||||
send_batch_size: 10000
|
||||
send_batch_max_size: 11000
|
||||
timeout: 10s
|
||||
# memory_limiter:
|
||||
# # 80% of maximum memory up to 2G
|
||||
# limit_mib: 1500
|
||||
# # 25% of limit up to 2G
|
||||
# spike_limit_mib: 512
|
||||
# check_interval: 5s
|
||||
#
|
||||
# # 50% of the maximum memory
|
||||
# limit_percentage: 50
|
||||
# # 20% of max memory usage spike expected
|
||||
# spike_limit_percentage: 20
|
||||
# queued_retry:
|
||||
# num_workers: 4
|
||||
# queue_size: 100
|
||||
# retry_on_failure: true
|
||||
resourcedetection:
|
||||
# Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
|
||||
detectors: [env, system] # include ec2 for AWS, gcp for GCP and azure for Azure.
|
||||
timeout: 2s
|
||||
signozspanmetrics/delta:
|
||||
metrics_exporter: clickhousemetricswrite
|
||||
metrics_flush_interval: 60s
|
||||
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ]
|
||||
dimensions_cache_size: 100000
|
||||
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
|
||||
enable_exp_histogram: true
|
||||
dimensions:
|
||||
- name: service.namespace
|
||||
default: default
|
||||
- name: deployment.environment
|
||||
default: default
|
||||
# This is added to ensure the uniqueness of the timeseries
|
||||
# Otherwise, identical timeseries produced by multiple replicas of
|
||||
# collectors result in incorrect APM metrics
|
||||
- name: signoz.collector.id
|
||||
- name: service.version
|
||||
- name: browser.platform
|
||||
- name: browser.mobile
|
||||
- name: k8s.cluster.name
|
||||
- name: k8s.node.name
|
||||
- name: k8s.namespace.name
|
||||
- name: host.name
|
||||
- name: host.type
|
||||
- name: container.name
|
||||
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
zpages:
|
||||
endpoint: 0.0.0.0:55679
|
||||
pprof:
|
||||
endpoint: 0.0.0.0:1777
|
||||
|
||||
exporters:
|
||||
clickhousetraces:
|
||||
datasource: tcp://clickhouse:9000/signoz_traces
|
||||
low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING}
|
||||
clickhousemetricswrite:
|
||||
endpoint: tcp://clickhouse:9000/signoz_metrics
|
||||
resource_to_telemetry_conversion:
|
||||
enabled: true
|
||||
clickhousemetricswrite/prometheus:
|
||||
endpoint: tcp://clickhouse:9000/signoz_metrics
|
||||
clickhousemetricswritev2:
|
||||
dsn: tcp://clickhouse:9000/signoz_metrics
|
||||
clickhouselogsexporter:
|
||||
dsn: tcp://clickhouse:9000/signoz_logs
|
||||
timeout: 10s
|
||||
use_new_schema: true
|
||||
# logging: {}
|
||||
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
encoding: json
|
||||
metrics:
|
||||
address: 0.0.0.0:8888
|
||||
extensions:
|
||||
- health_check
|
||||
- zpages
|
||||
- pprof
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [signozspanmetrics/delta, batch]
|
||||
exporters: [clickhousetraces]
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [clickhousemetricswrite, clickhousemetricswritev2]
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [clickhouselogsexporter]
|
||||
1
signoz/clickhouse-setup/otel-collector-opamp-config.yaml
Normal file
1
signoz/clickhouse-setup/otel-collector-opamp-config.yaml
Normal file
@@ -0,0 +1 @@
|
||||
server_endpoint: ws://query-service:4320/v1/opamp
|
||||
25
signoz/clickhouse-setup/prometheus.yml
Normal file
25
signoz/clickhouse-setup/prometheus.yml
Normal file
@@ -0,0 +1,25 @@
|
||||
# my global config
|
||||
global:
|
||||
scrape_interval: 5s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# Alertmanager configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
- alertmanager:9093
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
# - "first_rules.yml"
|
||||
# - "second_rules.yml"
|
||||
- 'alerts.yml'
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
scrape_configs: []
|
||||
|
||||
remote_read:
|
||||
- url: tcp://clickhouse:9000/signoz_metrics
|
||||
BIN
signoz/clickhouse-setup/user_scripts/histogramQuantile
Normal file
BIN
signoz/clickhouse-setup/user_scripts/histogramQuantile
Normal file
Binary file not shown.
237
signoz/clickhouse-setup/user_scripts/histogramQuantile.go
Normal file
237
signoz/clickhouse-setup/user_scripts/histogramQuantile.go
Normal file
@@ -0,0 +1,237 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// NOTE: executable must be built with target OS and architecture set to linux/amd64
|
||||
// env GOOS=linux GOARCH=amd64 go build -o histogramQuantile histogramQuantile.go
|
||||
|
||||
// The following code is adapted from the following source:
|
||||
// https://github.com/prometheus/prometheus/blob/main/promql/quantile.go
|
||||
|
||||
type bucket struct {
|
||||
upperBound float64
|
||||
count float64
|
||||
}
|
||||
|
||||
// buckets implements sort.Interface.
|
||||
type buckets []bucket
|
||||
|
||||
func (b buckets) Len() int { return len(b) }
|
||||
func (b buckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound }
|
||||
|
||||
// bucketQuantile calculates the quantile 'q' based on the given buckets. The
|
||||
// buckets will be sorted by upperBound by this function (i.e. no sorting
|
||||
// needed before calling this function). The quantile value is interpolated
|
||||
// assuming a linear distribution within a bucket. However, if the quantile
|
||||
// falls into the highest bucket, the upper bound of the 2nd highest bucket is
|
||||
// returned. A natural lower bound of 0 is assumed if the upper bound of the
|
||||
// lowest bucket is greater 0. In that case, interpolation in the lowest bucket
|
||||
// happens linearly between 0 and the upper bound of the lowest bucket.
|
||||
// However, if the lowest bucket has an upper bound less or equal 0, this upper
|
||||
// bound is returned if the quantile falls into the lowest bucket.
|
||||
//
|
||||
// There are a number of special cases (once we have a way to report errors
|
||||
// happening during evaluations of AST functions, we should report those
|
||||
// explicitly):
|
||||
//
|
||||
// If 'buckets' has 0 observations, NaN is returned.
|
||||
//
|
||||
// If 'buckets' has fewer than 2 elements, NaN is returned.
|
||||
//
|
||||
// If the highest bucket is not +Inf, NaN is returned.
|
||||
//
|
||||
// If q==NaN, NaN is returned.
|
||||
//
|
||||
// If q<0, -Inf is returned.
|
||||
//
|
||||
// If q>1, +Inf is returned.
|
||||
func bucketQuantile(q float64, buckets buckets) float64 {
|
||||
if math.IsNaN(q) {
|
||||
return math.NaN()
|
||||
}
|
||||
if q < 0 {
|
||||
return math.Inf(-1)
|
||||
}
|
||||
if q > 1 {
|
||||
return math.Inf(+1)
|
||||
}
|
||||
sort.Sort(buckets)
|
||||
if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
|
||||
return math.NaN()
|
||||
}
|
||||
|
||||
buckets = coalesceBuckets(buckets)
|
||||
ensureMonotonic(buckets)
|
||||
|
||||
if len(buckets) < 2 {
|
||||
return math.NaN()
|
||||
}
|
||||
observations := buckets[len(buckets)-1].count
|
||||
if observations == 0 {
|
||||
return math.NaN()
|
||||
}
|
||||
rank := q * observations
|
||||
b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
|
||||
|
||||
if b == len(buckets)-1 {
|
||||
return buckets[len(buckets)-2].upperBound
|
||||
}
|
||||
if b == 0 && buckets[0].upperBound <= 0 {
|
||||
return buckets[0].upperBound
|
||||
}
|
||||
var (
|
||||
bucketStart float64
|
||||
bucketEnd = buckets[b].upperBound
|
||||
count = buckets[b].count
|
||||
)
|
||||
if b > 0 {
|
||||
bucketStart = buckets[b-1].upperBound
|
||||
count -= buckets[b-1].count
|
||||
rank -= buckets[b-1].count
|
||||
}
|
||||
return bucketStart + (bucketEnd-bucketStart)*(rank/count)
|
||||
}
|
||||
|
||||
// coalesceBuckets merges buckets with the same upper bound.
|
||||
//
|
||||
// The input buckets must be sorted.
|
||||
func coalesceBuckets(buckets buckets) buckets {
|
||||
last := buckets[0]
|
||||
i := 0
|
||||
for _, b := range buckets[1:] {
|
||||
if b.upperBound == last.upperBound {
|
||||
last.count += b.count
|
||||
} else {
|
||||
buckets[i] = last
|
||||
last = b
|
||||
i++
|
||||
}
|
||||
}
|
||||
buckets[i] = last
|
||||
return buckets[:i+1]
|
||||
}
|
||||
|
||||
// The assumption that bucket counts increase monotonically with increasing
|
||||
// upperBound may be violated during:
|
||||
//
|
||||
// * Recording rule evaluation of histogram_quantile, especially when rate()
|
||||
// has been applied to the underlying bucket timeseries.
|
||||
// * Evaluation of histogram_quantile computed over federated bucket
|
||||
// timeseries, especially when rate() has been applied.
|
||||
//
|
||||
// This is because scraped data is not made available to rule evaluation or
|
||||
// federation atomically, so some buckets are computed with data from the
|
||||
// most recent scrapes, but the other buckets are missing data from the most
|
||||
// recent scrape.
|
||||
//
|
||||
// Monotonicity is usually guaranteed because if a bucket with upper bound
|
||||
// u1 has count c1, then any bucket with a higher upper bound u > u1 must
|
||||
// have counted all c1 observations and perhaps more, so that c >= c1.
|
||||
//
|
||||
// Randomly interspersed partial sampling breaks that guarantee, and rate()
|
||||
// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
|
||||
// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
|
||||
// monotonicity is broken. It is exacerbated by rate() because under normal
|
||||
// operation, cumulative counting of buckets will cause the bucket counts to
|
||||
// diverge such that small differences from missing samples are not a problem.
|
||||
// rate() removes this divergence.)
|
||||
//
|
||||
// bucketQuantile depends on that monotonicity to do a binary search for the
|
||||
// bucket with the φ-quantile count, so breaking the monotonicity
|
||||
// guarantee causes bucketQuantile() to return undefined (nonsense) results.
|
||||
//
|
||||
// As a somewhat hacky solution until ingestion is atomic per scrape, we
|
||||
// calculate the "envelope" of the histogram buckets, essentially removing
|
||||
// any decreases in the count between successive buckets.
|
||||
|
||||
func ensureMonotonic(buckets buckets) {
|
||||
max := buckets[0].count
|
||||
for i := 1; i < len(buckets); i++ {
|
||||
switch {
|
||||
case buckets[i].count > max:
|
||||
max = buckets[i].count
|
||||
case buckets[i].count < max:
|
||||
buckets[i].count = max
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// End of copied code.
|
||||
|
||||
func readLines() []string {
|
||||
r := bufio.NewReader(os.Stdin)
|
||||
bytes := []byte{}
|
||||
lines := []string{}
|
||||
for {
|
||||
line, isPrefix, err := r.ReadLine()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
bytes = append(bytes, line...)
|
||||
if !isPrefix {
|
||||
str := strings.TrimSpace(string(bytes))
|
||||
if len(str) > 0 {
|
||||
lines = append(lines, str)
|
||||
bytes = []byte{}
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(bytes) > 0 {
|
||||
lines = append(lines, string(bytes))
|
||||
}
|
||||
return lines
|
||||
}
|
||||
|
||||
func main() {
|
||||
lines := readLines()
|
||||
for _, text := range lines {
|
||||
// Example input
|
||||
// "[1, 2, 4, 8, 16]", "[1, 5, 8, 10, 14]", 0.9"
|
||||
// bounds - counts - quantile
|
||||
parts := strings.Split(text, "\",")
|
||||
|
||||
var bucketNumbers []float64
|
||||
// Strip the ends with square brackets
|
||||
text = parts[0][2 : len(parts[0])-1]
|
||||
// Parse the bucket bounds
|
||||
for _, num := range strings.Split(text, ",") {
|
||||
num = strings.TrimSpace(num)
|
||||
number, err := strconv.ParseFloat(num, 64)
|
||||
if err == nil {
|
||||
bucketNumbers = append(bucketNumbers, number)
|
||||
}
|
||||
}
|
||||
|
||||
var bucketCounts []float64
|
||||
// Strip the ends with square brackets
|
||||
text = parts[1][2 : len(parts[1])-1]
|
||||
// Parse the bucket counts
|
||||
for _, num := range strings.Split(text, ",") {
|
||||
num = strings.TrimSpace(num)
|
||||
number, err := strconv.ParseFloat(num, 64)
|
||||
if err == nil {
|
||||
bucketCounts = append(bucketCounts, number)
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the quantile
|
||||
q, err := strconv.ParseFloat(parts[2], 64)
|
||||
var b buckets
|
||||
|
||||
if err == nil {
|
||||
for i := 0; i < len(bucketNumbers); i++ {
|
||||
b = append(b, bucket{upperBound: bucketNumbers[i], count: bucketCounts[i]})
|
||||
}
|
||||
}
|
||||
fmt.Println(bucketQuantile(q, b))
|
||||
}
|
||||
}
|
||||
64
signoz/common/nginx-config.conf
Normal file
64
signoz/common/nginx-config.conf
Normal file
@@ -0,0 +1,64 @@
|
||||
map $http_upgrade $connection_upgrade {
|
||||
default upgrade;
|
||||
'' close;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 3301;
|
||||
server_name _;
|
||||
|
||||
gzip on;
|
||||
gzip_static on;
|
||||
gzip_types text/plain text/css application/json application/x-javascript text/xml application/xml application/xml+rss text/javascript;
|
||||
gzip_proxied any;
|
||||
gzip_vary on;
|
||||
gzip_comp_level 6;
|
||||
gzip_buffers 16 8k;
|
||||
gzip_http_version 1.1;
|
||||
|
||||
# to handle uri issue 414 from nginx
|
||||
client_max_body_size 24M;
|
||||
large_client_header_buffers 8 128k;
|
||||
|
||||
location / {
|
||||
if ( $uri = '/index.html' ) {
|
||||
add_header Cache-Control no-store always;
|
||||
}
|
||||
root /usr/share/nginx/html;
|
||||
index index.html index.htm;
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
location ~ ^/api/(v1|v3)/logs/(tail|livetail){
|
||||
proxy_pass http://query-service:8080;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
# connection will be closed if no data is read for 600s between successive read operations
|
||||
proxy_read_timeout 600s;
|
||||
|
||||
# dont buffer the data send it directly to client.
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
}
|
||||
|
||||
location /api {
|
||||
proxy_pass http://query-service:8080/api;
|
||||
# connection will be closed if no data is read for 600s between successive read operations
|
||||
proxy_read_timeout 600s;
|
||||
}
|
||||
|
||||
location /ws {
|
||||
proxy_pass http://query-service:8080/ws;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade "websocket";
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_read_timeout 86400;
|
||||
}
|
||||
|
||||
# redirect server error pages to the static page /50x.html
|
||||
#
|
||||
error_page 500 502 503 504 /50x.html;
|
||||
location = /50x.html {
|
||||
root /usr/share/nginx/html;
|
||||
}
|
||||
}
|
||||
174
signoz/docker-compose.yaml
Normal file
174
signoz/docker-compose.yaml
Normal file
@@ -0,0 +1,174 @@
|
||||
services:
|
||||
|
||||
zookeeper:
|
||||
image: bitnami/zookeeper:${IMG_ZOOKEEPER}
|
||||
container_name: signoz-zookeeper
|
||||
hostname: zookeeper
|
||||
user: root
|
||||
# ports:
|
||||
# - "2181:2181"
|
||||
# - "2888:2888"
|
||||
# - "3888:3888"
|
||||
# volumes:
|
||||
# - ./data/zookeeper:/bitnami/zookeeper
|
||||
environment:
|
||||
- ZOO_SERVER_ID=1
|
||||
- ALLOW_ANONYMOUS_LOGIN=yes
|
||||
- ZOO_AUTOPURGE_INTERVAL=1
|
||||
|
||||
clickhouse:
|
||||
image: clickhouse/clickhouse-server:${IMG_CLICKHOUSE}
|
||||
tty: true
|
||||
container_name: signoz-clickhouse
|
||||
hostname: clickhouse
|
||||
ports:
|
||||
# - "9000:9000"
|
||||
- "8123:8123"
|
||||
# - "9181:9181"
|
||||
volumes:
|
||||
- ./clickhouse-setup/clickhouse-config.xml:/etc/clickhouse-server/config.xml
|
||||
- ./clickhouse-setup/clickhouse-users.xml:/etc/clickhouse-server/users.xml
|
||||
- ./clickhouse-setup/custom-function.xml:/etc/clickhouse-server/custom-function.xml
|
||||
- ./clickhouse-setup/clickhouse-cluster.xml:/etc/clickhouse-server/config.d/cluster.xml
|
||||
# - ./data/clickhouse/:/var/lib/clickhouse/
|
||||
- ./clickhouse-setup/user_scripts:/var/lib/clickhouse/user_scripts/
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD",
|
||||
"wget",
|
||||
"--spider",
|
||||
"-q",
|
||||
"0.0.0.0:8123/ping"
|
||||
]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
alertmanager:
|
||||
image: signoz/alertmanager:${IMG_ALERTMANAGER}
|
||||
container_name: signoz-alertmanager
|
||||
# volumes:
|
||||
# - ./data/alertmaRnager:/data
|
||||
depends_on:
|
||||
query-service:
|
||||
condition: service_healthy
|
||||
command:
|
||||
- --queryService.url=http://query-service:8085
|
||||
- --storage.path=/data
|
||||
|
||||
query-service:
|
||||
image: signoz/query-service:${IMG_QUERY_SERVICE}
|
||||
container_name: signoz-query-service
|
||||
command:
|
||||
[
|
||||
"-config=/root/config/prometheus.yml",
|
||||
"--use-logs-new-schema=true"
|
||||
]
|
||||
# ports:
|
||||
# - "6060:6060" # pprof port
|
||||
# - "8080:8080" # query-service port
|
||||
|
||||
volumes:
|
||||
- ./clickhouse-setup/prometheus.yml:/root/config/prometheus.yml
|
||||
- ./signoz_data/:/var/lib/signoz/
|
||||
environment:
|
||||
- ClickHouseUrl=tcp://clickhouse:9000
|
||||
- ALERTMANAGER_API_PREFIX=http://alertmanager:9093/api/
|
||||
- SIGNOZ_LOCAL_DB_PATH=/var/lib/signoz/signoz.db
|
||||
- DASHBOARDS_PATH=/root/config/dashboards
|
||||
- STORAGE=clickhouse
|
||||
- GODEBUG=netdns=go
|
||||
- TELEMETRY_ENABLED=true
|
||||
- DEPLOYMENT_TYPE=docker-standalone-amd
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD",
|
||||
"wget",
|
||||
"--spider",
|
||||
"-q",
|
||||
"localhost:8080/api/v1/health"
|
||||
]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
depends_on:
|
||||
clickhouse:
|
||||
condition: service_healthy
|
||||
otel-collector-migrator-sync:
|
||||
condition: service_completed_successfully
|
||||
|
||||
frontend:
|
||||
image: signoz/frontend:${IMG_FRONTEND}
|
||||
container_name: signoz-frontend
|
||||
depends_on:
|
||||
- alertmanager
|
||||
- query-service
|
||||
ports:
|
||||
- "3301:3301"
|
||||
volumes:
|
||||
- ./common/nginx-config.conf:/etc/nginx/conf.d/default.conf
|
||||
|
||||
otel-collector:
|
||||
image: signoz/signoz-otel-collector:${IMG_OTEL_COLLECTOR}
|
||||
container_name: signoz-otel-collector
|
||||
command:
|
||||
[
|
||||
"--config=/etc/otel-collector-config.yaml",
|
||||
"--manager-config=/etc/manager-config.yaml",
|
||||
"--copy-path=/var/tmp/collector-config.yaml",
|
||||
"--feature-gates=-pkg.translator.prometheus.NormalizeName"
|
||||
]
|
||||
user: root
|
||||
volumes:
|
||||
- ./clickhouse-setup/otel-collector-config.yaml:/etc/otel-collector-config.yaml
|
||||
- ./clickhouse-setup/otel-collector-opamp-config.yaml:/etc/manager-config.yaml
|
||||
# - /:/hostfs:ro
|
||||
environment:
|
||||
- OTEL_RESOURCE_ATTRIBUTES=host.name=signoz-host,os.type=linux
|
||||
- LOW_CARDINAL_EXCEPTION_GROUPING=false
|
||||
ports:
|
||||
# - "1777:1777" # pprof extension
|
||||
- "4317:4317" # OTLP gRPC receiver
|
||||
- "4318:4318" # OTLP HTTP receiver
|
||||
# - "8888:8888" # OtelCollector internal metrics
|
||||
# - "8889:8889" # signoz spanmetrics exposed by the agent
|
||||
# - "9411:9411" # Zipkin port
|
||||
# - "13133:13133" # health check extension
|
||||
# - "14250:14250" # Jaeger gRPC
|
||||
# - "14268:14268" # Jaeger thrift HTTP
|
||||
# - "55678:55678" # OpenCensus receiver
|
||||
# - "55679:55679" # zPages extension
|
||||
depends_on:
|
||||
clickhouse:
|
||||
condition: service_healthy
|
||||
otel-collector-migrator-sync:
|
||||
condition: service_completed_successfully
|
||||
query-service:
|
||||
condition: service_healthy
|
||||
|
||||
# 数据库初始化
|
||||
otel-collector-migrator-sync:
|
||||
image: signoz/signoz-schema-migrator:${IMG_MIGRATOR}
|
||||
container_name: signoz-otel-migrator-sync
|
||||
command:
|
||||
- "sync"
|
||||
- "--dsn=tcp://clickhouse:9000"
|
||||
- "--up="
|
||||
depends_on:
|
||||
clickhouse:
|
||||
condition: service_healthy
|
||||
|
||||
otel-collector-migrator-async:
|
||||
image: signoz/signoz-schema-migrator:${IMG_MIGRATOR}
|
||||
container_name: signoz-otel-migrator-async
|
||||
command:
|
||||
- "async"
|
||||
- "--dsn=tcp://clickhouse:9000"
|
||||
- "--up="
|
||||
depends_on:
|
||||
clickhouse:
|
||||
condition: service_healthy
|
||||
otel-collector-migrator-sync:
|
||||
condition: service_completed_successfully
|
||||
Reference in New Issue
Block a user