fix: further improve clickhouse queries

Signed-off-by: Shivanshu Raj Shrivastava <shivanshu1333@gmail.com>
This commit is contained in:
Shivanshu Raj Shrivastava 2025-05-30 16:07:06 +05:30
parent 5a3ed26f01
commit 62fb05ac5a
No known key found for this signature in database
GPG Key ID: D34D26C62AC3E9AE

View File

@ -559,94 +559,47 @@ func BuildTwoStepFunnelStepOverviewQuery(
) string {
const tpl = `
WITH
%[1]d AS contains_error_t1,
%[2]d AS contains_error_t2,
'%[3]s' AS latency_pointer_t1,
'%[4]s' AS latency_pointer_t2,
toDateTime64(%[5]d/1e9,9) AS start_ts,
toDateTime64(%[6]d/1e9,9) AS end_ts,
(%[6]d - %[5]d)/1e9 AS time_window_sec,
'%[7]s' AS service_name_t1,
'%[8]s' AS span_name_t1,
'%[9]s' AS service_name_t2,
'%[10]s' AS span_name_t2
(%[6]d - %[5]d)/1e9 AS time_window_sec,
, step1 AS (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t1_time,
argMin(has_error, timestamp) AS s1_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t1
AND name = span_name_t1
AND (contains_error_t1 = 0 OR has_error = true)
%[11]s
GROUP BY trace_id
LIMIT 100000
)
, step2 AS (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t2_time,
argMin(has_error, timestamp) AS s2_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t2
AND name = span_name_t2
AND (contains_error_t2 = 0 OR has_error = true)
%[12]s
GROUP BY trace_id
LIMIT 100000
)
, joined AS (
SELECT
s1.trace_id,
s1.t1_time,
s2.t2_time,
s1.s1_has_error,
s2.s2_has_error
FROM step1 AS s1
INNER JOIN step2 AS s2 ON s1.trace_id = s2.trace_id
WHERE s2.t2_time > s1.t1_time
)
, errors_step1 AS (SELECT countIf(s1_has_error) AS errors FROM step1)
, errors_step2 AS (SELECT countIf(s2_has_error) AS errors FROM step2)
('%[7]s','%[8]s') AS step1,
('%[9]s','%[10]s') AS step2
SELECT
round(
count(DISTINCT trace_id)*100.0
/ (SELECT count(DISTINCT trace_id) FROM step1), 2
) AS conversion_rate,
count(DISTINCT trace_id) / time_window_sec AS avg_rate,
greatest(
(SELECT errors FROM errors_step1),
(SELECT errors FROM errors_step2)
) AS errors,
countIf(t1_time > 0 AND t2_time > t1_time) * 100.0
/ countIf(t1_time > 0),
2
) AS conversion_rate,
countIf(t1_time > 0 AND t2_time > t1_time)
/ time_window_sec AS avg_rate,
greatest(sum(s1_error), sum(s2_error)) AS errors,
avg(
abs(
CAST(t2_time AS Decimal(20,9))
- CAST(t1_time AS Decimal(20,9))
) * 1000
) AS avg_duration,
CASE
WHEN '%[13]s' = 'p99' THEN quantile(0.99)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000
)
WHEN '%[13]s' = 'p95' THEN quantile(0.95)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000
)
WHEN '%[13]s' = 'p90' THEN quantile(0.90)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000
)
ELSE quantile(0.99)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000
)
END AS latency
FROM joined;
`
dateDiff('microseconds', t1_time, t2_time) / 1000.0
) AS avg_duration_ms,
quantile(0.99)(
dateDiff('microseconds', t1_time, t2_time) / 1000.0
) AS p99_duration_ms
FROM (
SELECT
trace_id,
minIf(timestamp, serviceName = step1.1 AND name = step1.2) AS t1_time,
minIf(timestamp, serviceName = step2.1 AND name = step2.2) AS t2_time,
toUInt8(anyIf(has_error, serviceName = step1.1 AND name = step1.2)) AS s1_error,
toUInt8(anyIf(has_error, serviceName = step2.1 AND name = step2.2)) AS s2_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName IN (step1.1, step2.1)
AND name IN (step1.2, step2.2)
AND ((%[1]d = 0) OR (has_error AND serviceName = step1.1 AND name = step1.2))
AND ((%[2]d = 0) OR (has_error AND serviceName = step2.1 AND name = step2.2))
%[11]s
%[12]s
GROUP BY trace_id
) AS funnel
WHERE t1_time > 0 AND t2_time > t1_time;`
return fmt.Sprintf(tpl,
containsErrorT1,
@ -688,162 +641,62 @@ func BuildThreeStepFunnelStepOverviewQuery(
latencyTypeT2 string,
latencyTypeT3 string,
) string {
base := `
const tpl = `
WITH
%[1]d AS contains_error_t1,
%[2]d AS contains_error_t2,
%[3]d AS contains_error_t3,
'%[4]s' AS latency_pointer_t1,
'%[5]s' AS latency_pointer_t2,
'%[6]s' AS latency_pointer_t3,
toDateTime64(%[7]d/1e9,9) AS start_ts,
toDateTime64(%[8]d/1e9,9) AS end_ts,
(%[8]d - %[7]d)/1e9 AS time_window_sec,
'%[9]s' AS service_name_t1,
'%[10]s' AS span_name_t1,
'%[11]s' AS service_name_t2,
'%[12]s' AS span_name_t2,
'%[13]s' AS service_name_t3,
'%[14]s' AS span_name_t3
(%[8]d - %[7]d)/1e9 AS time_window_sec,
, step1 AS (
('%[9]s','%[10]s') AS step1,
('%[11]s','%[12]s') AS step2,
('%[13]s','%[14]s') AS step3
SELECT
round(
countIf(
t1_time > 0
AND t2_time > t1_time
AND t3_time > t2_time
) * 100.0
/ countIf(t1_time > 0),
2
) AS conversion_rate,
countIf(t1_time > 0 AND t2_time > t1_time)
/ time_window_sec AS avg_rate_step1_to_2,
greatest(sum(s1_error), sum(s2_error), sum(s3_error)) AS errors,
avg(
dateDiff('microseconds', t1_time, t2_time) / 1000.0
) AS avg_duration_ms,
quantile(0.99)(
dateDiff('microseconds', t1_time, t2_time) / 1000.0
) AS p99_duration_ms
FROM (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t1_time,
argMin(has_error, timestamp) AS s1_has_error
minIf(timestamp, serviceName = step1.1 AND name = step1.2) AS t1_time,
minIf(timestamp, serviceName = step2.1 AND name = step2.2) AS t2_time,
minIf(timestamp, serviceName = step3.1 AND name = step3.2) AS t3_time,
toUInt8(anyIf(has_error, serviceName = step1.1 AND name = step1.2)) AS s1_error,
toUInt8(anyIf(has_error, serviceName = step2.1 AND name = step2.2)) AS s2_error,
toUInt8(anyIf(has_error, serviceName = step3.1 AND name = step3.2)) AS s3_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t1
AND name = span_name_t1
AND (contains_error_t1 = 0 OR has_error = true)
AND serviceName IN (step1.1, step2.1, step3.1)
AND name IN (step1.2, step2.2, step3.2)
AND ((%[1]d = 0) OR (has_error AND serviceName = step1.1 AND name = step1.2))
AND ((%[2]d = 0) OR (has_error AND serviceName = step2.1 AND name = step2.2))
AND ((%[3]d = 0) OR (has_error AND serviceName = step3.1 AND name = step3.2))
%[15]s
GROUP BY trace_id
LIMIT 100000
)
, step2 AS (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t2_time,
argMin(has_error, timestamp) AS s2_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t2
AND name = span_name_t2
AND (contains_error_t2 = 0 OR has_error = true)
%[16]s
GROUP BY trace_id
LIMIT 100000
)
, step3 AS (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t3_time,
argMin(has_error, timestamp) AS s3_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t3
AND name = span_name_t3
AND (contains_error_t3 = 0 OR has_error = true)
%[17]s
GROUP BY trace_id
LIMIT 100000
)
, joined_t2 AS (
SELECT
s1.trace_id,
s1.t1_time,
s2.t2_time,
s1.s1_has_error,
s2.s2_has_error
FROM step1 AS s1
INNER JOIN step2 AS s2 ON s1.trace_id = s2.trace_id
WHERE s2.t2_time > s1.t1_time
)
, joined_t3 AS (
SELECT
j.trace_id,
j.t1_time,
j.t2_time,
s3.t3_time,
j.s1_has_error,
j.s2_has_error,
s3.s3_has_error
FROM joined_t2 AS j
INNER JOIN step3 AS s3 ON j.trace_id = s3.trace_id
WHERE s3.t3_time > j.t2_time
)
, errors_step1 AS (SELECT countIf(s1_has_error) AS errors FROM step1)
, errors_step2 AS (SELECT countIf(s2_has_error) AS errors FROM step2)
, errors_step3 AS (SELECT countIf(s3_has_error) AS errors FROM step3)
`
) AS funnel
WHERE t1_time > 0
AND t2_time > t1_time
AND t3_time > t2_time;`
var sel string
if stepStart == 1 && stepEnd == 2 {
sel = `
SELECT
round(
count(DISTINCT trace_id)*100.0
/ (SELECT count(DISTINCT trace_id) FROM step1), 2
) AS conversion_rate,
count(DISTINCT trace_id) / time_window_sec AS avg_rate,
greatest(
(SELECT errors FROM errors_step1),
(SELECT errors FROM errors_step2)
) AS errors,
avg(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
) AS avg_duration,
CASE
WHEN '%[19]s' = 'p99' THEN quantile(0.99)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
WHEN '%[19]s' = 'p95' THEN quantile(0.95)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
WHEN '%[19]s' = 'p90' THEN quantile(0.90)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
ELSE quantile(0.99)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
END AS latency
FROM joined_t2;`
} else {
sel = `
SELECT
round(
count(DISTINCT trace_id)*100.0
/ (SELECT count(DISTINCT trace_id) FROM joined_t2), 2
) AS conversion_rate,
count(DISTINCT trace_id) / time_window_sec AS avg_rate,
greatest(
(SELECT errors FROM errors_step2),
(SELECT errors FROM errors_step3)
) AS errors,
avg(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
) AS avg_duration,
CASE
WHEN '%[20]s' = 'p99' THEN quantile(0.99)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
WHEN '%[20]s' = 'p95' THEN quantile(0.95)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
WHEN '%[20]s' = 'p90' THEN quantile(0.90)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
ELSE quantile(0.99)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
END AS latency
FROM joined_t3;`
}
return fmt.Sprintf(base+sel,
return fmt.Sprintf(tpl,
containsErrorT1,
containsErrorT2,
containsErrorT3,
@ -861,6 +714,8 @@ FROM joined_t3;`
clauseStep1,
clauseStep2,
clauseStep3,
stepStart,
stepEnd,
latencyTypeT2,
latencyTypeT3,
)