fix: further improve clickhouse queries

Signed-off-by: Shivanshu Raj Shrivastava <shivanshu1333@gmail.com>
This commit is contained in:
Shivanshu Raj Shrivastava 2025-05-30 16:07:06 +05:30
parent 5a3ed26f01
commit 62fb05ac5a
No known key found for this signature in database
GPG Key ID: D34D26C62AC3E9AE

View File

@ -559,94 +559,47 @@ func BuildTwoStepFunnelStepOverviewQuery(
) string { ) string {
const tpl = ` const tpl = `
WITH WITH
%[1]d AS contains_error_t1,
%[2]d AS contains_error_t2,
'%[3]s' AS latency_pointer_t1,
'%[4]s' AS latency_pointer_t2,
toDateTime64(%[5]d/1e9,9) AS start_ts, toDateTime64(%[5]d/1e9,9) AS start_ts,
toDateTime64(%[6]d/1e9,9) AS end_ts, toDateTime64(%[6]d/1e9,9) AS end_ts,
(%[6]d - %[5]d)/1e9 AS time_window_sec, (%[6]d - %[5]d)/1e9 AS time_window_sec,
'%[7]s' AS service_name_t1,
'%[8]s' AS span_name_t1,
'%[9]s' AS service_name_t2,
'%[10]s' AS span_name_t2
, step1 AS ( ('%[7]s','%[8]s') AS step1,
SELECT ('%[9]s','%[10]s') AS step2
trace_id,
argMin(timestamp, timestamp) AS t1_time,
argMin(has_error, timestamp) AS s1_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t1
AND name = span_name_t1
AND (contains_error_t1 = 0 OR has_error = true)
%[11]s
GROUP BY trace_id
LIMIT 100000
)
, step2 AS (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t2_time,
argMin(has_error, timestamp) AS s2_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t2
AND name = span_name_t2
AND (contains_error_t2 = 0 OR has_error = true)
%[12]s
GROUP BY trace_id
LIMIT 100000
)
, joined AS (
SELECT
s1.trace_id,
s1.t1_time,
s2.t2_time,
s1.s1_has_error,
s2.s2_has_error
FROM step1 AS s1
INNER JOIN step2 AS s2 ON s1.trace_id = s2.trace_id
WHERE s2.t2_time > s1.t1_time
)
, errors_step1 AS (SELECT countIf(s1_has_error) AS errors FROM step1)
, errors_step2 AS (SELECT countIf(s2_has_error) AS errors FROM step2)
SELECT SELECT
round( round(
count(DISTINCT trace_id)*100.0 countIf(t1_time > 0 AND t2_time > t1_time) * 100.0
/ (SELECT count(DISTINCT trace_id) FROM step1), 2 / countIf(t1_time > 0),
) AS conversion_rate, 2
count(DISTINCT trace_id) / time_window_sec AS avg_rate, ) AS conversion_rate,
greatest( countIf(t1_time > 0 AND t2_time > t1_time)
(SELECT errors FROM errors_step1), / time_window_sec AS avg_rate,
(SELECT errors FROM errors_step2) greatest(sum(s1_error), sum(s2_error)) AS errors,
) AS errors,
avg( avg(
abs( dateDiff('microseconds', t1_time, t2_time) / 1000.0
CAST(t2_time AS Decimal(20,9)) ) AS avg_duration_ms,
- CAST(t1_time AS Decimal(20,9)) quantile(0.99)(
) * 1000 dateDiff('microseconds', t1_time, t2_time) / 1000.0
) AS avg_duration, ) AS p99_duration_ms
CASE FROM (
WHEN '%[13]s' = 'p99' THEN quantile(0.99)( SELECT
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000 trace_id,
) minIf(timestamp, serviceName = step1.1 AND name = step1.2) AS t1_time,
WHEN '%[13]s' = 'p95' THEN quantile(0.95)( minIf(timestamp, serviceName = step2.1 AND name = step2.2) AS t2_time,
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000 toUInt8(anyIf(has_error, serviceName = step1.1 AND name = step1.2)) AS s1_error,
) toUInt8(anyIf(has_error, serviceName = step2.1 AND name = step2.2)) AS s2_error
WHEN '%[13]s' = 'p90' THEN quantile(0.90)( FROM signoz_traces.signoz_index_v3
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000 WHERE
) timestamp BETWEEN start_ts AND end_ts
ELSE quantile(0.99)( AND serviceName IN (step1.1, step2.1)
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) * 1000 AND name IN (step1.2, step2.2)
) AND ((%[1]d = 0) OR (has_error AND serviceName = step1.1 AND name = step1.2))
END AS latency AND ((%[2]d = 0) OR (has_error AND serviceName = step2.1 AND name = step2.2))
FROM joined; %[11]s
` %[12]s
GROUP BY trace_id
) AS funnel
WHERE t1_time > 0 AND t2_time > t1_time;`
return fmt.Sprintf(tpl, return fmt.Sprintf(tpl,
containsErrorT1, containsErrorT1,
@ -688,162 +641,62 @@ func BuildThreeStepFunnelStepOverviewQuery(
latencyTypeT2 string, latencyTypeT2 string,
latencyTypeT3 string, latencyTypeT3 string,
) string { ) string {
base := ` const tpl = `
WITH WITH
%[1]d AS contains_error_t1,
%[2]d AS contains_error_t2,
%[3]d AS contains_error_t3,
'%[4]s' AS latency_pointer_t1,
'%[5]s' AS latency_pointer_t2,
'%[6]s' AS latency_pointer_t3,
toDateTime64(%[7]d/1e9,9) AS start_ts, toDateTime64(%[7]d/1e9,9) AS start_ts,
toDateTime64(%[8]d/1e9,9) AS end_ts, toDateTime64(%[8]d/1e9,9) AS end_ts,
(%[8]d - %[7]d)/1e9 AS time_window_sec, (%[8]d - %[7]d)/1e9 AS time_window_sec,
'%[9]s' AS service_name_t1,
'%[10]s' AS span_name_t1,
'%[11]s' AS service_name_t2,
'%[12]s' AS span_name_t2,
'%[13]s' AS service_name_t3,
'%[14]s' AS span_name_t3
, step1 AS ( ('%[9]s','%[10]s') AS step1,
('%[11]s','%[12]s') AS step2,
('%[13]s','%[14]s') AS step3
SELECT
round(
countIf(
t1_time > 0
AND t2_time > t1_time
AND t3_time > t2_time
) * 100.0
/ countIf(t1_time > 0),
2
) AS conversion_rate,
countIf(t1_time > 0 AND t2_time > t1_time)
/ time_window_sec AS avg_rate_step1_to_2,
greatest(sum(s1_error), sum(s2_error), sum(s3_error)) AS errors,
avg(
dateDiff('microseconds', t1_time, t2_time) / 1000.0
) AS avg_duration_ms,
quantile(0.99)(
dateDiff('microseconds', t1_time, t2_time) / 1000.0
) AS p99_duration_ms
FROM (
SELECT SELECT
trace_id, trace_id,
argMin(timestamp, timestamp) AS t1_time, minIf(timestamp, serviceName = step1.1 AND name = step1.2) AS t1_time,
argMin(has_error, timestamp) AS s1_has_error minIf(timestamp, serviceName = step2.1 AND name = step2.2) AS t2_time,
minIf(timestamp, serviceName = step3.1 AND name = step3.2) AS t3_time,
toUInt8(anyIf(has_error, serviceName = step1.1 AND name = step1.2)) AS s1_error,
toUInt8(anyIf(has_error, serviceName = step2.1 AND name = step2.2)) AS s2_error,
toUInt8(anyIf(has_error, serviceName = step3.1 AND name = step3.2)) AS s3_error
FROM signoz_traces.signoz_index_v3 FROM signoz_traces.signoz_index_v3
WHERE WHERE
timestamp BETWEEN start_ts AND end_ts timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t1 AND serviceName IN (step1.1, step2.1, step3.1)
AND name = span_name_t1 AND name IN (step1.2, step2.2, step3.2)
AND (contains_error_t1 = 0 OR has_error = true) AND ((%[1]d = 0) OR (has_error AND serviceName = step1.1 AND name = step1.2))
AND ((%[2]d = 0) OR (has_error AND serviceName = step2.1 AND name = step2.2))
AND ((%[3]d = 0) OR (has_error AND serviceName = step3.1 AND name = step3.2))
%[15]s %[15]s
GROUP BY trace_id
LIMIT 100000
)
, step2 AS (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t2_time,
argMin(has_error, timestamp) AS s2_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t2
AND name = span_name_t2
AND (contains_error_t2 = 0 OR has_error = true)
%[16]s %[16]s
GROUP BY trace_id
LIMIT 100000
)
, step3 AS (
SELECT
trace_id,
argMin(timestamp, timestamp) AS t3_time,
argMin(has_error, timestamp) AS s3_has_error
FROM signoz_traces.signoz_index_v3
WHERE
timestamp BETWEEN start_ts AND end_ts
AND serviceName = service_name_t3
AND name = span_name_t3
AND (contains_error_t3 = 0 OR has_error = true)
%[17]s %[17]s
GROUP BY trace_id GROUP BY trace_id
LIMIT 100000 ) AS funnel
) WHERE t1_time > 0
, joined_t2 AS ( AND t2_time > t1_time
SELECT AND t3_time > t2_time;`
s1.trace_id,
s1.t1_time,
s2.t2_time,
s1.s1_has_error,
s2.s2_has_error
FROM step1 AS s1
INNER JOIN step2 AS s2 ON s1.trace_id = s2.trace_id
WHERE s2.t2_time > s1.t1_time
)
, joined_t3 AS (
SELECT
j.trace_id,
j.t1_time,
j.t2_time,
s3.t3_time,
j.s1_has_error,
j.s2_has_error,
s3.s3_has_error
FROM joined_t2 AS j
INNER JOIN step3 AS s3 ON j.trace_id = s3.trace_id
WHERE s3.t3_time > j.t2_time
)
, errors_step1 AS (SELECT countIf(s1_has_error) AS errors FROM step1)
, errors_step2 AS (SELECT countIf(s2_has_error) AS errors FROM step2)
, errors_step3 AS (SELECT countIf(s3_has_error) AS errors FROM step3)
`
var sel string return fmt.Sprintf(tpl,
if stepStart == 1 && stepEnd == 2 {
sel = `
SELECT
round(
count(DISTINCT trace_id)*100.0
/ (SELECT count(DISTINCT trace_id) FROM step1), 2
) AS conversion_rate,
count(DISTINCT trace_id) / time_window_sec AS avg_rate,
greatest(
(SELECT errors FROM errors_step1),
(SELECT errors FROM errors_step2)
) AS errors,
avg(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
) AS avg_duration,
CASE
WHEN '%[19]s' = 'p99' THEN quantile(0.99)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
WHEN '%[19]s' = 'p95' THEN quantile(0.95)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
WHEN '%[19]s' = 'p90' THEN quantile(0.90)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
ELSE quantile(0.99)(
abs(CAST(t2_time AS Decimal(20,9)) - CAST(t1_time AS Decimal(20,9))) *1000
)
END AS latency
FROM joined_t2;`
} else {
sel = `
SELECT
round(
count(DISTINCT trace_id)*100.0
/ (SELECT count(DISTINCT trace_id) FROM joined_t2), 2
) AS conversion_rate,
count(DISTINCT trace_id) / time_window_sec AS avg_rate,
greatest(
(SELECT errors FROM errors_step2),
(SELECT errors FROM errors_step3)
) AS errors,
avg(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
) AS avg_duration,
CASE
WHEN '%[20]s' = 'p99' THEN quantile(0.99)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
WHEN '%[20]s' = 'p95' THEN quantile(0.95)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
WHEN '%[20]s' = 'p90' THEN quantile(0.90)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
ELSE quantile(0.99)(
abs(CAST(t3_time AS Decimal(20,9)) - CAST(t2_time AS Decimal(20,9))) *1000
)
END AS latency
FROM joined_t3;`
}
return fmt.Sprintf(base+sel,
containsErrorT1, containsErrorT1,
containsErrorT2, containsErrorT2,
containsErrorT3, containsErrorT3,
@ -861,6 +714,8 @@ FROM joined_t3;`
clauseStep1, clauseStep1,
clauseStep2, clauseStep2,
clauseStep3, clauseStep3,
stepStart,
stepEnd,
latencyTypeT2, latencyTypeT2,
latencyTypeT3, latencyTypeT3,
) )