SUM() OVER (... ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
SUM() OVER (... ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
累计求和
会话编号生成
GROUP_CONCAT(... ORDER BY ... SEPARATOR ...)
GROUP_CONCAT(... ORDER BY ... SEPARATOR ...)
有序字符串拼接
行为路径字符串
COLLECT_LIST(... ORDER BY ...)
COLLECT_LIST(... ORDER BY ...)
有序数组收集
行为路径数组
前置准备
以下示例使用模拟的用户行为事件表
doc_funnel_events
doc_funnel_events
,覆盖注册、浏览、加购、支付四个步骤:
-- 创建测试表
CREATE TABLE IF NOT EXISTS doc_funnel_events (
event_id INT,
user_id INT,
step_name STRING,
event_time TIMESTAMP
);
-- 插入测试数据(7 个用户,部分未完成全流程)
INSERT INTO doc_funnel_events VALUES
(1, 101, 'register', CAST('2024-01-01 08:00:00' AS TIMESTAMP)),
(2, 101, 'browse', CAST('2024-01-01 08:05:00' AS TIMESTAMP)),
(3, 101, 'add_cart', CAST('2024-01-01 08:10:00' AS TIMESTAMP)),
(4, 101, 'pay', CAST('2024-01-01 08:20:00' AS TIMESTAMP)),
(5, 102, 'register', CAST('2024-01-02 09:00:00' AS TIMESTAMP)),
(6, 102, 'browse', CAST('2024-01-02 09:10:00' AS TIMESTAMP)),
(7, 102, 'add_cart', CAST('2024-01-02 09:20:00' AS TIMESTAMP)),
(8, 103, 'register', CAST('2024-01-03 10:00:00' AS TIMESTAMP)),
(9, 103, 'browse', CAST('2024-01-03 10:15:00' AS TIMESTAMP)),
(10, 104, 'register', CAST('2024-01-04 11:00:00' AS TIMESTAMP)),
(11, 104, 'browse', CAST('2024-01-04 11:30:00' AS TIMESTAMP)),
(12, 104, 'add_cart', CAST('2024-01-04 12:00:00' AS TIMESTAMP)),
(13, 104, 'pay', CAST('2024-01-04 12:30:00' AS TIMESTAMP)),
(14, 105, 'register', CAST('2024-01-05 14:00:00' AS TIMESTAMP)),
(15, 106, 'register', CAST('2024-01-06 15:00:00' AS TIMESTAMP)),
(16, 106, 'browse', CAST('2024-01-06 15:10:00' AS TIMESTAMP)),
(17, 107, 'register', CAST('2024-01-07 16:00:00' AS TIMESTAMP)),
(18, 107, 'browse', CAST('2024-01-07 16:20:00' AS TIMESTAMP)),
(19, 107, 'add_cart', CAST('2024-01-07 17:00:00' AS TIMESTAMP)),
(20, 107, 'pay', CAST('2024-01-07 17:30:00' AS TIMESTAMP)),
-- 后续日期的行为(用于留存和会话分析)
(21, 101, 'browse', CAST('2024-01-02 10:00:00' AS TIMESTAMP)),
(22, 101, 'pay', CAST('2024-01-08 11:00:00' AS TIMESTAMP)),
(23, 102, 'browse', CAST('2024-01-03 09:00:00' AS TIMESTAMP)),
(24, 104, 'browse', CAST('2024-01-05 14:00:00' AS TIMESTAMP)),
(25, 104, 'pay', CAST('2024-01-11 15:00:00' AS TIMESTAMP)),
(26, 107, 'browse', CAST('2024-01-08 10:00:00' AS TIMESTAMP));
说明:
TIMESTAMP
TIMESTAMP
字面量不支持直接写字符串,需使用
CAST('...' AS TIMESTAMP)
CAST('...' AS TIMESTAMP)
显式转换。
场景一:基础漏斗转化率
统计注册、浏览、加购、支付各步骤的用户数,并计算相邻步骤之间的转化率。
WITH funnel AS (
SELECT
user_id,
MAX(CASE WHEN step_name = 'register' THEN 1 ELSE 0 END) AS has_register,
MAX(CASE WHEN step_name = 'browse' THEN 1 ELSE 0 END) AS has_browse,
MAX(CASE WHEN step_name = 'add_cart' THEN 1 ELSE 0 END) AS has_add_cart,
MAX(CASE WHEN step_name = 'pay' THEN 1 ELSE 0 END) AS has_pay
FROM doc_funnel_events
GROUP BY user_id
)
SELECT
COUNT(*) AS total_users,
SUM(has_register) AS step1_register,
SUM(has_browse) AS step2_browse,
SUM(has_add_cart) AS step3_add_cart,
SUM(has_pay) AS step4_pay,
ROUND(SUM(has_browse) * 100.0 / SUM(has_register), 1) AS browse_rate,
ROUND(SUM(has_add_cart) * 100.0 / SUM(has_browse), 1) AS add_cart_rate,
ROUND(SUM(has_pay) * 100.0 / SUM(has_add_cart), 1) AS pay_rate
FROM funnel;
WITH step_times AS (
-- 取每个用户每个步骤的最早发生时间
SELECT
user_id,
MIN(CASE WHEN step_name = 'register' THEN event_time END) AS t_register,
MIN(CASE WHEN step_name = 'browse' THEN event_time END) AS t_browse,
MIN(CASE WHEN step_name = 'add_cart' THEN event_time END) AS t_add_cart,
MIN(CASE WHEN step_name = 'pay' THEN event_time END) AS t_pay
FROM doc_funnel_events
GROUP BY user_id
),
ordered_funnel AS (
SELECT
user_id,
CASE WHEN t_register IS NOT NULL THEN 1 ELSE 0 END AS s1,
CASE WHEN t_browse > t_register THEN 1 ELSE 0 END AS s2,
CASE WHEN t_add_cart > t_browse THEN 1 ELSE 0 END AS s3,
CASE WHEN t_pay > t_add_cart THEN 1 ELSE 0 END AS s4
FROM step_times
)
SELECT
COUNT(*) AS total,
SUM(s1) AS step1_register,
SUM(s2) AS step2_browse,
SUM(s3) AS step3_add_cart,
SUM(s4) AS step4_pay
FROM ordered_funnel;
执行结果:
total
step1_register
step2_browse
step3_add_cart
step4_pay
7
7
6
4
3
如需查看每个用户的有序漏斗明细:
WITH step_times AS (
SELECT
user_id,
MIN(CASE WHEN step_name = 'register' THEN event_time END) AS t_register,
MIN(CASE WHEN step_name = 'browse' THEN event_time END) AS t_browse,
MIN(CASE WHEN step_name = 'add_cart' THEN event_time END) AS t_add_cart,
MIN(CASE WHEN step_name = 'pay' THEN event_time END) AS t_pay
FROM doc_funnel_events
GROUP BY user_id
)
SELECT
user_id,
t_register IS NOT NULL AS did_register,
t_browse > t_register AS did_browse_after,
t_add_cart > t_browse AS did_add_cart_after,
t_pay > t_add_cart AS did_pay_after
FROM step_times
ORDER BY user_id;
执行结果:
user_id
did_register
did_browse_after
did_add_cart_after
did_pay_after
101
true
true
true
true
102
true
true
true
NULL
103
true
true
NULL
NULL
104
true
true
true
true
105
true
NULL
NULL
NULL
106
true
true
NULL
NULL
107
true
true
true
true
说明:当某步骤时间戳为
NULL
NULL
(用户未触达),比较表达式返回
NULL
NULL
而非
false
false
,在结果中显示为
NULL
NULL
。如需将
NULL
NULL
统一显示为
false
false
,可用
COALESCE(t_browse > t_register, false)
COALESCE(t_browse > t_register, false)
。
场景三:用户首次/末次行为
使用
MIN
MIN
和
MAX
MAX
找出每个用户的首次注册时间、首次购买时间和末次活跃时间。
SELECT
user_id,
MIN(event_time) AS first_event_time,
MAX(event_time) AS last_event_time,
MIN(CASE WHEN step_name = 'pay' THEN event_time END) AS first_pay_time,
MAX(CASE WHEN step_name = 'pay' THEN event_time END) AS last_pay_time
FROM doc_funnel_events
GROUP BY user_id
ORDER BY user_id;
执行结果:
user_id
first_event_time
last_event_time
first_pay_time
last_pay_time
101
2024-01-01T00:00:00.000Z
2024-01-08T03:00:00.000Z
2024-01-01T00:20:00.000Z
2024-01-08T03:00:00.000Z
102
2024-01-02T01:00:00.000Z
2024-01-03T01:00:00.000Z
NULL
NULL
103
2024-01-03T02:00:00.000Z
2024-01-03T02:15:00.000Z
NULL
NULL
104
2024-01-04T03:00:00.000Z
2024-01-11T07:00:00.000Z
2024-01-04T04:30:00.000Z
2024-01-11T07:00:00.000Z
105
2024-01-05T06:00:00.000Z
2024-01-05T06:00:00.000Z
NULL
NULL
106
2024-01-06T07:00:00.000Z
2024-01-06T07:10:00.000Z
NULL
NULL
107
2024-01-07T08:00:00.000Z
2024-01-08T02:00:00.000Z
2024-01-07T09:30:00.000Z
2024-01-07T09:30:00.000Z
说明:
MIN(CASE WHEN step_name = 'pay' THEN event_time END)
MIN(CASE WHEN step_name = 'pay' THEN event_time END)
只对
pay
pay
行取最小值,未购买的用户返回
NULL
NULL
。时间戳以 UTC 存储,展示时可用
DATE_FORMAT(event_time, 'yyyy-MM-dd HH:mm:ss')
DATE_FORMAT(event_time, 'yyyy-MM-dd HH:mm:ss')
转为本地时间字符串。
场景四:用户留存分析
统计每个用户注册后第 1 天、第 7 天是否有行为,并汇总留存率。
WITH first_register AS (
-- 每个用户的注册日期
SELECT
user_id,
MIN(event_time) AS register_time
FROM doc_funnel_events
WHERE step_name = 'register'
GROUP BY user_id
),
activity AS (
-- 每个用户每天的活跃记录(去重)
SELECT DISTINCT
user_id,
CAST(DATE_FORMAT(event_time, 'yyyy-MM-dd') AS DATE) AS active_day
FROM doc_funnel_events
),
retention AS (
SELECT
r.user_id,
DATE_FORMAT(r.register_time, 'yyyy-MM-dd') AS register_day,
MAX(CASE WHEN DATEDIFF(a.active_day,
CAST(DATE_FORMAT(r.register_time, 'yyyy-MM-dd') AS DATE)) = 1
THEN 1 ELSE 0 END) AS day1_retained,
MAX(CASE WHEN DATEDIFF(a.active_day,
CAST(DATE_FORMAT(r.register_time, 'yyyy-MM-dd') AS DATE)) = 7
THEN 1 ELSE 0 END) AS day7_retained
FROM first_register r
LEFT JOIN activity a ON r.user_id = a.user_id
GROUP BY r.user_id, r.register_time
)
SELECT
COUNT(*) AS total_users,
SUM(day1_retained) AS day1_count,
SUM(day7_retained) AS day7_count,
ROUND(SUM(day1_retained) * 100.0 / COUNT(*), 1) AS day1_rate,
ROUND(SUM(day7_retained) * 100.0 / COUNT(*), 1) AS day7_rate
FROM retention;
执行结果:
total_users
day1_count
day7_count
day1_rate
day7_rate
7
4
2
57.1
28.6
如需查看每个用户的留存明细:
WITH first_register AS (
SELECT user_id, MIN(event_time) AS register_time
FROM doc_funnel_events
WHERE step_name = 'register'
GROUP BY user_id
),
activity AS (
SELECT DISTINCT user_id,
CAST(DATE_FORMAT(event_time, 'yyyy-MM-dd') AS DATE) AS active_day
FROM doc_funnel_events
)
SELECT
r.user_id,
DATE_FORMAT(r.register_time, 'yyyy-MM-dd') AS register_day,
MAX(CASE WHEN DATEDIFF(a.active_day,
CAST(DATE_FORMAT(r.register_time, 'yyyy-MM-dd') AS DATE)) = 1
THEN 1 ELSE 0 END) AS day1_retained,
MAX(CASE WHEN DATEDIFF(a.active_day,
CAST(DATE_FORMAT(r.register_time, 'yyyy-MM-dd') AS DATE)) = 7
THEN 1 ELSE 0 END) AS day7_retained
FROM first_register r
LEFT JOIN activity a ON r.user_id = a.user_id
GROUP BY r.user_id, r.register_time
ORDER BY r.user_id;
WITH with_prev AS (
SELECT
user_id,
step_name,
event_time,
LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) AS prev_time
FROM doc_funnel_events
),
with_session AS (
SELECT
user_id,
step_name,
event_time,
-- 首次事件或间隔超过 30 分钟时标记为新会话
SUM(CASE
WHEN prev_time IS NULL
OR (UNIX_TIMESTAMP(event_time) - UNIX_TIMESTAMP(prev_time)) > 1800
THEN 1 ELSE 0
END) OVER (
PARTITION BY user_id
ORDER BY event_time
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS session_id
FROM with_prev
)
SELECT
user_id,
session_id,
COUNT(*) AS event_count,
MIN(DATE_FORMAT(event_time, 'yyyy-MM-dd HH:mm:ss')) AS session_start,
MAX(DATE_FORMAT(event_time, 'yyyy-MM-dd HH:mm:ss')) AS session_end
FROM with_session
GROUP BY user_id, session_id
ORDER BY user_id, session_id;
SELECT
user_id,
GROUP_CONCAT(step_name ORDER BY event_time SEPARATOR ' -> ') AS behavior_path
FROM doc_funnel_events
GROUP BY user_id
ORDER BY user_id;
SELECT
user_id,
COLLECT_LIST(step_name ORDER BY event_time) AS step_list,
SIZE(COLLECT_LIST(step_name ORDER BY event_time)) AS step_count
FROM doc_funnel_events
GROUP BY user_id
ORDER BY user_id;