WITH events AS (
SELECT user_id, CAST(event_time AS TIMESTAMP_LTZ) AS event_time, page, event_type
FROM user_events
),
-- Step 1: 获取上一个事件时间
with_prev AS (
SELECT
user_id,
event_time,
page,
event_type,
LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) AS prev_event_time
FROM events
),
-- Step 2: 计算时间间隔,标记新会话开始
with_gap AS (
SELECT
*,
CASE
WHEN prev_event_time IS NULL THEN 1 -- 第一个事件,新会话
WHEN TIMESTAMPDIFF(MINUTE, prev_event_time, event_time) > 30 THEN 1 -- 超时,新会话
ELSE 0
END AS is_new_session
FROM with_prev
),
-- Step 3: 累积标记生成会话 ID
with_session AS (
SELECT
*,
SUM(is_new_session) OVER (PARTITION BY user_id ORDER BY event_time) AS session_id
FROM with_gap
)
SELECT user_id, session_id, event_time, page, event_type
FROM with_session
ORDER BY user_id, session_id, event_time;
输出示例:
user_id
session_id
event_time
page
event_type
1
1
2026-03-01 10:00:00
home
view
1
1
2026-03-01 10:05:00
products
view
1
1
2026-03-01 10:15:00
product_detail
view
1
2
2026-03-01 14:00:00
home
view
1
2
2026-03-01 14:10:00
cart
view
2
1
2026-03-01 09:00:00
landing
view
2
2
2026-03-01 11:00:00
home
view
关键说明
TIMESTAMPDIFF(MINUTE, start, end)
TIMESTAMPDIFF(MINUTE, start, end)
返回两个时间戳之间的分钟差
DATEDIFF
DATEDIFF
对
TIMESTAMP_LTZ
TIMESTAMP_LTZ
类型默认返回天数差(不是分钟),必须用
TIMESTAMPDIFF
TIMESTAMPDIFF
会话超时阈值可根据业务调整:电商常用 30 分钟,内容平台可用 15 分钟
2. 会话聚合指标
SQL 实现
-- 在会话切分基础上聚合
SELECT
user_id,
session_id,
MIN(event_time) AS session_start,
MAX(event_time) AS session_end,
TIMESTAMPDIFF(MINUTE, MIN(event_time), MAX(event_time)) AS session_duration_min,
COUNT(*) AS page_views,
COUNT(DISTINCT page) AS unique_pages,
-- 跳出会话: 只有 1 个页面视图
CASE WHEN COUNT(*) = 1 THEN 1 ELSE 0 END AS is_bounce,
-- 转化会话: 包含目标事件
MAX(CASE WHEN event_type IN ('add_to_cart', 'complete') THEN 1 ELSE 0 END) AS is_converted
FROM with_session
GROUP BY user_id, session_id
ORDER BY user_id, session_id;
输出示例:
user_id
session_id
session_start
session_end
duration_min
page_views
is_bounce
is_converted
1
1
2026-03-01 10:00:00
2026-03-01 10:15:00
15
3
0
0
1
2
2026-03-01 14:00:00
2026-03-01 14:10:00
10
2
0
0
2
1
2026-03-01 09:00:00
2026-03-01 09:00:00
0
1
1
0
2
2
2026-03-01 11:00:00
2026-03-01 11:12:00
12
5
0
1
3
1
2026-03-01 08:00:00
2026-03-01 08:30:00
30
7
0
1
3. 整体会话统计
SQL 实现
SELECT
COUNT(*) AS total_sessions,
COUNT(DISTINCT user_id) AS unique_users,
ROUND(AVG(page_views), 1) AS avg_pages_per_session,
ROUND(AVG(session_duration_min), 1) AS avg_duration_min,
ROUND(SUM(CASE WHEN page_views = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS bounce_rate_pct,
ROUND(SUM(is_converted) * 100.0 / COUNT(*), 1) AS conversion_rate_pct
FROM session_agg;
输出示例:
total_sessions
unique_users
avg_pages
avg_duration_min
bounce_rate
conversion_rate
5
3
3.6
13.4
20.0
40.0
4. 用户路径分析
SQL 实现
SELECT
user_id,
session_id,
GROUP_CONCAT(page ORDER BY event_time SEPARATOR ' → ') AS page_path,
COUNT(*) AS path_length
FROM with_session
GROUP BY user_id, session_id
ORDER BY user_id, session_id;
输出示例:
user_id
session_id
page_path
path_length
1
1
home → products → product_detail
3
1
2
home → cart
2
2
1
landing
1
2
2
home → products → product_detail → cart → checkout
-- 统计最常见的 5 条路径
SELECT
page_path,
COUNT(*) AS session_count,
COUNT(DISTINCT user_id) AS unique_users
FROM (
SELECT
user_id,
session_id,
GROUP_CONCAT(page ORDER BY event_time SEPARATOR ' → ') AS page_path
FROM with_session
GROUP BY user_id, session_id
)
GROUP BY page_path
ORDER BY session_count DESC
LIMIT 5;
5. 会话质量分层
分层标准
层级
页面数
时长
定义
跳出
1
0 分钟
只看一页即离开
浅层
2-3
< 5 分钟
快速浏览
中等
4-6
5-15 分钟
有一定参与
深度
7+
> 15 分钟
深度参与
SQL 实现
SELECT
user_id,
session_id,
page_views,
session_duration_min,
CASE
WHEN page_views = 1 THEN 'bounce'
WHEN page_views <= 3 AND session_duration_min < 5 THEN 'shallow'
WHEN page_views <= 6 AND session_duration_min < 15 THEN 'medium'
ELSE 'deep'
END AS session_quality
FROM session_agg
ORDER BY user_id, session_id;
质量分布统计
SELECT
session_quality,
COUNT(*) AS session_count,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 1) AS pct
FROM (
SELECT
CASE
WHEN page_views = 1 THEN 'bounce'
WHEN page_views <= 3 AND session_duration_min < 5 THEN 'shallow'
WHEN page_views <= 6 AND session_duration_min < 15 THEN 'medium'
ELSE 'deep'
END AS session_quality
FROM session_agg
)
GROUP BY session_quality
ORDER BY session_count DESC;
6. 进阶场景
场景 1: 按来源渠道分析会话质量
SELECT
u.traffic_source,
COUNT(DISTINCT s.session_id) AS sessions,
ROUND(AVG(s.page_views), 1) AS avg_pages,
ROUND(SUM(CASE WHEN s.page_views = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS bounce_rate
FROM session_agg s
JOIN users u ON s.user_id = u.user_id
GROUP BY u.traffic_source
ORDER BY bounce_rate;
场景 2: 会话内事件序列标记
-- 标记每个事件在会话中的位置(第一步、第二步...)
SELECT
user_id,
session_id,
event_time,
page,
ROW_NUMBER() OVER (PARTITION BY user_id, session_id ORDER BY event_time) AS step_number,
CASE
WHEN event_type = 'complete' THEN 'conversion'
WHEN event_type = 'add_to_cart' THEN 'intent'
ELSE 'browse'
END AS event_category
FROM with_session
ORDER BY user_id, session_id, step_number;
场景 3: 跨会话用户行为分析
以下查询基于第 1 节会话切分后的
with_session
with_session
CTE 结果。
-- 统计每个用户的总会话数、平均会话间隔
WITH session_times AS (
SELECT
user_id,
session_id,
MIN(event_time) AS session_start
FROM with_session
GROUP BY user_id, session_id
),
session_gaps AS (
SELECT
user_id,
session_id,
session_start,
LAG(session_start) OVER (PARTITION BY user_id ORDER BY session_start) AS prev_session_start,
TIMESTAMPDIFF(HOUR, LAG(session_start) OVER (PARTITION BY user_id ORDER BY session_start), session_start) AS hours_between_sessions
FROM session_times
)
SELECT
user_id,
COUNT(*) AS total_sessions,
ROUND(AVG(hours_between_sessions), 1) AS avg_hours_between_sessions
FROM session_gaps
WHERE hours_between_sessions IS NOT NULL
GROUP BY user_id;
-- 错误: 不指定 ORDER BY,路径顺序不确定
GROUP_CONCAT(page SEPARATOR ' → ')
-- 正确: 按时间排序保证路径顺序
GROUP_CONCAT(page ORDER BY event_time SEPARATOR ' → ')
5. 大规模数据性能
-- 优化: 先按日期分区过滤,再会话切分
WITH filtered_events AS (
SELECT * FROM user_events
WHERE event_time >= '2026-03-01' AND event_time < '2026-03-02'
),
-- 后续会话切分逻辑不变...
性能优化建议
场景
优化策略
大数据量会话切分
按天分区处理,避免全表窗口计算
高频查询
将会话聚合结果物化为 Dynamic Table
路径分析
对
(user_id, event_time)
(user_id, event_time)
建 Bloom Filter 索引
跨天会话
使用
DATE_TRUNC('DAY', event_time)
DATE_TRUNC('DAY', event_time)
作为额外分区键
-- 推荐: 将会话切分逻辑封装为 Dynamic Table
CREATE DYNAMIC TABLE dws_user_sessions
REFRESH INTERVAL 1 HOUR
AS
WITH with_prev AS (...),
with_gap AS (...),
with_session AS (...)
SELECT user_id, session_id, event_time, page, event_type
FROM with_session;