WITH retention_base AS (
SELECT
u.user_id,
u.register_date,
a.activity_date,
DATEDIFF(a.activity_date, u.register_date) AS days_since_register
FROM users u
JOIN activity a ON u.user_id = a.user_id
),
cohort_size AS (
SELECT
DATE_TRUNC('MONTH', CAST(register_date AS DATE)) AS cohort_month,
COUNT(DISTINCT user_id) AS cohort_users
FROM users
GROUP BY 1
),
retention_by_day AS (
SELECT
DATE_TRUNC('MONTH', CAST(rb.register_date AS DATE)) AS cohort_month,
rb.days_since_register,
COUNT(DISTINCT rb.user_id) AS retained_users
FROM retention_base rb
GROUP BY 1, 2
)
SELECT
r.cohort_month,
r.days_since_register,
r.retained_users,
c.cohort_users,
ROUND(r.retained_users * 100.0 / c.cohort_users, 1) AS retention_pct
FROM retention_by_day r
JOIN cohort_size c ON r.cohort_month = c.cohort_month
WHERE r.days_since_register IN (0, 1, 3, 7, 14, 30)
ORDER BY r.cohort_month, r.days_since_register;
输出示例:
cohort_month
days_since_register
retained_users
cohort_users
retention_pct
2026-01-01
0
5
5
100.0
2026-01-01
1
5
5
100.0
2026-01-01
3
2
5
40.0
2026-01-01
7
3
5
60.0
2026-02-01
0
5
5
100.0
2026-02-01
7
4
5
80.0
关键说明
DATEDIFF(end, start)
DATEDIFF(end, start)
返回两个日期之间的天数差
DATE_TRUNC('MONTH', date)
DATE_TRUNC('MONTH', date)
将日期截断到月初,用于同期群分组
留存天数选择应根据业务周期:游戏看 1/3/7 日,SaaS 看 7/14/30 日
2. 同期群留存矩阵(行转列)
适用场景
BI 报表中常见的留存矩阵:行 = 同期群,列 = 留存天数,单元格 = 留存率。
SQL 实现
-- 在基础留存查询基础上,使用 CASE 行转列
SELECT
r.cohort_month,
c.cohort_users,
MAX(CASE WHEN r.days_since_register = 0 THEN ROUND(r.retained_users * 100.0 / c.cohort_users, 1) END) AS day_0,
MAX(CASE WHEN r.days_since_register = 1 THEN ROUND(r.retained_users * 100.0 / c.cohort_users, 1) END) AS day_1,
MAX(CASE WHEN r.days_since_register = 3 THEN ROUND(r.retained_users * 100.0 / c.cohort_users, 1) END) AS day_3,
MAX(CASE WHEN r.days_since_register = 7 THEN ROUND(r.retained_users * 100.0 / c.cohort_users, 1) END) AS day_7,
MAX(CASE WHEN r.days_since_register = 14 THEN ROUND(r.retained_users * 100.0 / c.cohort_users, 1) END) AS day_14,
MAX(CASE WHEN r.days_since_register = 30 THEN ROUND(r.retained_users * 100.0 / c.cohort_users, 1) END) AS day_30
FROM retention_by_day r
JOIN cohort_size c ON r.cohort_month = c.cohort_month
GROUP BY r.cohort_month, c.cohort_users
ORDER BY r.cohort_month;
输出示例:
cohort_month
cohort_users
day_0
day_1
day_3
day_7
day_14
day_30
2026-01-01
5
100
100
40
60
NULL
NULL
2026-02-01
5
100
100
20
80
20
NULL
2026-03-01
5
100
100
20
60
20
NULL
⚠️ 注意:
day_30
day_30
为
NULL
NULL
表示该同期群数据尚未覆盖 30 天,非计算错误。
动态列数
如需支持更多留存天数(如 60/90 日),只需在
SELECT
SELECT
中追加
MAX(CASE WHEN ...)
MAX(CASE WHEN ...)
列。
3. 连续留存:最长连续活跃天数
核心逻辑
使用窗口函数
ROW_NUMBER()
ROW_NUMBER()
标记连续区间:
如果日期连续,则
activity_date - row_number
activity_date - row_number
的差值相同
同一差值即为同一连续区间
SQL 实现
WITH ranked AS (
SELECT
user_id,
CAST(activity_date AS DATE) AS activity_date,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY activity_date) AS rn
FROM activity
),
grouped AS (
SELECT
user_id,
activity_date,
rn,
-- 核心技巧: 用 DATEDIFF 计算相对于固定基准日的天数差,再减去 rn
DATEDIFF(activity_date, CAST('2000-01-01' AS DATE)) - rn AS grp
FROM ranked
),
streaks AS (
SELECT
user_id,
grp,
MIN(activity_date) AS streak_start,
MAX(activity_date) AS streak_end,
COUNT(*) AS streak_length
FROM grouped
GROUP BY user_id, grp
),
max_streaks AS (
SELECT
user_id,
MAX(streak_length) AS max_consecutive_days
FROM streaks
GROUP BY user_id
)
SELECT
ms.user_id,
ms.max_consecutive_days,
s.streak_start,
s.streak_end
FROM max_streaks ms
JOIN streaks s ON ms.user_id = s.user_id AND ms.max_consecutive_days = s.streak_length
ORDER BY ms.max_consecutive_days DESC, ms.user_id;
输出示例:
user_id
max_consecutive_days
streak_start
streak_end
1
2
2026-01-05
2026-01-06
2
2
2026-01-10
2026-01-11
...
...
...
...
扩展:连续 N 天活跃用户数
-- 统计连续活跃 ≥ 3 天的用户数
SELECT COUNT(DISTINCT user_id) AS users_with_3day_streak
FROM streaks
WHERE streak_length >= 3;
4. 流失分析:N 天未活跃用户识别
用户状态分类
状态
定义
运营动作
active
7 天内有活跃
保持体验
warning
7-13 天未活跃
推送提醒
at_risk
14-29 天未活跃
优惠挽回
churned
30+ 天未活跃
召回活动
SQL 实现
-- 假设分析基准日为 2026-03-25
WITH user_last_active AS (
SELECT
u.user_id,
u.register_date,
MAX(a.activity_date) AS last_active_date,
DATEDIFF(CAST('2026-03-25' AS DATE), CAST(MAX(a.activity_date) AS DATE)) AS days_since_active
FROM users u
LEFT JOIN activity a ON u.user_id = a.user_id
GROUP BY u.user_id, u.register_date
)
SELECT
user_id,
register_date,
last_active_date,
days_since_active,
CASE
WHEN days_since_active >= 30 THEN 'churned'
WHEN days_since_active >= 14 THEN 'at_risk'
WHEN days_since_active >= 7 THEN 'warning'
ELSE 'active'
END AS user_status
FROM user_last_active
ORDER BY days_since_active DESC;
输出示例:
user_id
register_date
last_active_date
days_since_active
user_status
5
2026-01-25
2026-01-26
58
churned
4
2026-01-20
2026-01-27
57
churned
8
2026-02-10
2026-02-25
28
at_risk
12
2026-03-05
2026-03-20
5
active
流失率统计
SELECT
user_status,
COUNT(*) AS user_count,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 1) AS pct
FROM user_last_active
GROUP BY user_status
ORDER BY user_count DESC;
5. 留存曲线:按周聚合的留存趋势
适用场景
生成折线图数据,展示留存率随时间衰减的趋势。
SQL 实现
WITH retention_base AS (
SELECT
u.user_id,
u.register_date,
a.activity_date,
DATEDIFF(a.activity_date, u.register_date) AS days_since_register
FROM users u
JOIN activity a ON u.user_id = a.user_id
),
cohort_size AS (
SELECT
DATE_TRUNC('MONTH', CAST(register_date AS DATE)) AS cohort_month,
COUNT(DISTINCT user_id) AS cohort_users
FROM users
GROUP BY 1
),
retention_by_week AS (
SELECT
DATE_TRUNC('MONTH', CAST(rb.register_date AS DATE)) AS cohort_month,
FLOOR(rb.days_since_register / 7.0) AS week_number,
COUNT(DISTINCT rb.user_id) AS retained_users
FROM retention_base rb
GROUP BY 1, 2
)
SELECT
r.cohort_month,
r.week_number,
r.retained_users,
c.cohort_users,
ROUND(r.retained_users * 100.0 / c.cohort_users, 1) AS retention_pct
FROM retention_by_week r
JOIN cohort_size c ON r.cohort_month = c.cohort_month
ORDER BY r.cohort_month, r.week_number;
输出示例:
cohort_month
week_number
retained_users
cohort_users
retention_pct
2026-01-01
0
5
5
100.0
2026-01-01
1
4
5
80.0
2026-01-01
2
2
5
40.0
2026-01-01
3
2
5
40.0
2026-02-01
0
5
5
100.0
2026-02-01
1
4
5
80.0
可视化建议
X 轴:
week_number
week_number
(周数)
Y 轴:
retention_pct
retention_pct
(留存率)
图例:
cohort_month
cohort_month
(不同颜色代表不同注册月份)
6. 高级场景:同期群 + 行为特征交叉分析
场景:不同渠道用户的留存差异
SELECT
u.channel,
DATE_TRUNC('MONTH', CAST(u.register_date AS DATE)) AS cohort_month,
COUNT(DISTINCT u.user_id) AS cohort_users,
COUNT(DISTINCT CASE WHEN DATEDIFF(a.activity_date, u.register_date) = 7 THEN u.user_id END) AS day_7_retained,
ROUND(
COUNT(DISTINCT CASE WHEN DATEDIFF(a.activity_date, u.register_date) = 7 THEN u.user_id END) * 100.0
/ COUNT(DISTINCT u.user_id), 1
) AS day_7_retention_pct
FROM users u
LEFT JOIN activity a ON u.user_id = a.user_id
GROUP BY u.channel, DATE_TRUNC('MONTH', CAST(u.register_date AS DATE))
ORDER BY cohort_month, day_7_retention_pct DESC;
场景:同期群 LTV(生命周期价值)
SELECT
DATE_TRUNC('MONTH', CAST(u.register_date AS DATE)) AS cohort_month,
COUNT(DISTINCT u.user_id) AS cohort_users,
SUM(o.order_amount) AS total_revenue,
ROUND(SUM(o.order_amount) / COUNT(DISTINCT u.user_id), 2) AS avg_ltv
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id
GROUP BY 1
ORDER BY 1;
-- 按月分组(推荐)
DATE_TRUNC('MONTH', CAST(register_date AS DATE))
-- 按周分组
DATE_TRUNC('WEEK', CAST(register_date AS DATE))
-- 按天分组(数据量大时不推荐)
CAST(register_date AS DATE)
性能优化建议
场景
优化策略
大数据量留存计算
先按
user_id
user_id
去重活跃日志,再 JOIN
同期群矩阵
将
retention_base
retention_base
物化为 Dynamic Table
连续活跃统计
对
activity
activity
表按
(user_id, activity_date)
(user_id, activity_date)
建 Bloom Filter 索引
流失分析
基准日参数化,避免硬编码
-- 优化示例: 先去重再计算
WITH distinct_activity AS (
SELECT DISTINCT user_id, CAST(activity_date AS DATE) AS activity_date
FROM activity
),
retention_base AS (
SELECT
u.user_id,
u.register_date,
a.activity_date,
DATEDIFF(a.activity_date, u.register_date) AS days_since_register
FROM users u
JOIN distinct_activity a ON u.user_id = a.user_id
)
-- 后续逻辑不变...