-- 创建测试表
CREATE TABLE IF NOT EXISTS doc_ts_events (
event_id INT,
user_id INT,
event_type STRING,
amount DOUBLE,
event_time TIMESTAMP
);
-- 插入测试数据(覆盖 2024-01 至 2024-03)
INSERT INTO doc_ts_events VALUES
(1, 101, 'purchase', 120.50, CAST('2024-01-05 10:00:00' AS TIMESTAMP)),
(2, 102, 'purchase', 85.00, CAST('2024-01-10 14:30:00' AS TIMESTAMP)),
(3, 101, 'refund', 30.00, CAST('2024-01-15 09:00:00' AS TIMESTAMP)),
(4, 103, 'purchase', 200.00, CAST('2024-01-20 16:00:00' AS TIMESTAMP)),
(5, 102, 'purchase', 55.00, CAST('2024-01-25 11:00:00' AS TIMESTAMP)),
(6, 101, 'purchase', 300.00, CAST('2024-02-03 10:00:00' AS TIMESTAMP)),
(7, 104, 'purchase', 150.00, CAST('2024-02-08 13:00:00' AS TIMESTAMP)),
(8, 103, 'refund', 50.00, CAST('2024-02-14 15:00:00' AS TIMESTAMP)),
(9, 102, 'purchase', 90.00, CAST('2024-02-18 09:30:00' AS TIMESTAMP)),
(10, 101, 'purchase', 75.00, CAST('2024-02-22 17:00:00' AS TIMESTAMP)),
(11, 104, 'purchase', 220.00, CAST('2024-03-01 10:00:00' AS TIMESTAMP)),
(12, 103, 'purchase', 180.00, CAST('2024-03-07 14:00:00' AS TIMESTAMP)),
(13, 101, 'refund', 25.00, CAST('2024-03-12 11:00:00' AS TIMESTAMP)),
(14, 102, 'purchase', 130.00, CAST('2024-03-18 16:00:00' AS TIMESTAMP)),
(15, 104, 'purchase', 95.00, CAST('2024-03-25 12:00:00' AS TIMESTAMP));
说明:
TIMESTAMP
TIMESTAMP
字面量不支持直接写字符串,需使用
CAST('...' AS TIMESTAMP)
CAST('...' AS TIMESTAMP)
显式转换。
场景一:按时间粒度聚合
按月统计各事件类型的发生次数和金额总和,是最常见的时间序列聚合需求。
-- 按月、事件类型聚合
SELECT
DATE_FORMAT(event_time, 'yyyy-MM') AS month,
event_type,
COUNT(*) AS event_count,
ROUND(SUM(amount), 2) AS total_amount
FROM doc_ts_events
GROUP BY DATE_FORMAT(event_time, 'yyyy-MM'), event_type
ORDER BY month, event_type;
执行结果:
month
event_type
event_count
total_amount
2024-01
purchase
4
460.5
2024-01
refund
1
30
2024-02
purchase
4
615
2024-02
refund
1
50
2024-03
purchase
4
625
2024-03
refund
1
25
如需按周聚合,使用
DATE_TRUNC('week', ...)
DATE_TRUNC('week', ...)
截断到周一:
-- 按自然周聚合(week_start 为每周周一)
SELECT
DATE_FORMAT(DATE_TRUNC('week', event_time), 'yyyy-MM-dd') AS week_start,
COUNT(*) AS event_count,
ROUND(SUM(amount), 2) AS weekly_amount
FROM doc_ts_events
WHERE event_type = 'purchase'
GROUP BY DATE_TRUNC('week', event_time)
ORDER BY week_start;
执行结果(前 5 行):
week_start
event_count
weekly_amount
2024-01-01
1
120.5
2024-01-08
1
85
2024-01-15
1
200
2024-01-22
1
55
2024-01-29
1
300
场景二:滚动窗口计算
计算每个交易日的 7 日滚动平均金额,用于平滑短期波动、观察趋势。
RANGE BETWEEN INTERVAL N DAY PRECEDING AND CURRENT ROW
RANGE BETWEEN INTERVAL N DAY PRECEDING AND CURRENT ROW
基于时间范围(而非行数)定义窗口,能正确处理日期不连续的情况。
WITH daily_sales AS (
SELECT
CAST(DATE_FORMAT(event_time, 'yyyy-MM-dd') AS DATE) AS sale_day,
ROUND(SUM(amount), 2) AS daily_amount
FROM doc_ts_events
WHERE event_type = 'purchase'
GROUP BY DATE_FORMAT(event_time, 'yyyy-MM-dd')
)
SELECT
sale_day,
daily_amount,
ROUND(
AVG(daily_amount) OVER (
ORDER BY sale_day
RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW
), 2
) AS rolling_7d_avg
FROM daily_sales
ORDER BY sale_day;
WITH monthly_sales AS (
SELECT
DATE_FORMAT(event_time, 'yyyy-MM') AS month,
ROUND(SUM(amount), 2) AS monthly_amount
FROM doc_ts_events
WHERE event_type = 'purchase'
GROUP BY DATE_FORMAT(event_time, 'yyyy-MM')
)
SELECT
month,
monthly_amount,
LAG(monthly_amount, 1) OVER (ORDER BY month) AS prev_month_amount,
ROUND(
(monthly_amount - LAG(monthly_amount, 1) OVER (ORDER BY month))
/ LAG(monthly_amount, 1) OVER (ORDER BY month) * 100,
2
) AS mom_growth_pct
FROM monthly_sales
ORDER BY month;
执行结果:
month
monthly_amount
prev_month_amount
mom_growth_pct
2024-01
460.5
NULL
NULL
2024-02
615
460.5
33.55
2024-03
625
615
1.63
说明:第一行无上期数据,
LAG
LAG
返回
NULL
NULL
,环比增长率也为
NULL
NULL
。如需将
NULL
NULL
显示为 0,可用
COALESCE(mom_growth_pct, 0)
COALESCE(mom_growth_pct, 0)
。
场景四:时间间隔计算
计算每个用户相邻两次事件之间的间隔天数,用于分析用户活跃频率和购买周期。
SELECT
user_id,
event_type,
DATE_FORMAT(event_time, 'yyyy-MM-dd') AS event_day,
LAG(DATE_FORMAT(event_time, 'yyyy-MM-dd'), 1)
OVER (PARTITION BY user_id ORDER BY event_time) AS prev_event_day,
DATEDIFF(
DATE_FORMAT(event_time, 'yyyy-MM-dd'),
LAG(DATE_FORMAT(event_time, 'yyyy-MM-dd'), 1)
OVER (PARTITION BY user_id ORDER BY event_time)
) AS days_since_last
FROM doc_ts_events
ORDER BY user_id, event_time;
执行结果:
user_id
event_type
event_day
prev_event_day
days_since_last
101
purchase
2024-01-05
NULL
NULL
101
refund
2024-01-15
2024-01-05
10
101
purchase
2024-02-03
2024-01-15
19
101
purchase
2024-02-22
2024-02-03
19
101
refund
2024-03-12
2024-02-22
19
102
purchase
2024-01-10
NULL
NULL
102
purchase
2024-01-25
2024-01-10
15
102
purchase
2024-02-18
2024-01-25
24
102
purchase
2024-03-18
2024-02-18
29
103
purchase
2024-01-20
NULL
NULL
103
refund
2024-02-14
2024-01-20
25
103
purchase
2024-03-07
2024-02-14
22
104
purchase
2024-02-08
NULL
NULL
104
purchase
2024-03-01
2024-02-08
22
104
purchase
2024-03-25
2024-03-01
24
场景五:连续日期补零
当某些日期没有数据时,直接
GROUP BY
GROUP BY
会跳过这些日期,导致折线图出现断点。使用
SEQUENCE
SEQUENCE
+
EXPLODE
EXPLODE
生成完整日期序列,再
LEFT JOIN
LEFT JOIN
实际数据,用
COALESCE
COALESCE
将空值填为 0。
WITH date_spine AS (
-- 生成 2024-01-01 到 2024-03-31 的连续日期
SELECT EXPLODE(
SEQUENCE(
CAST('2024-01-01' AS DATE),
CAST('2024-03-31' AS DATE),
INTERVAL 1 DAY
)
) AS cal_day
),
daily_sales AS (
SELECT
CAST(DATE_FORMAT(event_time, 'yyyy-MM-dd') AS DATE) AS sale_day,
ROUND(SUM(amount), 2) AS daily_amount
FROM doc_ts_events
WHERE event_type = 'purchase'
GROUP BY DATE_FORMAT(event_time, 'yyyy-MM-dd')
)
SELECT
d.cal_day,
COALESCE(s.daily_amount, 0) AS daily_amount
FROM date_spine d
LEFT JOIN daily_sales s ON d.cal_day = s.sale_day
ORDER BY d.cal_day
LIMIT 10;
执行结果(前 10 行):
cal_day
daily_amount
2024-01-01
0
2024-01-02
0
2024-01-03
0
2024-01-04
0
2024-01-05
120.5
2024-01-06
0
2024-01-07
0
2024-01-08
0
2024-01-09
0
2024-01-10
85
说明:
SEQUENCE(start, end, INTERVAL 1 DAY)
SEQUENCE(start, end, INTERVAL 1 DAY)
生成日期数组,
EXPLODE
EXPLODE
将数组展开为多行。无数据的日期通过
LEFT JOIN
LEFT JOIN
保留,
COALESCE
COALESCE
将
NULL
NULL
替换为
0
0
。
场景六:最近N天活跃用户
筛选在最近 90 天内有购买行为且购买次数不少于 2 次的用户,统计其购买频次和最近活跃日期。
SELECT
user_id,
COUNT(*) AS purchase_count,
MAX(DATE_FORMAT(event_time, 'yyyy-MM-dd')) AS last_active_day
FROM doc_ts_events
WHERE event_type = 'purchase'
AND event_time >= CAST('2024-03-31' AS TIMESTAMP) - INTERVAL 90 DAY
GROUP BY user_id
HAVING COUNT(*) >= 2
ORDER BY last_active_day DESC;