SELECT
user_id,
event_types,
-- 是否有 purchase 事件
exists(event_types, t -> t = 'purchase') AS has_purchase,
-- 是否所有事件都是 view
forall(event_types, t -> t = 'view') AS all_views,
-- 是否有时长超过 20 的事件
exists(durations, d -> d > 20) AS has_long_session
FROM user_events;
输出:
user_id
event_types
has_purchase
all_views
has_long_session
1
view:click:view:purchase:view
true
false
true
2
click:view:click
false
false
true
3
view:view:view
false
true
true
场景 3:数组元素转换(transform)
问题
对数组中每个元素进行统一转换,如单位换算、格式化等。
SQL 实现
SELECT
user_id,
durations,
-- 毫秒转秒(假设原始数据是毫秒)
transform(durations, d -> d * 1000) AS durations_ms,
-- 时长分级
transform(durations, d ->
CASE
WHEN d >= 30 THEN 'long'
WHEN d >= 10 THEN 'medium'
ELSE 'short'
END
) AS duration_levels
FROM user_events;
输出:
user_id
durations
durations_ms
duration_levels
1
10:25:15:100:8
10000:25000:15000:100000:8000
medium:long:medium:long:short
2
5:30:12
5000:30000:12000
short:long:medium
3
20:25:15
20000:25000:15000
medium:long:medium
场景 4:数组内聚合(array_aggregate)
问题
在数组内部进行聚合计算,如求和、最大值、计数等,无需展开为多行。
SQL 实现
SELECT
user_id,
durations,
-- 总时长
array_aggregate(durations, 0, (acc, x) -> acc + x) AS total_duration,
-- 最大时长
array_aggregate(durations, 0, (acc, x) -> CASE WHEN x > acc THEN x ELSE acc END) AS max_duration,
-- 事件数量(数组长度)
array_aggregate(durations, 0, (acc, x) -> acc + 1) AS event_count
FROM user_events;
SELECT
user_id,
event_types,
durations,
-- 计算每个事件的"单位时长价值"(假设 purchase 价值 100,click 价值 10,view 价值 1)
zip_with(
event_types,
durations,
(t, d) -> d * CASE t WHEN 'purchase' THEN 100 WHEN 'click' THEN 10 ELSE 1 END
) AS event_values
FROM user_events;
输出:
user_id
event_values
1
10:250:15:10000:8
2
50:30:120
3
20:25:15
场景 6:Map 数据过滤与转换
问题
处理键值对数据(如用户画像标签、配置参数),按条件过滤或转换。
SQL 实现
-- 使用 MAP_FROM_ARRAYS 创建 Map(推荐方式)
WITH user_profiles AS (
SELECT 1 AS user_id, MAP_FROM_ARRAYS(ARRAY['age', 'city', 'tier'], ARRAY[25, 1, 3]) AS profile UNION ALL
SELECT 2, MAP_FROM_ARRAYS(ARRAY['age', 'city', 'tier'], ARRAY[30, 2, 1]) UNION ALL
SELECT 3, MAP_FROM_ARRAYS(ARRAY['age', 'city', 'tier'], ARRAY[22, 1, 2])
)
SELECT
user_id,
profile,
-- 过滤出 tier > 1 的用户画像条目
map_filter(profile, (k, v) -> k = 'tier' AND v > 1) AS high_tier_filter,
-- 将 age 值翻倍
transform_values(profile, (k, v) ->
CASE WHEN k = 'age' THEN v * 2 ELSE v END
) AS doubled_age,
-- 将键名转为大写
transform_keys(profile, (k, v) -> UPPER(k)) AS upper_keys
FROM user_profiles;
SELECT
user_id,
event_types,
durations,
-- 购买标记
CASE WHEN exists(event_types, t -> t = 'purchase') THEN 'buyer' ELSE 'non_buyer' END AS buyer_flag,
-- 平均时长(总时长 / 事件数)
array_aggregate(durations, 0, (acc, x) -> acc + x) /
array_aggregate(durations, 0, (acc, x) -> acc + 1) AS avg_duration,
-- 高频标记
CASE WHEN array_aggregate(durations, 0, (acc, x) -> acc + 1) >= 4
THEN 'high_freq' ELSE 'low_freq' END AS freq_flag,
-- 标签数组转换
transform(tags, t -> CONCAT('tag_', t)) AS prefixed_tags
FROM user_events;
-- 推荐: 先过滤再转换(减少处理元素数)
transform(filter(durations, d -> d > 10), d -> d * 2)
-- 不推荐: 先转换再过滤(所有元素都经过 transform)
filter(transform(durations, d -> d * 2), d -> d > 20)