⚠️ Silver 和 Gold 全部使用 Dynamic Table,不推荐物化视图。DT 支持增量刷新和 Time Travel,物化视图不支持。
实施步骤
前置条件
VCluster 可用(使用
DEFAULT
DEFAULT
,GP 型,Serverless 按需唤醒)
Bronze 数据已入库(
nhl_game_data.*
nhl_game_data.*
10 张表)
具备 CREATE SCHEMA / CREATE DYNAMIC TABLE 权限
第一步:创建 Schema
用独立的 Schema 物理隔离各层:
CREATE SCHEMA IF NOT EXISTS silver COMMENT 'Medallion Silver 清洗层';
CREATE SCHEMA IF NOT EXISTS gold COMMENT 'Medallion Gold 聚合指标层';
第二步:Silver 层 — 维度表
最简单的 DT:直接从 Bronze 表筛选/转换列。这两张表数据量小(33 行和 3,925 行),每次 FULL 刷新也毫无压力。
-- 球队维度
CREATE OR REPLACE DYNAMIC TABLE silver.dim_team
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Silver 球队维度表'
AS
SELECT
team_id,
franchiseid,
shortname,
teamname,
abbreviation,
link
FROM nhl_game_data.team_info;
-- 球员维度(标准化 + 添加全名列)
CREATE OR REPLACE DYNAMIC TABLE silver.dim_player
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Silver 球员维度表——标准化字段 + 全名'
AS
SELECT
player_id,
firstname,
lastname,
CONCAT(firstname, ' ', lastname) AS full_name,
nationality,
birthcity,
primaryposition AS position,
birthdate,
height,
height_cm,
CAST(NULLIF(REGEXP_REPLACE(weight, ',', ''), '') AS INT) AS weight_kg,
shootscatches
FROM nhl_game_data.player_info;
为什么用
REGEXP_REPLACE(weight, ',', '')
REGEXP_REPLACE(weight, ',', '')
? NHL 原始数据中数值字段(如 hits、weight)可能包含千位分隔符逗号(如 "1,234"),直接 CAST 会报错。先去掉逗号再转 INT 是必要的清洗步骤。
第三步:Silver 层 — 事实表
事实表的核心工作:JOIN 维度表关联名称 + 类型标准化。以球员技术统计为例:
CREATE OR REPLACE DYNAMIC TABLE silver.fact_skater_stats
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Silver 球员技术统计事实表——关联球员名+球队名+赛季'
AS
SELECT
s.game_id,
s.player_id,
p.full_name AS player_name,
p.position,
s.team_id,
t.teamname AS team_name,
t.abbreviation AS team_abbr,
g.season,
g.date_time_gmt AS game_date,
s.timeonice,
s.goals,
s.assists,
s.goals + s.assists AS points, -- 计算字段:得分
s.shots,
CAST(NULLIF(REGEXP_REPLACE(s.hits, ',', ''), '') AS INT) AS hits,
s.powerplaygoals,
s.penaltyminutes,
s.plusminus,
s.eventimeonice,
s.powerplaytimeonice
FROM nhl_game_data.game_skater_stats s
LEFT JOIN nhl_game_data.game g
ON s.game_id = g.game_id
LEFT JOIN silver.dim_player p
ON s.player_id = p.player_id
LEFT JOIN silver.dim_team t
ON s.team_id = t.team_id;
⚠️ Silver 事实表引用了 Silver 维度表(
silver.dim_player
silver.dim_player
、
silver.dim_team
silver.dim_team
)。这意味着系统会先刷新维度表,再刷新事实表——DT 的依赖链自动处理。
门将统计事实表同理,额外计算了扑救率:
CREATE OR REPLACE DYNAMIC TABLE silver.fact_goalie_stats
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Silver 门将技术统计事实表——含扑救率计算'
AS
SELECT
gs.game_id,
gs.player_id,
p.full_name AS player_name,
t.teamname AS team_name,
t.abbreviation AS team_abbr,
g.season,
g.date_time_gmt AS game_date,
gs.timeonice,
gs.shots AS shots_faced,
gs.saves,
CASE WHEN gs.shots > 0
THEN ROUND(gs.saves * 1.0 / gs.shots, 3)
ELSE NULL
END AS save_pct, -- 计算字段:扑救率
gs.decision
FROM nhl_game_data.game_goalie_stats gs
LEFT JOIN nhl_game_data.game g
ON gs.game_id = g.game_id
LEFT JOIN silver.dim_player p
ON gs.player_id = p.player_id
LEFT JOIN silver.dim_team t
ON gs.team_id = t.team_id;
CREATE OR REPLACE DYNAMIC TABLE gold.scoring_leaders
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Gold 每赛季射手榜 TOP 20——按得分(进球+助攻)排名'
AS
SELECT season, rank, player_id, player_name, position, team_abbr,
games_played, goals, assists, points,
ROUND(points * 1.0 / games_played, 2) AS pts_per_game
FROM (
SELECT
season, player_id, player_name, position, team_abbr,
COUNT(*) AS games_played,
SUM(goals) AS goals,
SUM(assists) AS assists,
SUM(points) AS points,
RANK() OVER (PARTITION BY season ORDER BY SUM(points) DESC) AS rank
FROM silver.fact_skater_stats
GROUP BY season, player_id, player_name, position, team_abbr
) t
WHERE rank <= 20;
CREATE OR REPLACE DYNAMIC TABLE gold.team_season_summary
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Gold 球队赛季战绩——胜/负/进球/失球/积分'
AS
SELECT
g.season, g.team_id,
t.teamname AS team_name,
t.abbreviation AS team_abbr,
COUNT(*) AS games_played,
SUM(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 1
WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 1
ELSE 0 END) AS wins,
SUM(CASE WHEN g.side = 'home' AND g.outcome LIKE 'away win%' THEN 1
WHEN g.side = 'away' AND g.outcome LIKE 'home win%' THEN 1
ELSE 0 END) AS losses,
SUM(CASE WHEN g.side = 'home' THEN g.home_goals
ELSE g.away_goals END) AS goals_for,
SUM(CASE WHEN g.side = 'home' THEN g.away_goals
ELSE g.home_goals END) AS goals_against,
SUM(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 2
WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 2
ELSE 0 END) AS points
FROM (
SELECT season, home_team_id AS team_id, outcome,
home_goals, away_goals, 'home' AS side
FROM nhl_game_data.game
UNION ALL
SELECT season, away_team_id AS team_id, outcome,
home_goals, away_goals, 'away' AS side
FROM nhl_game_data.game
) g
LEFT JOIN silver.dim_team t ON g.team_id = t.team_id
GROUP BY g.season, g.team_id, t.teamname, t.abbreviation;
⚠️ 注意:初版用
outcome LIKE '%win%'
outcome LIKE '%win%'
匹配胜利条件,但这会导致主队获胜时客队行也被计为胜。必须按
side
side
和
outcome
outcome
交叉匹配:主队行只匹配
'home win%'
'home win%'
,客队行只匹配
'away win%'
'away win%'
。
验证结果(2019-20 赛季 TOP 5):
team
games
wins
losses
points
Lightning (TBL)
190
122
68
244
Stars (DAL)
192
104
88
208
Golden Knights (VGK)
182
102
80
204
Avalanche (COL)
170
102
68
204
Flyers (PHI)
170
102
68
204
门将赛季排名 + 球员生涯统计 + 主客场对比
完整 DDL 见本文附录。核心模式相同:从 Silver 层聚合 →
RANK() OVER (PARTITION BY season ...)
RANK() OVER (PARTITION BY season ...)
→ 取 TOP N。
第六步:验证全链路
-- 各层行数对比
SELECT 'Bronze game' AS layer, COUNT(*) FROM nhl_game_data.game
UNION ALL SELECT 'Silver dim_team', COUNT(*) FROM silver.dim_team
UNION ALL SELECT 'Silver fact_skater', COUNT(*) FROM silver.fact_skater_stats
UNION ALL SELECT 'Gold scoring_leaders', COUNT(*) FROM gold.scoring_leaders
UNION ALL SELECT 'Gold team_season', COUNT(*) FROM gold.team_season_summary;
-- 查看 DT 刷新历史
SHOW DYNAMIC TABLE REFRESH HISTORY WHERE name = 'scoring_leaders';
ALTER TABLE table_name SET PROPERTIES ('change_tracking' = 'true')
ALTER TABLE table_name SET PROPERTIES ('change_tracking' = 'true')
)。
设计原则
1. 层间引用规则
引用方向
允许
示例
Silver → Bronze
✅
FROM nhl_game_data.game
FROM nhl_game_data.game
Gold → Silver
✅
FROM silver.fact_skater_stats
FROM silver.fact_skater_stats
Gold → Bronze
⚠️ 不推荐
应通过 Silver 层间接访问
Gold → Gold
⚠️ 谨慎
仅在需要多层聚合时使用
Bronze → Silver
❌ 禁止
下层不应依赖上层
2. LEFT JOIN 过滤条件必须放 ON 子句
-- ❌ 错误:WHERE 过滤导致 LEFT JOIN 退化为 INNER JOIN
SELECT * FROM skater_stats s
LEFT JOIN team_info t ON s.team_id = t.team_id
WHERE t.abbreviation = 'TBL';
-- ✅ 正确:过滤条件放 ON 子句
SELECT * FROM skater_stats s
LEFT JOIN team_info t
ON s.team_id = t.team_id AND t.abbreviation = 'TBL';
CREATE OR REPLACE DYNAMIC TABLE gold.player_career_stats
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Gold 球员生涯总览——全赛季汇总 + 场均效率'
AS
SELECT
player_id, player_name, position,
COUNT(*) AS games_played,
SUM(goals) AS total_goals,
SUM(assists) AS total_assists,
SUM(points) AS total_points,
ROUND(SUM(points) * 1.0 / COUNT(*), 2) AS pts_per_game,
ROUND(SUM(goals) * 1.0 / NULLIF(SUM(shots), 0), 3) AS shooting_pct,
AVG(timeonice) AS avg_timeonice_sec,
SUM(penaltyminutes) AS total_pim,
AVG(plusminus) AS avg_plusminus
FROM silver.fact_skater_stats
GROUP BY player_id, player_name, position;
门将赛季排名
CREATE OR REPLACE DYNAMIC TABLE gold.goalie_season_rankings
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Gold 门将赛季排名 TOP 15——按胜场数排名'
AS
SELECT season, rank, player_id, player_name, team_abbr,
games_played, wins, saves, shots_faced,
ROUND(save_pct, 3) AS save_pct
FROM (
SELECT
season, player_id, player_name, team_abbr,
COUNT(*) AS games_played,
SUM(CASE WHEN decision = 'W' THEN 1 ELSE 0 END) AS wins,
SUM(saves) AS saves,
SUM(shots_faced) AS shots_faced,
CASE WHEN SUM(shots_faced) > 0
THEN SUM(saves) * 1.0 / SUM(shots_faced)
ELSE NULL END AS save_pct,
RANK() OVER (PARTITION BY season ORDER BY
SUM(CASE WHEN decision = 'W' THEN 1 ELSE 0 END) DESC) AS rank
FROM silver.fact_goalie_stats
GROUP BY season, player_id, player_name, team_abbr
) t
WHERE rank <= 15;
主客场对比
CREATE OR REPLACE DYNAMIC TABLE gold.team_home_away_split
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Gold 球队主客场表现对比——主场 vs 客场胜率'
AS
SELECT
g.season, g.team_id,
t.teamname AS team_name,
t.abbreviation AS team_abbr,
COUNT(CASE WHEN g.side = 'home' THEN 1 END) AS home_games,
COUNT(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 1 END) AS home_wins,
COUNT(CASE WHEN g.side = 'away' THEN 1 END) AS away_games,
COUNT(CASE WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 1 END) AS away_wins,
ROUND(
COUNT(CASE WHEN g.side = 'home' AND g.outcome LIKE 'home win%' THEN 1 END) * 1.0 /
NULLIF(COUNT(CASE WHEN g.side = 'home' THEN 1 END), 0), 3
) AS home_win_pct,
ROUND(
COUNT(CASE WHEN g.side = 'away' AND g.outcome LIKE 'away win%' THEN 1 END) * 1.0 /
NULLIF(COUNT(CASE WHEN g.side = 'away' THEN 1 END), 0), 3
) AS away_win_pct
FROM (
SELECT season, home_team_id AS team_id, outcome, 'home' AS side
FROM nhl_game_data.game
UNION ALL
SELECT season, away_team_id AS team_id, outcome, 'away' AS side
FROM nhl_game_data.game
) g
LEFT JOIN silver.dim_team t ON g.team_id = t.team_id
GROUP BY g.season, g.team_id, t.teamname, t.abbreviation;