INSERT INTO dqc.dqc_results (check_id, check_name, layer, metric, expected, actual, status, detail)
SELECT 'DQC-001', 'dim_row_match', 'silver', 'dim_team_rows',
CAST((SELECT COUNT(*) FROM nhl_game_data.team_info) AS STRING),
CAST((SELECT COUNT(*) FROM silver.dim_team) AS STRING),
CASE WHEN (SELECT COUNT(*) FROM nhl_game_data.team_info)
= (SELECT COUNT(*) FROM silver.dim_team)
THEN 'PASS' ELSE 'FAIL' END,
'Bronze team_info 行数应与 Silver dim_team 一致';
空值率
INSERT INTO dqc.dqc_results (check_id, check_name, layer, metric, expected, actual, status, detail)
SELECT 'DQC-003', 'null_rate', 'silver', 'skater_goals_null_pct',
'=0',
CAST(ROUND(SUM(CASE WHEN goals IS NULL THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS STRING),
CASE WHEN SUM(CASE WHEN goals IS NULL THEN 1 ELSE 0 END) * 100.0 / COUNT(*) = 0
THEN 'PASS' ELSE 'WARN' END,
'Silver fact_skater_stats.goals 不应有 NULL'
FROM silver.fact_skater_stats;
PASS vs WARN vs FAIL 分级:
PASS
PASS
:完全符合预期
WARN
WARN
:偏差在容忍范围内,需关注但不断路(如空值率 < 1%)
FAIL
FAIL
:严重偏离,应阻断下游消费
唯一性
INSERT INTO dqc.dqc_results (check_id, check_name, layer, metric, expected, actual, status, detail)
SELECT 'DQC-005', 'uniqueness', 'silver', 'dim_player_id_unique',
'TRUE',
CAST(CASE WHEN COUNT(*) = COUNT(DISTINCT player_id) THEN 'TRUE' ELSE 'FALSE' END AS STRING),
CASE WHEN COUNT(*) = COUNT(DISTINCT player_id) THEN 'PASS' ELSE 'FAIL' END,
'Silver dim_player.player_id 应唯一'
FROM silver.dim_player;
值范围
INSERT INTO dqc.dqc_results (check_id, check_name, layer, metric, expected, actual, status, detail)
SELECT 'DQC-006', 'value_range', 'silver', 'skater_goals_positive',
'>=0',
CAST(MIN(goals) AS STRING),
CASE WHEN MIN(goals) >= 0 THEN 'PASS' ELSE 'FAIL' END,
'Silver fact_skater_stats.goals 不应为负数'
FROM silver.fact_skater_stats;
新鲜度
INSERT INTO dqc.dqc_results (check_id, check_name, layer, metric, expected, actual, status, detail)
SELECT 'DQC-008', 'freshness', 'bronze', 'max_season',
'>=2019',
CAST(MAX(season) AS STRING),
CASE WHEN MAX(season) >= 2019 THEN 'PASS' ELSE 'WARN' END,
'Bronze 最新赛季不应早于 2019'
FROM nhl_game_data.game;
第三步:DQC 仪表盘
将
dqc_results
dqc_results
聚合为仪表盘 Dynamic Table,一键查看各层质量状态:
CREATE OR REPLACE DYNAMIC TABLE dqc.dqc_dashboard
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'DQC 仪表盘——按层汇总质量状态'
AS
SELECT
layer,
COUNT(*) AS total_checks,
SUM(CASE WHEN status = 'PASS' THEN 1 ELSE 0 END) AS pass_cnt,
SUM(CASE WHEN status = 'WARN' THEN 1 ELSE 0 END) AS warn_cnt,
SUM(CASE WHEN status = 'FAIL' THEN 1 ELSE 0 END) AS fail_cnt,
ROUND(SUM(CASE WHEN status = 'PASS' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS pass_rate
FROM dqc.dqc_results
GROUP BY layer;
-- 执行全部 DQC 检查后查看结果
SELECT check_id, status, metric, actual
FROM dqc.dqc_results
WHERE status != 'PASS'; -- 只看异常
方式二:Dynamic Table 自动执行
将 DQC 检查逻辑封装为 Dynamic Table,每次源表刷新后自动重跑:
-- DQC 检查 DT:空值率监控
CREATE OR REPLACE DYNAMIC TABLE dqc.skater_null_monitor
REFRESH INTERVAL 1 DAY vcluster DEFAULT
COMMENT 'Silver 层球员统计空值率监控'
AS
SELECT
'DQC-003' AS check_id,
'null_rate' AS check_name,
'silver' AS layer,
'skater_goals_null_pct' AS metric,
'=0' AS expected,
CAST(ROUND(SUM(CASE WHEN goals IS NULL THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS STRING) AS actual,
CASE WHEN SUM(CASE WHEN goals IS NULL THEN 1 ELSE 0 END) * 100.0 / COUNT(*) = 0
THEN 'PASS' ELSE 'WARN' END AS status
FROM silver.fact_skater_stats;
-- 查询所有 FAIL 的检查
SELECT * FROM dqc.dqc_results WHERE status = 'FAIL';
-- 查询本轮检查的异常汇总
SELECT layer,
SUM(CASE WHEN status = 'FAIL' THEN 1 ELSE 0 END) AS fails,
SUM(CASE WHEN status = 'WARN' THEN 1 ELSE 0 END) AS warns
FROM dqc.dqc_results
WHERE checked_at > CURRENT_TIMESTAMP() - INTERVAL 1 DAY
GROUP BY layer
HAVING SUM(CASE WHEN status = 'FAIL' THEN 1 ELSE 0 END) > 0;