-- 第一步:通过 SQL PUT 将本地 CSV 文件上传到 User Volume
PUT '/path/to/your/data.csv' TO USER VOLUME FILE 'data.csv';
-- 第二步:从 User Volume COPY INTO 表
COPY INTO best_practice_hr_analytics.doc_ods_employees
FROM USER VOLUME
USING csv
OPTIONS('header'='true', 'sep'=',', 'nullValue'='')
FILES ('data.csv');
SELECT COUNT(*) AS ods_row_count
FROM best_practice_hr_analytics.doc_ods_employees;
ods_row_count
-------------
40
Column Masking:薪酬字段脱敏
monthly_income
monthly_income
是高度敏感字段。方案:HR 管理员看原始值,其他用户查询返回 -1。
-- 创建脱敏函数
CREATE OR REPLACE FUNCTION best_practice_hr_analytics.mask_salary(salary INT)
RETURNS INT
AS CASE
WHEN current_user() IN ('privileged_user') THEN salary -- 替换为实际获授权的用户名
ELSE -1
END;
-- 绑定到 monthly_income 列
ALTER TABLE best_practice_hr_analytics.doc_ods_employees
CHANGE COLUMN monthly_income
SET MASK best_practice_hr_analytics.mask_salary;
SELECT emp_id, department, job_role, monthly_income
FROM best_practice_hr_analytics.doc_ods_employees
LIMIT 5;
emp_id | department | job_role | monthly_income
-------+--------------------------+------------------------+---------------
31 | Research & Development | Laboratory Technician | 2496
32 | Research & Development | Healthcare Representative | 6465
33 | Research & Development | Laboratory Technician | 2206
34 | Sales | Sales Representative | 2086
35 | Research & Development | Research Scientist | 2293
DWD 层:员工事件时间线
DWD 层基于 ODS 原始数据,衍生出三类分析字段:任期分层(
tenure_band
tenure_band
)、晋升停滞标志(
promotion_stalled_flag
promotion_stalled_flag
)和留任风险评分(
retention_risk_score
retention_risk_score
)。
建表
CREATE DYNAMIC TABLE IF NOT EXISTS best_practice_hr_analytics.doc_dwd_employee_timeline
AS
SELECT
emp_id,
department,
job_role,
job_level,
age,
gender,
marital_status,
education,
education_field,
years_at_company,
years_in_current_role,
years_since_last_promo,
years_with_curr_mgr,
total_working_years,
monthly_income,
pct_salary_hike,
performance_rating,
job_satisfaction,
work_life_balance,
env_satisfaction,
overtime,
business_travel,
attrition,
-- 任期分层,便于分组分析
CASE
WHEN years_at_company <= 1 THEN 'New'
WHEN years_at_company <= 3 THEN 'Junior'
WHEN years_at_company <= 7 THEN 'Mid'
WHEN years_at_company <= 15 THEN 'Senior'
ELSE 'Veteran'
END AS tenure_band,
-- 晋升停滞标志:2 年以上未晋升且绩效评分 >= 3
CASE
WHEN years_since_last_promo >= 2 AND performance_rating >= 3 THEN 1
ELSE 0
END AS promotion_stalled_flag,
-- 留任风险评分(0.0 ~ 1.0,越高越危险)
ROUND(
CASE WHEN overtime = 'Yes' THEN 0.25 ELSE 0.0 END
+ CASE WHEN job_satisfaction <= 2 THEN 0.25 ELSE 0.0 END
+ CASE WHEN work_life_balance <= 2 THEN 0.20 ELSE 0.0 END
+ CASE WHEN years_since_last_promo >= 3 THEN 0.15 ELSE 0.0 END
+ CASE WHEN pct_salary_hike <= 12 THEN 0.15 ELSE 0.0 END
, 2) AS retention_risk_score
FROM best_practice_hr_analytics.doc_ods_employees;
⚠️ 注意:
CREATE DYNAMIC TABLE
CREATE DYNAMIC TABLE
DDL 中不写
REFRESH INTERVAL
REFRESH INTERVAL
。调度刷新通过 Studio Task 管理(见"调度配置"章节),这样可以在同一任务上附加监控告警和数据质量规则。
REFRESH DYNAMIC TABLE best_practice_hr_analytics.doc_dwd_employee_timeline;
SELECT COUNT(*) AS dwd_count
FROM best_practice_hr_analytics.doc_dwd_employee_timeline;
dwd_count
---------
40
按任期分层的离职风险分析
任期分层帮助 HR 识别哪个阶段员工流失风险最高:
SELECT
tenure_band,
COUNT(*) AS emp_count,
ROUND(AVG(retention_risk_score), 2) AS avg_risk,
SUM(CASE WHEN attrition = 'Yes' THEN 1 ELSE 0 END) AS attrition_cnt
FROM best_practice_hr_analytics.doc_dwd_employee_timeline
GROUP BY tenure_band
ORDER BY avg_risk DESC;
CREATE DYNAMIC TABLE IF NOT EXISTS best_practice_hr_analytics.doc_dws_dept_headcount_metrics
AS
SELECT
department,
COUNT(*) AS headcount,
SUM(CASE WHEN attrition = 'Yes' THEN 1 ELSE 0 END) AS attrition_count,
ROUND(SUM(CASE WHEN attrition = 'Yes' THEN 1 ELSE 0 END)
* 100.0 / COUNT(*), 2) AS attrition_rate_pct,
ROUND(AVG(years_at_company), 2) AS avg_tenure_years,
ROUND(AVG(monthly_income), 0) AS avg_monthly_income,
ROUND(AVG(performance_rating), 2) AS avg_performance,
ROUND(AVG(job_satisfaction), 2) AS avg_job_satisfaction,
ROUND(AVG(retention_risk_score), 2) AS avg_retention_risk,
SUM(CASE WHEN overtime = 'Yes' THEN 1 ELSE 0 END) AS overtime_headcount,
SUM(promotion_stalled_flag) AS promotion_stalled_count
FROM best_practice_hr_analytics.doc_dwd_employee_timeline
GROUP BY department;
两个部门的绩效均值相近(3.11 vs 3.16),说明离职者并非低绩效员工,HR 应优先关注组织氛围和职业发展通道。
ADS 层:离职风险报告与薪酬分析
离职风险报告
CREATE DYNAMIC TABLE IF NOT EXISTS best_practice_hr_analytics.doc_ads_attrition_risk_report
AS
SELECT
emp_id,
department,
job_role,
job_level,
tenure_band,
years_at_company,
years_since_last_promo,
overtime,
job_satisfaction,
work_life_balance,
performance_rating,
monthly_income,
pct_salary_hike,
promotion_stalled_flag,
retention_risk_score,
attrition,
-- 风险等级
CASE
WHEN retention_risk_score >= 0.6 THEN 'HIGH'
WHEN retention_risk_score >= 0.3 THEN 'MEDIUM'
ELSE 'LOW'
END AS risk_level,
-- 主要离职信号
CASE
WHEN overtime = 'Yes' AND job_satisfaction <= 2 THEN 'Overwork+LowSatisfaction'
WHEN overtime = 'Yes' THEN 'Overwork'
WHEN job_satisfaction <= 2 THEN 'LowSatisfaction'
WHEN years_since_last_promo >= 3 THEN 'PromotionStalled'
WHEN pct_salary_hike <= 12 THEN 'LowPayRaise'
ELSE 'Normal'
END AS primary_risk_signal
FROM best_practice_hr_analytics.doc_dwd_employee_timeline;
REFRESH DYNAMIC TABLE best_practice_hr_analytics.doc_ads_attrition_risk_report;
SELECT risk_level,
COUNT(*) AS emp_count,
ROUND(AVG(retention_risk_score), 2) AS avg_risk,
SUM(CASE WHEN attrition = 'Yes' THEN 1 ELSE 0 END) AS actual_attrition
FROM best_practice_hr_analytics.doc_ads_attrition_risk_report
GROUP BY risk_level
ORDER BY avg_risk DESC;
SELECT
overtime,
job_satisfaction,
COUNT(*) AS emp_count,
ROUND(AVG(retention_risk_score), 2) AS avg_risk,
SUM(CASE WHEN attrition = 'Yes' THEN 1 ELSE 0 END) AS attrition_cnt
FROM best_practice_hr_analytics.doc_dwd_employee_timeline
GROUP BY overtime, job_satisfaction
ORDER BY avg_risk DESC
LIMIT 8;
SELECT
emp_id,
department,
job_role,
job_level,
monthly_income,
years_at_company,
pct_salary_hike,
AVG(monthly_income) OVER (PARTITION BY department, job_level) AS dept_level_avg_income,
ROUND(
(monthly_income - AVG(monthly_income) OVER (PARTITION BY department, job_level))
/ AVG(monthly_income) OVER (PARTITION BY department, job_level) * 100
, 1) AS income_vs_peer_pct,
RANK() OVER (PARTITION BY department ORDER BY monthly_income DESC) AS income_rank_in_dept
FROM best_practice_hr_analytics.doc_dwd_employee_timeline
WHERE department = 'Sales'
ORDER BY job_level DESC, monthly_income DESC;
SELECT
emp_id,
department,
job_role,
job_level,
years_at_company,
years_since_last_promo,
performance_rating,
LAG(performance_rating) OVER (
PARTITION BY department ORDER BY years_at_company
) AS prev_emp_perf,
years_since_last_promo - LAG(years_since_last_promo) OVER (
PARTITION BY department ORDER BY years_at_company
) AS promo_lag_delta
FROM best_practice_hr_analytics.doc_dwd_employee_timeline
WHERE department = 'Research & Development'
ORDER BY years_since_last_promo DESC
LIMIT 10;
emp_id | department | job_role | job_level | years_at_company | years_since_last_promo | performance_rating | prev_emp_perf | promo_lag_delta
-------+--------------------------+-----------------------------+-----------+------------------+------------------------+--------------------+---------------+----------------
16 | Research & Development | Manufacturing Director | 3 | 10 | 8 | 3 | 4 | 7
10 | Research & Development | Healthcare Representative | 2 | 7 | 7 | 3 | 3 | 4
27 | Research & Development | Research Scientist | 1 | 10 | 6 | 4 | 3 | -2
29 | Research & Development | Healthcare Representative | 3 | 22 | 5 | 3 | 3 | 1
13 | Research & Development | Research Scientist | 1 | 5 | 4 | 3 | 3 | 4
26 | Research & Development | Manager | 5 | 14 | 4 | 3 | 3 | 2