-- 第一步:创建指向对象存储的 Storage Connection
-- 阿里云 OSS 示例;AWS S3 / 腾讯云 COS 请按实际云厂商替换 TYPE、ENDPOINT 和凭据字段。
CREATE STORAGE CONNECTION IF NOT EXISTS lab_reports_oss_conn
TYPE oss
ENDPOINT = 'oss-cn-hangzhou.aliyuncs.com'
ACCESS_ID = '<your_access_key_id>'
ACCESS_KEY = '<your_access_key_secret>';
-- 第二步:将 LIS 检验报告所在目录挂载为 External Volume
CREATE EXTERNAL VOLUME IF NOT EXISTS best_practice_healthcare_dw.lab_reports_volume
LOCATION 'oss://<your-bucket>/lis/lab-reports/'
USING CONNECTION lab_reports_oss_conn
DIRECTORY = (enable = true, auto_refresh = true)
RECURSIVE = true;
完成挂载后,从
lab_reports_volume
lab_reports_volume
批量导入 JSON 检验报告:
COPY INTO best_practice_healthcare_dw.doc_ods_lab_results
FROM VOLUME best_practice_healthcare_dw.lab_reports_volume
USING json
ON_ERROR = 'CONTINUE';
⚠️ 注意:
ON_ERROR = 'CONTINUE'
ON_ERROR = 'CONTINUE'
会跳过格式不合法的文件并继续加载,建议在加载后查询
COPY_HISTORY
COPY_HISTORY
确认是否有文件被跳过。
ODS 层:Column Masking 患者 PII 脱敏
医疗数据中患者 ID 和诊断信息属于受保护的健康信息(PHI),需要根据访问者角色动态脱敏。ClickZetta 通过 Column Masking 在列级别绑定脱敏策略,对非特权用户透明屏蔽敏感字段。
创建脱敏策略函数
患者 ID 脱敏:非 workspace_admin 角色只显示首尾字符,中间替换为
*
*
。
CREATE OR REPLACE FUNCTION best_practice_healthcare_dw.mask_patient_id(pid STRING)
RETURNS STRING
AS
CASE
WHEN array_contains(current_roles(), 'workspace_admin') THEN pid
ELSE CONCAT(SUBSTR(pid, 1, 1), REPEAT('*', LENGTH(pid) - 2), SUBSTR(pid, LENGTH(pid), 1))
END;
CREATE OR REPLACE FUNCTION best_practice_healthcare_dw.mask_diagnosis(diag STRING)
RETURNS STRING
AS
CASE
WHEN array_contains(current_roles(), 'workspace_admin')
OR array_contains(current_roles(), 'workspace_dev') THEN diag
ELSE 'RESTRICTED'
END;
将脱敏策略绑定到列
-- 绑定患者 ID 脱敏
ALTER TABLE best_practice_healthcare_dw.doc_ods_admissions
CHANGE COLUMN patient_id
SET MASK best_practice_healthcare_dw.mask_patient_id;
-- 绑定诊断信息脱敏
ALTER TABLE best_practice_healthcare_dw.doc_ods_admissions
CHANGE COLUMN principal_diagnosis_desc
SET MASK best_practice_healthcare_dw.mask_diagnosis;
CREATE DYNAMIC TABLE best_practice_healthcare_dw.doc_dwd_patient_visits
REFRESH INTERVAL 60 MINUTE VCLUSTER DEFAULT
AS
SELECT
a.admission_id,
a.hospital_id,
h.hospital_name,
h.county AS hospital_county,
h.hospital_type,
h.is_teaching,
a.patient_id,
a.age,
CASE
WHEN a.age < 18 THEN 'Pediatric'
WHEN a.age < 45 THEN 'Adult'
WHEN a.age < 65 THEN 'Middle-Aged'
ELSE 'Senior'
END AS age_group,
a.sex,
a.race,
a.county_of_residence,
a.admission_date,
a.discharge_date,
a.los_days,
a.admission_type,
a.admission_source,
a.discharge_disposition,
a.principal_diagnosis_code,
a.principal_diagnosis_desc,
a.major_diagnostic_category,
a.department,
a.payer_type,
a.total_charges,
a.drg_code,
a.drg_description,
CASE
WHEN a.discharge_disposition IN ('Home', 'Home Health') THEN 'Routine'
WHEN a.discharge_disposition = 'SNF' THEN 'Extended Care'
ELSE 'Other'
END AS discharge_category,
CASE WHEN a.los_days > 7 THEN 1 ELSE 0 END AS is_long_stay,
a.load_time
FROM best_practice_healthcare_dw.doc_ods_admissions a
LEFT JOIN best_practice_healthcare_dw.doc_hospital_dim h
ON a.hospital_id = h.hospital_id;
检验报告事件宽表,计算检验周转时间(TAT):
CREATE DYNAMIC TABLE best_practice_healthcare_dw.doc_dwd_lab_events
REFRESH INTERVAL 60 MINUTE VCLUSTER DEFAULT
AS
SELECT
l.lab_id,
l.admission_id,
l.patient_id,
l.hospital_id,
h.hospital_name,
l.test_name,
l.test_code,
l.result_value,
l.reference_range,
l.abnormal_flag,
l.collection_time,
l.result_time,
TIMESTAMPDIFF(MINUTE, l.collection_time, l.result_time) AS tat_minutes,
l.ordering_physician,
l.department
FROM best_practice_healthcare_dw.doc_ods_lab_results l
LEFT JOIN best_practice_healthcare_dw.doc_hospital_dim h
ON l.hospital_id = h.hospital_id;
CREATE DYNAMIC TABLE best_practice_healthcare_dw.doc_dws_dept_monthly
REFRESH INTERVAL 60 MINUTE VCLUSTER DEFAULT
AS
SELECT
DATE_TRUNC('month', admission_date) AS admission_month,
hospital_id,
hospital_name,
department,
major_diagnostic_category,
COUNT(*) AS total_admissions,
COUNT(DISTINCT patient_id) AS unique_patients,
ROUND(AVG(los_days), 2) AS avg_los_days,
MAX(los_days) AS max_los_days,
SUM(CASE WHEN is_long_stay = 1 THEN 1 ELSE 0 END) AS long_stay_count,
ROUND(SUM(CASE WHEN is_long_stay = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS long_stay_pct,
ROUND(SUM(total_charges), 2) AS total_charges,
ROUND(AVG(total_charges), 2) AS avg_charges,
SUM(CASE WHEN admission_type = 'Emergency' THEN 1 ELSE 0 END) AS emergency_count,
SUM(CASE WHEN payer_type = 'Medicare' THEN 1 ELSE 0 END) AS medicare_count,
SUM(CASE WHEN payer_type = 'Medicaid' THEN 1 ELSE 0 END) AS medicaid_count,
SUM(CASE WHEN payer_type = 'Commercial' THEN 1 ELSE 0 END) AS commercial_count
FROM best_practice_healthcare_dw.doc_dwd_patient_visits
GROUP BY 1, 2, 3, 4, 5;
SELECT
department, total_admissions, avg_los_days, long_stay_pct,
total_charges, emergency_count, medicare_count
FROM best_practice_healthcare_dw.doc_dws_dept_monthly
WHERE admission_month = '2025-01-01'
ORDER BY total_charges DESC
LIMIT 6;
SELECT
major_diagnostic_category,
COUNT(*) AS admission_count,
ROUND(AVG(los_days), 2) AS avg_los,
ROUND(AVG(total_charges), 0) AS avg_charges
FROM best_practice_healthcare_dw.doc_dwd_patient_visits
GROUP BY major_diagnostic_category
ORDER BY admission_count DESC
LIMIT 8;
SELECT
payer_type,
COUNT(*) AS admission_count,
ROUND(AVG(los_days), 2) AS avg_los,
ROUND(SUM(total_charges), 0) AS total_revenue,
ROUND(AVG(total_charges), 0) AS avg_charges
FROM best_practice_healthcare_dw.doc_dwd_patient_visits
GROUP BY payer_type
ORDER BY admission_count DESC;
CREATE DYNAMIC TABLE best_practice_healthcare_dw.doc_ads_hospital_kpi
REFRESH INTERVAL 60 MINUTE VCLUSTER DEFAULT
AS
SELECT
hospital_id,
hospital_name,
hospital_county,
hospital_type,
is_teaching,
COUNT(*) AS total_admissions,
COUNT(DISTINCT patient_id) AS unique_patients,
ROUND(AVG(los_days), 2) AS avg_los_days,
SUM(CASE WHEN is_long_stay = 1 THEN 1 ELSE 0 END) AS long_stay_count,
ROUND(SUM(CASE WHEN is_long_stay = 1 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS long_stay_rate_pct,
ROUND(SUM(total_charges), 2) AS total_revenue,
ROUND(AVG(total_charges), 2) AS avg_revenue_per_admission,
SUM(CASE WHEN admission_type = 'Emergency' THEN 1 ELSE 0 END) AS emergency_admissions,
ROUND(SUM(CASE WHEN admission_type = 'Emergency' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 1) AS emergency_rate_pct,
SUM(CASE WHEN discharge_category = 'Extended Care' THEN 1 ELSE 0 END) AS snf_discharges,
SUM(CASE WHEN payer_type = 'Medicare' THEN 1 ELSE 0 END) AS medicare_cases,
SUM(CASE WHEN payer_type = 'Medicaid' THEN 1 ELSE 0 END) AS medicaid_cases,
SUM(CASE WHEN payer_type = 'Commercial' THEN 1 ELSE 0 END) AS commercial_cases,
COUNT(DISTINCT department) AS active_departments
FROM best_practice_healthcare_dw.doc_dwd_patient_visits
GROUP BY hospital_id, hospital_name, hospital_county, hospital_type, is_teaching;
Cedars-Sinai 总收入最高($447,100),急诊入院率也最高(75%);Stanford Health Care 有 16.7% 的长住院率(los > 7 天),反映其收治重症病例比例较高。
按季度月份趋势分析:
SELECT
CASE EXTRACT(month FROM admission_date)
WHEN 1 THEN 'January' WHEN 2 THEN 'February' WHEN 3 THEN 'March'
END AS month_name,
COUNT(*) AS total_admissions,
ROUND(AVG(los_days), 2) AS avg_los,
ROUND(SUM(total_charges), 0) AS total_revenue,
SUM(CASE WHEN admission_type = 'Emergency' THEN 1 ELSE 0 END) AS emergency_count
FROM best_practice_healthcare_dw.doc_dwd_patient_visits
GROUP BY EXTRACT(month FROM admission_date),
CASE EXTRACT(month FROM admission_date)
WHEN 1 THEN 'January' WHEN 2 THEN 'February' WHEN 3 THEN 'March'
END
ORDER BY EXTRACT(month FROM admission_date);
-- 临床人员:可查看患者就诊事件(DWD 层),但患者 PII 字段受 Column Masking 控制
CREATE ROLE IF NOT EXISTS healthcare_clinical_viewer
COMMENT 'Clinical staff - view patient clinical data with PII masking';
-- 运营管理:只能访问科室级聚合数据(DWS/ADS 层),不接触患者粒度记录
CREATE ROLE IF NOT EXISTS healthcare_mgmt_analyst
COMMENT 'Management - view aggregate KPIs only, no patient-level access';
-- BI 分析师:只能访问 ADS 层 KPI 表,供报表和看板消费
CREATE ROLE IF NOT EXISTS healthcare_bi_analyst
COMMENT 'BI team - ADS layer only, read-only for reporting';
授权 Schema 访问权限
-- 临床人员:授予 Schema 的元数据读取权限(实际表级权限在表上单独控制)
GRANT READ METADATA ON SCHEMA best_practice_healthcare_dw
TO ROLE healthcare_clinical_viewer;
-- 管理层和 BI 分析师同样只授予元数据读取
GRANT READ METADATA ON SCHEMA best_practice_healthcare_dw
TO ROLE healthcare_mgmt_analyst;
GRANT READ METADATA ON SCHEMA best_practice_healthcare_dw
TO ROLE healthcare_bi_analyst;
权限分级总结
角色
可访问层
可见患者 PII
典型用户
workspace_admin
workspace_admin
ODS / DWD / DWS / ADS
完整数据
DBA、数据平台管理员
healthcare_clinical_viewer
healthcare_clinical_viewer
DWD 层就诊事件
patient_id 脱敏,诊断 RESTRICTED
临床医生、护士
healthcare_mgmt_analyst
healthcare_mgmt_analyst
DWS / ADS 层
无患者粒度字段
科室主任、运营管理
healthcare_bi_analyst
healthcare_bi_analyst
ADS 层仅 KPI
无患者粒度字段
BI 工程师、报表开发
⚠️ 注意:RBAC 角色授权后需将角色分配给具体用户(
GRANT ROLE <role_name> TO USER <username>
GRANT ROLE <role_name> TO USER <username>
),角色本身不能直接登录系统。
Time Travel:合规审计与医保对账
医疗行业的合规要求数据变更可追溯。ClickZetta Time Travel 通过
DESC HISTORY
DESC HISTORY
和
TIMESTAMP AS OF
TIMESTAMP AS OF
支持任意历史版本查询,适用于医保对账、监管机构审查和数据误操作恢复。
查看历史版本
DESC HISTORY best_practice_healthcare_dw.doc_ods_admissions;
-- 查询版本2(2026-06-06 13:28:29)时的数据快照
SELECT COUNT(*) AS row_count
FROM best_practice_healthcare_dw.doc_ods_admissions
TIMESTAMP AS OF '2026-06-06T13:28:29.718';