-- 数据产品层(统一接口层)
CREATE SCHEMA IF NOT EXISTS best_practice_data_mesh;
-- 各业务域 Schema
CREATE SCHEMA IF NOT EXISTS best_practice_data_mesh_sales;
CREATE SCHEMA IF NOT EXISTS best_practice_data_mesh_hr;
CREATE SCHEMA IF NOT EXISTS best_practice_data_mesh_finance;
Sales 域:建表与数据合同
建源数据表
Sales 域自主管理两张表。列注释(
COMMENT
COMMENT
)是数据合同的核心表达方式,消费方通过读注释理解字段语义,而不需要口头约定。
CREATE TABLE IF NOT EXISTS best_practice_data_mesh_sales.doc_sales_orders (
order_id STRING COMMENT 'Unique order identifier',
customer_id STRING COMMENT 'Customer identifier',
sales_rep_id STRING COMMENT 'Sales representative identifier',
region STRING COMMENT 'Sales region',
order_date DATE COMMENT 'Date the order was placed',
status STRING COMMENT 'Order status: pending/confirmed/shipped/cancelled',
total_amount DOUBLE COMMENT 'Total order amount in USD',
currency STRING COMMENT 'Currency code'
) COMMENT 'Sales domain: order header table (data contract v1.0)';
CREATE TABLE IF NOT EXISTS best_practice_data_mesh_sales.doc_sales_items (
item_id STRING COMMENT 'Unique line item identifier',
order_id STRING COMMENT 'Reference to doc_sales_orders.order_id',
product_id STRING COMMENT 'Product identifier',
product_name STRING COMMENT 'Product display name',
category STRING COMMENT 'Product category',
quantity INT COMMENT 'Ordered quantity',
unit_price DOUBLE COMMENT 'Unit price at time of order',
line_total DOUBLE COMMENT 'quantity * unit_price'
) COMMENT 'Sales domain: order line items table (data contract v1.0)';
表级
COMMENT
COMMENT
末尾写版本号(
data contract v1.0
data contract v1.0
),方便后续通过
DESC TABLE
DESC TABLE
查询合同版本,判断消费方是否需要适配。
写入模拟数据
从本地 CSV 导入数据(推荐):
-- 第一步:通过 SQL PUT 将本地 CSV 文件上传到 User Volume
PUT '/path/to/your/doc_sales_orders.csv' TO USER VOLUME FILE 'doc_sales_orders.csv';
-- 第二步:从 User Volume COPY INTO 表
COPY INTO best_practice_data_mesh_sales.doc_sales_orders
FROM USER VOLUME
USING csv
OPTIONS('header'='true', 'sep'=',', 'nullValue'='')
FILES ('doc_sales_orders.csv');
也可直接内联插入小批量测试数据(不需要 CSV 文件):
INSERT INTO best_practice_data_mesh_sales.doc_sales_orders VALUES
('ORD001','CUST001','REP001','APAC', CAST('2026-01-05' AS DATE),'shipped', 15200.00,'USD'),
('ORD002','CUST002','REP001','APAC', CAST('2026-01-08' AS DATE),'shipped', 8500.00,'USD'),
('ORD003','CUST003','REP002','EMEA', CAST('2026-01-12' AS DATE),'confirmed', 23000.00,'USD'),
('ORD004','CUST004','REP003','AMER', CAST('2026-01-15' AS DATE),'cancelled', 4200.00,'USD'),
('ORD005','CUST005','REP002','EMEA', CAST('2026-01-20' AS DATE),'shipped', 31500.00,'USD'),
('ORD006','CUST001','REP001','APAC', CAST('2026-02-03' AS DATE),'shipped', 9800.00,'USD'),
('ORD007','CUST006','REP004','AMER', CAST('2026-02-10' AS DATE),'pending', 6750.00,'USD'),
('ORD008','CUST007','REP003','AMER', CAST('2026-02-14' AS DATE),'shipped', 18400.00,'USD'),
('ORD009','CUST008','REP002','EMEA', CAST('2026-02-18' AS DATE),'confirmed', 11200.00,'USD'),
('ORD010','CUST009','REP005','APAC', CAST('2026-02-25' AS DATE),'shipped', 27600.00,'USD'),
('ORD011','CUST010','REP004','AMER', CAST('2026-03-02' AS DATE),'shipped', 5300.00,'USD'),
('ORD012','CUST002','REP001','APAC', CAST('2026-03-07' AS DATE),'shipped', 14100.00,'USD');
INSERT INTO best_practice_data_mesh_sales.doc_sales_items VALUES
('ITEM001','ORD001','PROD001','Laptop Pro 15','Electronics', 2, 4200.00, 8400.00),
('ITEM002','ORD001','PROD002','USB-C Hub', 'Accessories', 5, 160.00, 800.00),
('ITEM003','ORD001','PROD003','Laptop Bag', 'Accessories', 2, 80.00, 160.00),
('ITEM004','ORD002','PROD004','Wireless Mouse','Peripherals', 10, 85.00, 850.00),
('ITEM005','ORD003','PROD001','Laptop Pro 15', 'Electronics', 4, 4200.00, 16800.00),
('ITEM006','ORD003','PROD005','Monitor 27in', 'Electronics', 2, 3100.00, 6200.00),
('ITEM007','ORD005','PROD001','Laptop Pro 15', 'Electronics', 6, 4200.00, 25200.00),
('ITEM008','ORD005','PROD003','Laptop Bag', 'Accessories', 6, 80.00, 480.00),
('ITEM009','ORD006','PROD002','USB-C Hub', 'Accessories', 3, 160.00, 480.00),
('ITEM010','ORD006','PROD004','Wireless Mouse','Peripherals', 8, 85.00, 680.00),
('ITEM011','ORD008','PROD005','Monitor 27in', 'Electronics', 4, 3100.00, 12400.00),
('ITEM012','ORD010','PROD001','Laptop Pro 15', 'Electronics', 5, 4200.00, 21000.00),
('ITEM013','ORD010','PROD006','Keyboard MX', 'Peripherals', 10, 195.00, 1950.00),
('ITEM014','ORD011','PROD004','Wireless Mouse','Peripherals', 5, 85.00, 425.00),
('ITEM015','ORD012','PROD001','Laptop Pro 15', 'Electronics', 2, 4200.00, 8400.00);
验证 Sales 域数据量:
SELECT COUNT(*) AS order_count FROM best_practice_data_mesh_sales.doc_sales_orders;
SELECT COUNT(*) AS item_count FROM best_practice_data_mesh_sales.doc_sales_items;
CREATE TABLE IF NOT EXISTS best_practice_data_mesh_hr.doc_departments (
dept_id STRING COMMENT 'Unique department identifier',
dept_name STRING COMMENT 'Department name',
cost_center STRING COMMENT 'Finance cost center code',
location STRING COMMENT 'Office location',
head_count INT COMMENT 'Planned headcount'
) COMMENT 'HR domain: department master table (data contract v1.0)';
CREATE TABLE IF NOT EXISTS best_practice_data_mesh_hr.doc_employees (
employee_id STRING COMMENT 'Unique employee identifier',
dept_id STRING COMMENT 'Reference to doc_departments.dept_id',
full_name STRING COMMENT 'Employee full name',
job_title STRING COMMENT 'Job title',
hire_date DATE COMMENT 'Date of hire',
salary DOUBLE COMMENT 'Annual salary in USD',
status STRING COMMENT 'active / on_leave / terminated',
manager_id STRING COMMENT 'Direct manager employee_id (nullable for top level)'
) COMMENT 'HR domain: employee master table (data contract v1.0)';
写入模拟数据
INSERT INTO best_practice_data_mesh_hr.doc_departments VALUES
('DEPT01','Sales', 'CC-SALE','Shanghai', 30),
('DEPT02','Engineering', 'CC-ENG', 'Beijing', 80),
('DEPT03','Finance', 'CC-FIN', 'Shanghai', 20),
('DEPT04','Human Resources','CC-HR', 'Shanghai', 15),
('DEPT05','Marketing', 'CC-MKT', 'Shenzhen', 25);
INSERT INTO best_practice_data_mesh_hr.doc_employees VALUES
('EMP001','DEPT01','Alice Wang', 'Sales Manager', CAST('2020-03-01' AS DATE),180000.00,'active', NULL),
('EMP002','DEPT01','Bob Chen', 'Sales Rep', CAST('2021-06-15' AS DATE),110000.00,'active', 'EMP001'),
('EMP003','DEPT01','Carol Liu', 'Sales Rep', CAST('2021-09-20' AS DATE),108000.00,'active', 'EMP001'),
('EMP004','DEPT02','David Zhang', 'Engineering Lead', CAST('2019-01-10' AS DATE),220000.00,'active', NULL),
('EMP005','DEPT02','Eva Sun', 'Senior Engineer', CAST('2020-07-22' AS DATE),180000.00,'active', 'EMP004'),
('EMP006','DEPT02','Frank Zhao', 'Engineer', CAST('2022-04-05' AS DATE),140000.00,'active', 'EMP004'),
('EMP007','DEPT03','Grace Li', 'Finance Manager', CAST('2018-11-01' AS DATE),190000.00,'active', NULL),
('EMP008','DEPT03','Henry Wu', 'Accountant', CAST('2021-02-28' AS DATE),120000.00,'active', 'EMP007'),
('EMP009','DEPT04','Ivy Zhou', 'HR Manager', CAST('2019-05-15' AS DATE),160000.00,'active', NULL),
('EMP010','DEPT04','Jack Luo', 'HR Specialist', CAST('2022-08-01' AS DATE),100000.00,'on_leave','EMP009'),
('EMP011','DEPT05','Karen Xu', 'Marketing Manager', CAST('2020-10-12' AS DATE),170000.00,'active', NULL),
('EMP012','DEPT01','Leo Ma', 'Sales Rep', CAST('2023-01-16' AS DATE),105000.00,'active', 'EMP001');
Finance 域:建表与数据合同
建源数据表
Finance 域管理发票和收款两张表,通过
invoice_id
invoice_id
关联,构成应收账款(AR)体系。
CREATE TABLE IF NOT EXISTS best_practice_data_mesh_finance.doc_invoices (
invoice_id STRING COMMENT 'Unique invoice identifier',
order_id STRING COMMENT 'Reference to sales domain order_id',
customer_id STRING COMMENT 'Customer identifier',
issue_date DATE COMMENT 'Invoice issue date',
due_date DATE COMMENT 'Payment due date',
amount DOUBLE COMMENT 'Invoice amount in USD',
status STRING COMMENT 'draft / issued / paid / overdue'
) COMMENT 'Finance domain: invoice header table (data contract v1.0)';
CREATE TABLE IF NOT EXISTS best_practice_data_mesh_finance.doc_payments (
payment_id STRING COMMENT 'Unique payment identifier',
invoice_id STRING COMMENT 'Reference to doc_invoices.invoice_id',
payment_date DATE COMMENT 'Date payment was received',
amount_paid DOUBLE COMMENT 'Amount paid in USD',
method STRING COMMENT 'Payment method: bank_transfer / credit_card / check',
status STRING COMMENT 'completed / failed / pending'
) COMMENT 'Finance domain: payment records table (data contract v1.0)';
写入模拟数据
INSERT INTO best_practice_data_mesh_finance.doc_invoices VALUES
('INV001','ORD001','CUST001', CAST('2026-01-06' AS DATE), CAST('2026-02-06' AS DATE), 15200.00,'paid'),
('INV002','ORD002','CUST002', CAST('2026-01-09' AS DATE), CAST('2026-02-09' AS DATE), 8500.00,'paid'),
('INV003','ORD003','CUST003', CAST('2026-01-13' AS DATE), CAST('2026-02-13' AS DATE), 23000.00,'paid'),
('INV004','ORD005','CUST005', CAST('2026-01-21' AS DATE), CAST('2026-02-21' AS DATE), 31500.00,'overdue'),
('INV005','ORD006','CUST001', CAST('2026-02-04' AS DATE), CAST('2026-03-04' AS DATE), 9800.00,'paid'),
('INV006','ORD008','CUST007', CAST('2026-02-15' AS DATE), CAST('2026-03-15' AS DATE), 18400.00,'issued'),
('INV007','ORD009','CUST008', CAST('2026-02-19' AS DATE), CAST('2026-03-19' AS DATE), 11200.00,'paid'),
('INV008','ORD010','CUST009', CAST('2026-02-26' AS DATE), CAST('2026-03-26' AS DATE), 27600.00,'issued'),
('INV009','ORD011','CUST010', CAST('2026-03-03' AS DATE), CAST('2026-04-03' AS DATE), 5300.00,'paid'),
('INV010','ORD012','CUST002', CAST('2026-03-08' AS DATE), CAST('2026-04-08' AS DATE), 14100.00,'draft');
INSERT INTO best_practice_data_mesh_finance.doc_payments VALUES
('PAY001','INV001', CAST('2026-01-28' AS DATE), 15200.00,'bank_transfer','completed'),
('PAY002','INV002', CAST('2026-02-01' AS DATE), 8500.00,'credit_card', 'completed'),
('PAY003','INV003', CAST('2026-02-10' AS DATE), 23000.00,'bank_transfer','completed'),
('PAY004','INV005', CAST('2026-02-25' AS DATE), 9800.00,'bank_transfer','completed'),
('PAY005','INV007', CAST('2026-03-05' AS DATE), 11200.00,'credit_card', 'completed'),
('PAY006','INV009', CAST('2026-03-20' AS DATE), 5300.00,'check', 'completed');
CREATE OR REPLACE VIEW best_practice_data_mesh.dp_sales_revenue AS
SELECT
o.order_id,
o.customer_id,
o.region,
o.order_date,
o.status,
o.total_amount,
i.product_name,
i.category,
i.quantity,
i.unit_price,
i.line_total
FROM best_practice_data_mesh_sales.doc_sales_orders o
JOIN best_practice_data_mesh_sales.doc_sales_items i
ON o.order_id = i.order_id
WHERE o.status NOT IN ('cancelled');
这个视图做了两件事:JOIN 合并订单头和行项目,同时过滤掉
cancelled
cancelled
状态的订单。消费方查询时无需关心源表的 JOIN 逻辑,也不会意外包含已取消订单影响收入统计。
验证分区域收入:
SELECT region, COUNT(*) AS orders, ROUND(SUM(total_amount), 2) AS revenue
FROM best_practice_data_mesh.dp_sales_revenue
GROUP BY region ORDER BY revenue DESC;
SELECT category, COUNT(*) AS item_lines, ROUND(SUM(line_total), 2) AS category_revenue
FROM best_practice_data_mesh.dp_sales_revenue
GROUP BY category ORDER BY category_revenue DESC;
CREATE OR REPLACE VIEW best_practice_data_mesh.dp_hr_org AS
SELECT
e.employee_id,
e.dept_id,
d.dept_name,
d.cost_center,
d.location,
e.job_title,
e.hire_date,
e.status,
e.manager_id
FROM best_practice_data_mesh_hr.doc_employees e
JOIN best_practice_data_mesh_hr.doc_departments d
ON e.dept_id = d.dept_id
WHERE e.status = 'active';
验证各部门在职人数:
SELECT dept_name, COUNT(*) AS headcount, location
FROM best_practice_data_mesh.dp_hr_org
GROUP BY dept_name, location ORDER BY headcount DESC;
状态的员工(如 Jack Luo)不在消费结果中。如果消费方需要包含离职/请假记录,需要向 HR 域所有者申请专门视图,或在数据产品定义中增加版本选项。
Finance 数据产品:应收账款视图
Finance 域将发票和收款两张表聚合,对外暴露每张发票的应收余额和 AR 状态。
CREATE OR REPLACE VIEW best_practice_data_mesh.dp_finance_ar AS
SELECT
inv.invoice_id,
inv.order_id,
inv.customer_id,
inv.issue_date,
inv.due_date,
inv.amount,
inv.status AS invoice_status,
COALESCE(SUM(pay.amount_paid), 0) AS total_paid,
inv.amount - COALESCE(SUM(pay.amount_paid), 0) AS outstanding_balance,
CASE
WHEN inv.status = 'paid' THEN 'CLOSED'
WHEN inv.status = 'overdue' THEN 'OVERDUE'
WHEN CURRENT_DATE() > inv.due_date THEN 'OVERDUE'
ELSE 'OPEN'
END AS ar_status
FROM best_practice_data_mesh_finance.doc_invoices inv
LEFT JOIN best_practice_data_mesh_finance.doc_payments pay
ON inv.invoice_id = pay.invoice_id
GROUP BY
inv.invoice_id, inv.order_id, inv.customer_id,
inv.issue_date, inv.due_date, inv.amount, inv.status;
验证应收账款汇总:
SELECT ar_status, COUNT(*) AS cnt, ROUND(SUM(outstanding_balance), 2) AS total_outstanding
FROM best_practice_data_mesh.dp_finance_ar
GROUP BY ar_status ORDER BY total_outstanding DESC;
SELECT invoice_id, customer_id, amount, total_paid, outstanding_balance, ar_status
FROM best_practice_data_mesh.dp_finance_ar
ORDER BY ar_status, outstanding_balance DESC;
-- 将三个数据产品视图授权给分析师角色
GRANT SELECT ON VIEW best_practice_data_mesh.dp_sales_revenue TO ROLE workspace_analyst;
GRANT SELECT ON VIEW best_practice_data_mesh.dp_hr_org TO ROLE workspace_analyst;
GRANT SELECT ON VIEW best_practice_data_mesh.dp_finance_ar TO ROLE workspace_analyst;
验证授权结果
SHOW GRANTS ON VIEW best_practice_data_mesh.dp_sales_revenue;
CREATE OR REPLACE VIEW best_practice_data_mesh.dp_cross_domain_order_ar AS
SELECT
s.order_id,
s.customer_id,
s.region,
s.order_date,
ROUND(s.total_amount, 2) AS order_amount,
f.invoice_status,
ROUND(f.total_paid, 2) AS total_paid,
ROUND(f.outstanding_balance, 2) AS outstanding_balance,
f.ar_status
FROM best_practice_data_mesh.dp_sales_revenue s
LEFT JOIN best_practice_data_mesh.dp_finance_ar f
ON s.order_id = f.order_id;
查询逾期或开放中的订单:
SELECT order_id, customer_id, order_amount, invoice_status, outstanding_balance, ar_status
FROM best_practice_data_mesh.dp_cross_domain_order_ar
WHERE ar_status IN ('OVERDUE', 'OPEN')
ORDER BY outstanding_balance DESC;
CREATE DYNAMIC TABLE IF NOT EXISTS best_practice_data_mesh.dp_sales_rep_performance
AS
SELECT
o.sales_rep_id,
emp.full_name AS rep_name,
emp.dept_id,
o.region,
COUNT(DISTINCT o.order_id) AS order_count,
ROUND(SUM(o.total_amount), 2) AS total_revenue,
ROUND(AVG(o.total_amount), 2) AS avg_order_value,
SUM(CASE WHEN o.status = 'shipped' THEN 1 ELSE 0 END) AS shipped_count
FROM best_practice_data_mesh_sales.doc_sales_orders o
LEFT JOIN best_practice_data_mesh_hr.doc_employees emp
ON o.sales_rep_id = emp.employee_id
WHERE o.status != 'cancelled'
GROUP BY o.sales_rep_id, emp.full_name, emp.dept_id, o.region;
⚠️ 注意:本示例中
sales_rep_id
sales_rep_id
(如
REP001
REP001
)与
employee_id
employee_id
(如
EMP001
EMP001
)编码规则不同,导致 JOIN 结果
rep_name
rep_name
为 NULL。在真实 Data Mesh 场景中,跨域 JOIN 的 ID 对齐是数据产品设计的核心挑战——Sales 域需要在数据合同中明确
SELECT sales_rep_id, rep_name, region, order_count, total_revenue, avg_order_value, shipped_count
FROM best_practice_data_mesh.dp_sales_rep_performance
ORDER BY total_revenue DESC;
SELECT
SUBSTR(job_text, 1, 60) AS sql_preview,
COUNT(*) AS query_count,
ROUND(AVG(execution_time), 3) AS avg_elapsed_sec,
MAX(rows_produced) AS max_rows_produced,
SUM(CASE WHEN status = 'SUCCEED' THEN 1 ELSE 0 END) AS success_count
FROM sys.information_schema.job_history
WHERE workspace_name = 'quick_start'
AND pt_date = CURRENT_DATE()
AND job_text LIKE '%best_practice_data_mesh%'
GROUP BY SUBSTR(job_text, 1, 60)
ORDER BY query_count DESC
LIMIT 10;