数据写入与修改
DML 命令用于向表中写入、更新、删除数据,以及合并来自多个数据源的变更。
本章内容
常用操作
INSERT
-- 插入单行
INSERT INTO orders (order_id, customer_id, amount) VALUES (1001, 42, 299.00);
-- 从查询结果插入
INSERT INTO orders_archive
SELECT * FROM orders WHERE created_at < '2024-01-01';
-- 覆盖写入(清空后重新写入)
INSERT OVERWRITE orders_staging
SELECT * FROM orders WHERE status = 'PENDING';
UPDATE
-- 按条件更新
UPDATE orders
SET status = 'SHIPPED', updated_at = CURRENT_TIMESTAMP()
WHERE order_id = 1001;
DELETE
-- 按条件删除
DELETE FROM orders WHERE status = 'CANCELLED' AND created_at < '2023-01-01';
MERGE INTO(Upsert)
-- CDC Upsert:有则更新,无则插入,标记删除则删除
MERGE INTO orders AS target
USING orders_changes AS source
ON target.order_id = source.order_id
WHEN MATCHED AND source.op = 'D' THEN DELETE
WHEN MATCHED THEN UPDATE SET
target.status = source.status,
target.updated_at = source.updated_at
WHEN NOT MATCHED THEN INSERT (order_id, customer_id, amount, status, created_at)
VALUES (source.order_id, source.customer_id, source.amount, source.status, source.created_at);
TRUNCATE
-- 清空表(保留表结构)
TRUNCATE TABLE orders_staging;
注意事项
INSERT OVERWRITE
INSERT OVERWRITE
会清空目标表(或目标分区)后重新写入,操作不可逆
MERGE INTO
MERGE INTO
的 ON 条件中使用的列应具有唯一性,否则一行源数据匹配多行目标数据时行为未定义
TRUNCATE
TRUNCATE
比 DELETE FROM table
DELETE FROM table
(无 WHERE)更高效,但同样不可逆;Time Travel 可在保留期内恢复数据
相关文档