SELECT match_regexp('a b cd', 'c.*', map('analyzer', 'english'));
-- 返回: true
查询示例
创建倒排索引的表
CREATE TABLE bulkload_data(
id INT,
data STRING,
INDEX id_index (id) INVERTED,
INDEX data_index (data) INVERTED PROPERTIES('analyzer'='unicode')
);
插入测试数据:
INSERT INTO bulkload_data VALUES
(1, 'Lakehouse provides distributed storage'),
(2, 'Inverted index accelerates full text search'),
(3, 'Lakehouse supports multiple data types'),
(4, 'Full text search with inverted index'),
(5, 'Data analytics on Lakehouse platform');
全文检索示例:
--匹配所有,包含Lakehouse和storage
SELECT id, data FROM bulkload_data WHERE match_all(data, 'Lakehouse storage');
+----+---------------------------------------+
| id | data |
+----+---------------------------------------+
| 1 | Lakehouse provides distributed storage |
+----+---------------------------------------+
--匹配任意,包含search或analytics
SELECT id, data FROM bulkload_data WHERE match_any(data, 'search analytics');
+----+-------------------------------------------+
| id | data |
+----+-------------------------------------------+
| 2 | Inverted index accelerates full text search |
| 4 | Full text search with inverted index |
| 5 | Data analytics on Lakehouse platform |
+----+-------------------------------------------+
--匹配短语,包含full text search且连续
SELECT id, data FROM bulkload_data WHERE match_phrase(data, 'full text search');
+----+-------------------------------------------+
| id | data |
+----+-------------------------------------------+
| 2 | Inverted index accelerates full text search |
| 4 | Full text search with inverted index |
+----+-------------------------------------------+
等值查询示例:
--使用倒排索引加速id列的等值查询
SELECT id, data FROM bulkload_data WHERE id = 3;
+----+------------------------------------------+
| id | data |
+----+------------------------------------------+
| 3 | Lakehouse supports multiple data types |
+----+------------------------------------------+
--使用倒排索引加速id列的范围查询
SELECT id, data FROM bulkload_data WHERE id >
## 使用注意事项
### 倒排索引无法优化场景
* 在大多数情况下,倒排索引并不会显著提升亚秒级查询的性能。
* 不支持外部表。
* 仅支持数据类型一致的查询,例如:
```SQL
--可以查询加速的,表中数据类型string
where string_col='10086';
--对需要匹配的值进行转化
where string_col=cast(10086 as string);
--无法查询加速的,因为对表列进行了强制转化
where cast(string_col as int)=10086 ;