如果想对表中已经存在数据创建索引,需要使用 BUILD INDEX 命令构建索引(新插入的数据会自动构建索引)
-- 构建向量索引(实际验证过的命令)
BUILD INDEX idx_embedding ON vector_demo.simple_embeddings;
-- 构建文档表的向量索引(指定范围)
BUILD INDEX idx_doc_embedding ON vector_demo.document_embeddings
WHERE create_date >= '2024-01-01' AND create_date <= '2024-12-31';
4.2 监控索引构建进度
-- 查看表的索引列表
SHOW INDEXES FROM vector_demo.product_embeddings;
-- 查看表的详细信息,包括索引
DESC EXTENDED vector_demo.product_embeddings ;
5. 使用向量索引进行相似度搜索
5.1 基本向量搜索
WITH query_vector AS (
SELECT CAST(public.fc_embeddings(
'text',
'拍照功能强大的手机,相机系统好,影像能力出色',
'${API_KEY}',
'text-embedding-v4',
1024
) AS VECTOR(FLOAT, 1024)) as vec
)
SELECT
p.product_id,
p.product_name,
p.category,
p.description,
cosine_distance(p.embedding, q.vec) as similarity_score
FROM vector_demo.product_embeddings p, query_vector q
ORDER BY similarity_score ASC
LIMIT 5;
展示结果:
预览
5.3 带过滤条件的向量搜索
WITH query_vector AS (
SELECT CAST(public.fc_embeddings(
'text',
'国产高端旗舰手机',
'${API_KEY}',
'text-embedding-v4',
1024
) AS VECTOR(FLOAT, 1024)) as vec
)
SELECT
product_id,
product_name,
description,
ROUND(cosine_distance(embedding, vec), 4) as distance
FROM vector_demo.product_embeddings, query_vector
WHERE product_name NOT LIKE '%iPhone%' -- 排除苹果产品
ORDER BY distance ASC
LIMIT 3;
-- 使用向量列复用减少存储
CREATE INDEX idx_storage_optimized (embedding) USING VECTOR
PROPERTIES(
"reuse.vector.column" = "true",
"scalar.type" = "i8" -- 使用8位整数进一步压缩
);
7. 管理向量索引
7.1 查看索引信息
-- 查看表的所有索引
SHOW INDEXES FROM vector_demo.product_embeddings;
-- 查看索引详细信息
DESC INDEX idx_product_embedding ON vector_demo.product_embeddings;
7.2 删除和重建索引
-- 删除索引
DROP INDEX idx_product_embedding ON vector_demo.product_embeddings;
-- 重新创建索引(使用新参数)
CREATE INDEX idx_product_embedding_v2 ON vector_demo.product_embeddings(embedding)
USING VECTOR PROPERTIES(
"distance.function" = "negative_dot_product",
"m" = "24",
"ef.construction" = "300"
);
-- 构建新索引
BUILD INDEX idx_product_embedding_v2 ON vector_demo.product_embeddings;
8. 性能监控和故障排查
8.1 查询性能分析
-- 使用 EXPLAIN 分析查询计划
EXPLAIN
WITH query_vector AS (
SELECT public.fc_embeddings('text', '测试查询', '${API_KEY}', 'text-embedding-v4', 1024) as vec
)
SELECT * FROM vector_demo.product_embeddings p, query_vector q
ORDER BY cosine_distance(p.embedding, q.vec) ASC
LIMIT 10;
SET cz.sql.index.prewhere.enabled=true; -- 当前需要设置开关,后续版本会默认开启
SELECT id, doc, l2_distance(vec, vector(1,2,3,4)) as dist FROM some_table WHERE match_regexp(doc, '.*hello.*', map('analyzer', 'keyword')) AND l2_distance(vec, vector(1,2,3,4)) < 1000 ORDER BY dist LIMIT 100