From 4f525443f869f4c86a6d94a365d23d8e7998edd0 Mon Sep 17 00:00:00 2001
From: 8ga
Date: Mon, 2 Mar 2026 13:45:31 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20=E9=83=A8=E7=BD=B2PostGIS.?=
 =?UTF-8?q?md?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 部署PostGIS.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/部署PostGIS.md b/部署PostGIS.md
index 8751afa..0d32367 100644
--- a/部署PostGIS.md
+++ b/部署PostGIS.md
@@ -119,4 +119,58 @@ WORKDIR /
 
 ```bash
 docker build -t pgsql-gis-fts .
+```
+
+## 启用中文分词扩展
+
+```sql
+-- Create the extension
+CREATE EXTENSION IF NOT EXISTS zhparser;
+```
+
+```sql
+-- Create a new text search configuration named chinese_mix.
+-- zhparser is a parser (tokenizer), not a dictionary, so it is attached with PARSER = (COPY = simple would keep the default parser).
+CREATE TEXT SEARCH CONFIGURATION chinese_mix (PARSER = zhparser);
+```
+
+```sql
+-- Map the token types emitted by zhparser to the built-in simple dictionary (which lower-cases tokens).
+-- n,v,a,i,e,l is the set used in the zhparser README: noun, verb, adjective, idiom, 'e' and 'l' (temporary idiom).
+-- English words and numbers are reported by zhparser as type 'e'; token types without a mapping are dropped from the tsvector.
+ALTER TEXT SEARCH CONFIGURATION chinese_mix ADD MAPPING FOR n,v,a,i,e,l WITH simple;
+```
+
+## 测试
+
+```sql
+-- Test the tokenization result
+SELECT to_tsvector('chinese_mix', 'PostgreSQL 是一个强大的开源数据库,支持中文全文检索!');
+-- Expected output should contain lexemes such as 'postgresql', '强大', '开源', '数据库', '支持', '中文', '全文', '检索'
+
+-- Create a test table
+CREATE TABLE articles (
+    id SERIAL PRIMARY KEY,
+    title TEXT,
+    content TEXT
+);
+
+-- Insert test data (mixed Chinese and English)
+INSERT INTO articles (title, content) VALUES
+('Introduction to AI', 'Artificial Intelligence is changing the world.'),
+('人工智能简介', '人工智能正在改变世界,PostgreSQL 是很好的存储工具。'),
+('PG 16 New Features', 'PostgreSQL 16 brings better performance and json features.');
+
+-- Create a GIN expression index; queries must use the same to_tsvector('chinese_mix', content) expression to benefit from it
+CREATE INDEX idx_articles_content ON articles USING GIN (to_tsvector('chinese_mix', content));
+
+-- Run search queries
+-- Search for "人工智能"
+SELECT title FROM articles WHERE to_tsvector('chinese_mix', content) @@ to_tsquery('chinese_mix', '人工智能');
+
+-- Search for "PostgreSQL" (English)
+SELECT title FROM articles WHERE to_tsvector('chinese_mix', content) @@ to_tsquery('chinese_mix', 'PostgreSQL');
+
+-- Search with combined conditions (both terms must match; the second sample row contains both)
+SELECT title FROM articles WHERE to_tsvector('chinese_mix', content) @@ to_tsquery('chinese_mix', 'PostgreSQL & 改变');
 ```
\ No newline at end of file