3.3 KiB
3.3 KiB
目录结构
- pgsql-gis/
- docker-compose.yaml
mkdir pgsql-gis
docker-compose.yaml
version: '3.8'
services:
pgsql-gis-16:
# 支持全文检索的自编译镜像
image: pgsql-gis-fts:latest
container_name: pgsql-gis-16
ports:
- "35430:5432" # 替换
volumes:
- ./pgsql-gis:/var/lib/postgresql/data
environment:
POSTGRES_USER: # 替换
POSTGRES_PASSWORD: # 替换
POSTGRES_DB: # 替换
POSTGRES_INITDB_ARGS: --encoding=UTF8
restart: on-failure:3
添加全文索引支持
需要从 git 克隆 zhparser 添加中文分词支持,执行 docker build 命令的目录结构长这样:
- Dockerfile
- zhparser/
git clone https://github.com/amutu/zhparser.git
Dockerfile
FROM swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/postgis/postgis:16-3.5
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
build-essential \
git \
libcurl4-openssl-dev \
libxml2-dev \
wget \
postgresql-server-dev-16 \
&& rm -rf /var/lib/apt/lists/*
RUN pg_config --version
WORKDIR /tmp
RUN wget http://www.xunsearch.com/scws/down/scws-1.2.3.tar.bz2 \
&& tar xjf scws-1.2.3.tar.bz2 \
&& cd scws-1.2.3 \
&& ./configure \
&& make && make install \
&& cd ..
COPY zhparser zhparser
RUN cd zhparser \
&& export PG_CONFIG=$(which pg_config) \
&& echo "Using pg_config: $PG_CONFIG" \
&& make USE_PGXS=1 \
&& make USE_PGXS=1 install \
&& cd ..
RUN rm -rf /tmp/*
WORKDIR /
编译
docker build -t pgsql-gis-fts .
启用中文分词扩展
CREATE EXTENSION IF NOT EXISTS zhparser;
CREATE TEXT SEARCH CONFIGURATION chinese_mix (PARSER = zhparser);
-- DROP TEXT SEARCH CONFIGURATION IF EXISTS chinese_mix;
ALTER TEXT SEARCH CONFIGURATION chinese_mix ADD MAPPING FOR n,v,a,i,e,l,d,j,m,q,r,t,u,w,x,z WITH simple;
测试
CREATE TABLE articles (
id SERIAL PRIMARY KEY,
title TEXT,
content TEXT,
-- 生成列:自动维护 tsvector,避免每次查询都计算,提高性能
content_tsvector TSVECTOR GENERATED ALWAYS AS (to_tsvector('chinese_mix', content)) STORED
);
-- 插入一些混合数据
INSERT INTO articles (title, content) VALUES
('AI 技术展望', '人工智能 (AI) 正在改变世界,PostgreSQL 是存储这些数据的首选数据库。'),
('PostgreSQL 16 新特性', 'PostgreSQL 16 带来了更好的性能,支持 JSON 增强和中文分词优化。'),
('日常开发笔记', 'Today I learned about zhparser. 它让中文搜索变得很简单。');
-- 搜索包含 "人工智能" 的文章
SELECT title, content
FROM articles
WHERE content_tsvector @@ to_tsquery('chinese_mix', '人工智能');
-- 搜索包含 "PostgreSQL" 的文章
SELECT title, content
FROM articles
WHERE content_tsvector @@ to_tsquery('chinese_mix', 'PostgreSQL');
-- 搜索既包含 "数据库" 又包含 "PostgreSQL" 的文章
SELECT title, content
FROM articles
WHERE content_tsvector @@ to_tsquery('chinese_mix', '数据库 & PostgreSQL');
-- 搜索包含 "AI" 或者 "改变" 的文章
SELECT title, content
FROM articles
WHERE content_tsvector @@ to_tsquery('chinese_mix', 'AI | 改变');