(feat) 一种全文搜索的后端轻量化实现

This commit is contained in:
Andecheal 2023-12-25 13:39:20 +08:00
parent 38b2c50147
commit 1e4a6d85e5
18 changed files with 559 additions and 1 deletions

View File

@ -69,6 +69,30 @@
<artifactId>thumbnailator</artifactId>
<version>${thumbnailator.version}</version>
</dependency>
<!-- Lucene core library -->
<!-- NOTE(review): the versions below are written inline, whereas the neighbouring thumbnailator dependency uses a ${...version} property; consider a shared lucene.version property. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>9.9.1</version>
</dependency>
<!-- Lucene query parser library -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>9.9.1</version>
</dependency>
<!-- Chinese word segmentation (IK analyzer) -->
<!-- NOTE(review): confirm that ik-analyzer 8.5.0 is compatible with the Lucene 9.9.1 artifacts declared here. -->
<dependency>
<groupId>com.github.magese</groupId>
<artifactId>ik-analyzer</artifactId>
<version>8.5.0</version>
</dependency>
<!-- Lucene result highlighting -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>9.9.1</version>
</dependency>
</dependencies>

View File

@ -0,0 +1,55 @@
package com.blossom.backend.base.search;
import com.blossom.backend.base.search.message.ArticleIndexMsg;
import com.blossom.backend.base.search.message.IndexMsg;
import com.blossom.backend.base.search.message.IndexMsgTypeEnum;
import com.blossom.backend.base.search.message.consumer.BatchIndexMsgConsumer;
import com.blossom.backend.server.article.draft.ArticleService;
import com.blossom.backend.server.article.draft.pojo.ArticleEntity;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Monitors and maintains the existing full-text index.
 */
@Component
@Slf4j
public class IndexObserver {

    private SearchProperties searchProperties;
    private ArticleService articleService;
    private BatchIndexMsgConsumer batchIndexMsgConsumer;

    IndexObserver(SearchProperties searchProperties, ArticleService articleService, BatchIndexMsgConsumer batchIndexMsgConsumer) {
        this.searchProperties = searchProperties;
        this.articleService = articleService;
        this.batchIndexMsgConsumer = batchIndexMsgConsumer;
    }

    /**
     * Index maintenance job, triggered by the cron expression on this method.
     * <p>Does nothing when no index path is configured. Otherwise every article is loaded with its
     * markdown, turned into an ADD message and handed to the batch consumer in one call.</p>
     *
     * @throws IOException propagated from {@link BatchIndexMsgConsumer#batchReload(List)}
     */
    @Scheduled(cron = "0 0 04 * * ?")
    public void reloadIndex() throws IOException {
        if (!StringUtils.hasText(searchProperties.getPath())) {
            return;
        }
        List<ArticleEntity> articles = articleService.listAllArticleWithContent();
        List<IndexMsg> reloadMsgs = new ArrayList<>();
        for (ArticleEntity entity : articles) {
            reloadMsgs.add(new ArticleIndexMsg(IndexMsgTypeEnum.ADD, entity.getId(), entity.getName(), entity.getMarkdown()));
        }
        batchIndexMsgConsumer.batchReload(reloadMsgs);
    }
}

View File

@ -0,0 +1,13 @@
package com.blossom.backend.base.search;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
* Configuration properties for full-text search, bound from the "project.search" prefix.
*/
@Data
@Configuration
@ConfigurationProperties(prefix = "project.search")
public class SearchProperties {
/**
* Path of the index directory. Defaults to an empty string; the search components in this
* package check this value with StringUtils.hasText and treat a blank path as "search not configured".
*/
private String path = "";
}

View File

@ -0,0 +1,23 @@
package com.blossom.backend.base.search;
import lombok.Data;
/**
* Result object returned by full-text search.
*/
@Data
public class SearchResult {
/**
* Primary key
*/
private Long id;
/**
* Title
*/
private String title;
/**
* Body text
*/
private String content;
}

View File

@ -0,0 +1,104 @@
package com.blossom.backend.base.search;
import cn.hutool.core.convert.Convert;
import cn.hutool.core.util.ArrayUtil;
import com.blossom.common.base.exception.XzException500;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
 * Runs keyword queries against the Lucene index stored at the configured search path and
 * returns the hits with matched terms highlighted.
 */
@Component
public class Searcher {

    /** Upper bound on the number of hits returned by one search. */
    private static final int MAX_HITS = 10;

    /** Fragment size handed to the highlighter's {@link SimpleFragmenter}. */
    private static final int FRAGMENT_SIZE = 20;

    private SimpleHTMLFormatter simpleHTMLFormatter;
    private String[] queryField;

    @Autowired
    private SearchProperties searchProperties;

    Searcher() {
        // Formatter for highlighted terms. The closing tag must be </B>; emitting a second
        // opening tag leaves the bold element unterminated in the rendered result.
        this.simpleHTMLFormatter = new SimpleHTMLFormatter("<B>", "</B>");
        // Default fields searched by every query.
        this.queryField = new String[]{"title", "content"};
    }

    /**
     * Searches the "title" and "content" fields for the keyword.
     *
     * @param keyword query text, parsed with the classic Lucene query syntax
     * @return at most {@value #MAX_HITS} results; empty when the keyword is blank or nothing matches
     * @throws XzException500 when no index path is configured or the query fails
     */
    public List<SearchResult> search(String keyword) {
        List<SearchResult> result = new ArrayList<>();
        if (!StringUtils.hasText(searchProperties.getPath())) {
            throw new XzException500("未配置索引库地址,无法进行全文检索");
        }
        // A blank keyword cannot match anything; answer with an empty list instead of letting
        // the query parser fail and surface as a server error.
        if (!StringUtils.hasText(keyword)) {
            return result;
        }
        // One analyzer is shared by the parser and the highlighter and is closed with the reader.
        try (Directory directory = FSDirectory.open(new File(searchProperties.getPath()).toPath());
             IndexReader indexReader = DirectoryReader.open(directory);
             IKAnalyzer analyzer = new IKAnalyzer()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            Query query = new MultiFieldQueryParser(queryField, analyzer).parse(keyword);
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            if (ArrayUtil.isEmpty(scoreDocs)) {
                return result;
            }
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
            for (ScoreDoc hit : scoreDocs) {
                Document document = indexSearcher.doc(hit.doc);
                SearchResult searchResult = new SearchResult();
                searchResult.setId(Convert.toLong(document.get("id")));
                searchResult.setTitle(highlightOrRaw(highlighter, analyzer, "title", document.get("title")));
                searchResult.setContent(highlightOrRaw(highlighter, analyzer, "content", document.get("content")));
                result.add(searchResult);
            }
        } catch (Exception e) {
            // Only the message-taking constructor of XzException500 is visible here, so the root
            // cause is attached as a suppressed exception rather than being discarded.
            XzException500 failure = new XzException500("索引查询异常");
            failure.addSuppressed(e);
            throw failure;
        }
        return result;
    }

    /**
     * Returns the best highlighted fragment of a stored field value, or the value itself when it
     * is blank or contains no match. Each field's value stays in its own result property.
     */
    private String highlightOrRaw(Highlighter highlighter, IKAnalyzer analyzer, String field, String text) throws Exception {
        if (!StringUtils.hasText(text)) {
            return text;
        }
        String fragment = highlighter.getBestFragment(analyzer, field, text);
        return StringUtils.hasText(fragment) ? fragment : text;
    }
}

View File

@ -0,0 +1,57 @@
package com.blossom.backend.base.search.message;
import cn.hutool.core.convert.Convert;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.springframework.util.StringUtils;
/**
 * Index message implementation for articles.
 * <p>Every constructor builds a Lucene {@link Document} whose "id" field is stored verbatim.
 * "title" and "content" are added as analyzed {@link TextField}s, and only when they contain text.
 * All constructors share one document layout, so a title indexed through any of them is analyzed
 * the same way.</p>
 */
public class ArticleIndexMsg implements IndexMsg {

    private final IndexMsgTypeEnum type;
    private final Document data;

    /**
     * Message carrying the id and title only.
     *
     * @param indexMsgTypeEnum action to perform
     * @param id               article id
     * @param title            article title, may be blank or null
     */
    public ArticleIndexMsg(IndexMsgTypeEnum indexMsgTypeEnum, Long id, String title) {
        this(indexMsgTypeEnum, id, title, null);
    }

    /**
     * Message carrying the id only.
     *
     * @param indexMsgTypeEnum action to perform
     * @param id               article id
     */
    public ArticleIndexMsg(IndexMsgTypeEnum indexMsgTypeEnum, Long id) {
        this(indexMsgTypeEnum, id, null, null);
    }

    /**
     * Message carrying the id, title and markdown body.
     *
     * @param indexMsgTypeEnum action to perform
     * @param id               article id
     * @param title            article title, skipped when blank or null
     * @param markdownContent  article body, skipped when blank or null
     */
    public ArticleIndexMsg(IndexMsgTypeEnum indexMsgTypeEnum, Long id, String title, String markdownContent) {
        this.type = indexMsgTypeEnum;
        Document document = new Document();
        // The id is an exact-match key, so it is indexed as a single un-analyzed term.
        document.add(new StringField("id", Convert.toStr(id), Field.Store.YES));
        if (StringUtils.hasText(title)) {
            document.add(new TextField("title", title, Field.Store.YES));
        }
        if (StringUtils.hasText(markdownContent)) {
            document.add(new TextField("content", markdownContent, Field.Store.YES));
        }
        this.data = document;
    }

    @Override
    public IndexMsgTypeEnum getType() {
        return this.type;
    }

    @Override
    public Document getData() {
        return this.data;
    }
}

View File

@ -0,0 +1,14 @@
package com.blossom.backend.base.search.message;
import org.apache.lucene.document.Document;
/**
* Index message interface.
*/
public interface IndexMsg {
/**
* @return the action this message asks the index consumers to perform
*/
IndexMsgTypeEnum getType();
/**
* @return the Lucene document carried by this message; the consumers in this change read its "id" field
*/
Document getData();
}

View File

@ -0,0 +1,10 @@
package com.blossom.backend.base.search.message;
/**
* Types of index action.
* ADD is handled by the consumers as insert-or-update keyed by the "id" term;
* DELETE removes the documents matching the "id" term.
*/
public enum IndexMsgTypeEnum {
ADD,DELETE
}

View File

@ -0,0 +1,58 @@
package com.blossom.backend.base.search.message.consumer;
import com.blossom.backend.base.search.SearchProperties;
import com.blossom.backend.base.search.message.IndexMsg;
import com.blossom.backend.base.search.message.IndexMsgTypeEnum;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
 * Applies a whole list of index messages through a single {@link IndexWriter} and commits once.
 */
@Component
@Slf4j
public class BatchIndexMsgConsumer {

    private final SearchProperties searchProperties;

    BatchIndexMsgConsumer(SearchProperties searchProperties) {
        this.searchProperties = searchProperties;
        if (!StringUtils.hasText(searchProperties.getPath())) {
            log.info("未配置索引库地址, 关闭全文搜索功能支持");
        }
    }

    /**
     * Applies every message in the list to the index and commits.
     * <p>ADD messages insert or replace the document with the same "id" term; DELETE messages
     * remove it; any other type is ignored. An empty list still opens and commits the index.</p>
     *
     * @param list messages to apply
     * @throws IOException when the index directory cannot be opened or written
     */
    public void batchReload(List<IndexMsg> list) throws IOException {
        // With no path configured the feature is announced as disabled at startup; opening a
        // directory for a blank path would create index files in an unintended location.
        if (!StringUtils.hasText(searchProperties.getPath())) {
            return;
        }
        try (Directory directory = FSDirectory.open(new File(searchProperties.getPath()).toPath());
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(new IKAnalyzer()))) {
            for (IndexMsg indexMsg : list) {
                if (IndexMsgTypeEnum.ADD.equals(indexMsg.getType())) {
                    // Insert or update the document
                    Document document = indexMsg.getData();
                    indexWriter.updateDocument(idTerm(document), document);
                } else if (IndexMsgTypeEnum.DELETE.equals(indexMsg.getType())) {
                    // Delete the document
                    indexWriter.deleteDocuments(idTerm(indexMsg.getData()));
                }
            }
            // commit() already flushes pending changes, so no separate flush() is needed.
            indexWriter.commit();
        }
    }

    /** Builds the term that identifies a document by its stored "id" field. */
    private static Term idTerm(Document document) {
        return new Term("id", document.get("id"));
    }
}

View File

@ -0,0 +1,79 @@
package com.blossom.backend.base.search.message.consumer;
import com.blossom.backend.base.search.SearchProperties;
import com.blossom.backend.base.search.message.IndexMsg;
import com.blossom.backend.base.search.message.IndexMsgTypeEnum;
import com.blossom.backend.base.search.queue.IndexMsgQueue;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.File;
import java.util.concurrent.Executors;
/**
* 索引消息的消费者
*
* @author Andecheal
*/
@Component
@Slf4j
public class IndexMsgConsumer {
private SearchProperties searchProperties;
IndexMsgConsumer(SearchProperties searchProperties) {
this.searchProperties = searchProperties;
if (!StringUtils.hasText(searchProperties.getPath())) {
log.info("未配置索引库地址, 关闭全文搜索功能支持");
return;
}
Executors.newSingleThreadExecutor().submit(new Runnable() {
@Override
public void run() {
while (true) {
try {
IndexMsg indexMsg = IndexMsgQueue.take();
if (IndexMsgTypeEnum.ADD.equals(indexMsg.getType())) {
// 插入 or 更新索引
// 打开索引库
try (Directory directory = FSDirectory.open(new File(searchProperties.getPath()).toPath());
IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(new IKAnalyzer()));
) {
Document document = indexMsg.getData();
String id = document.get("id");
indexWriter.updateDocument(new Term("id", id), document);
indexWriter.flush();
indexWriter.commit();
}
} else if (IndexMsgTypeEnum.DELETE.equals(indexMsg.getType())) {
// 删除索引
try (Directory directory = FSDirectory.open(new File(searchProperties.getPath()).toPath());
IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(new IKAnalyzer()));
) {
Document document = indexMsg.getData();
String id = document.get("id");
indexWriter.deleteDocuments(new Term("id", id));
indexWriter.flush();
indexWriter.commit();
}
}
} catch (Exception e) {
log.error("消费失败" + e.getMessage());
}
}
}
});
}
}

View File

@ -0,0 +1,46 @@
package com.blossom.backend.base.search.queue;
import com.blossom.backend.base.search.message.IndexMsg;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
/**
 * Bounded blocking queue used to hand index messages to the consumer.
 *
 * @author Andecheal
 */
public class IndexMsgQueue {

    /**
     * Blocking queue holding the pending messages.
     */
    private static final BlockingQueue<IndexMsg> indexMsgQueue = new ArrayBlockingQueue<>(2048);

    /**
     * Submits a message without blocking.
     * <p>When the queue is full the message is dropped. {@code BlockingQueue.add} would throw an
     * unchecked {@code IllegalStateException} in that situation, which the callers, who only
     * handle {@link InterruptedException}, would not catch. The queue does fill up when no
     * consumer is draining it; {@code IndexMsgConsumer} starts none when the index path is not
     * configured.</p>
     *
     * @param msg message to enqueue
     * @throws InterruptedException never thrown by this implementation; kept for caller compatibility
     */
    public static void add(IndexMsg msg) throws InterruptedException {
        boolean accepted = indexMsgQueue.offer(msg);
        if (!accepted) {
            // Deliberately dropped: an index update is best-effort and must not fail the caller.
            return;
        }
    }

    /**
     * Blocking entry point: waits until there is room in the queue.
     *
     * @param msg message to enqueue
     * @throws InterruptedException if interrupted while waiting for space
     */
    public static void put(IndexMsg msg) throws InterruptedException {
        indexMsgQueue.put(msg);
    }

    /**
     * Takes the next message, waiting until one is available.
     *
     * @return the head of the queue
     * @throws InterruptedException if interrupted while waiting
     */
    public static IndexMsg take() throws InterruptedException {
        return indexMsgQueue.take();
    }
}

View File

@ -28,6 +28,8 @@ public interface ArticleMapper extends BaseMapper<ArticleEntity> {
*/
List<ArticleEntity> listAll(ArticleEntity entity);
/**
* Lists all articles for bulk index maintenance.
* The mapped statement with the same id selects the id, name and markdown columns of blossom_article.
*/
List<ArticleEntity> listAllArticleWithContent();
/**
* 根据ID修改
*/

View File

@ -4,6 +4,9 @@ import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.ObjUtil;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.blossom.backend.base.search.message.ArticleIndexMsg;
import com.blossom.backend.base.search.message.IndexMsgTypeEnum;
import com.blossom.backend.base.search.queue.IndexMsgQueue;
import com.blossom.backend.server.article.TagEnum;
import com.blossom.backend.server.article.draft.pojo.ArticleEntity;
import com.blossom.backend.server.article.draft.pojo.ArticleQueryReq;
@ -79,6 +82,18 @@ public class ArticleService extends ServiceImpl<ArticleMapper, ArticleEntity> {
return articles;
}
/**
 * Loads every article together with its markdown field, for bulk maintenance of the index.
 *
 * @return the articles returned by the mapper, or a new empty list when there are none
 */
public List<ArticleEntity> listAllArticleWithContent() {
    List<ArticleEntity> found = baseMapper.listAllArticleWithContent();
    return CollUtil.isEmpty(found) ? new ArrayList<>() : found;
}
/**
* 查询列表
* <p>避免在查询主要信息时返回正文信息造成的性能影响, 该接口不返回文章正文 toc/markdown/html</p>
@ -138,6 +153,13 @@ public class ArticleService extends ServiceImpl<ArticleMapper, ArticleEntity> {
@Transactional(rollbackFor = Exception.class)
public ArticleEntity insert(ArticleEntity req) {
    baseMapper.insert(req);
    // Queue an index update for the new article. The four-argument message constructor skips a
    // blank or null title/body instead of rejecting it, and indexes the title as analyzed text.
    ArticleIndexMsg articleIndexMsg = new ArticleIndexMsg(IndexMsgTypeEnum.ADD, req.getId(), req.getName(), req.getMarkdown());
    try {
        IndexMsgQueue.add(articleIndexMsg);
    } catch (InterruptedException e) {
        // Not rethrown: an index failure must not fail the insert. Restore the interrupt flag
        // for code further up the stack and keep the stack trace in the log.
        Thread.currentThread().interrupt();
        log.error("索引更新失败", e);
    }
    return req;
}
@ -149,6 +171,13 @@ public class ArticleService extends ServiceImpl<ArticleMapper, ArticleEntity> {
public Long update(ArticleEntity req) {
    XzException404.throwBy(req.getId() == null, "ID不得为空");
    baseMapper.updById(req);
    // Queue an index update carrying the id and title only. The four-argument message
    // constructor skips a null title instead of rejecting it and indexes it as analyzed text.
    // NOTE(review): the consumers replace the whole document for this id, so a message without
    // a body removes the previously indexed content until the body is indexed again; confirm
    // whether the stored markdown should be included here.
    ArticleIndexMsg articleIndexMsg = new ArticleIndexMsg(IndexMsgTypeEnum.ADD, req.getId(), req.getName(), null);
    try {
        IndexMsgQueue.add(articleIndexMsg);
    } catch (InterruptedException e) {
        // Not rethrown: an index failure must not fail the update. Restore the interrupt flag
        // for code further up the stack and keep the stack trace in the log.
        Thread.currentThread().interrupt();
        log.error("索引更新失败", e);
    }
    return req.getId();
}
@ -169,6 +198,14 @@ public class ArticleService extends ServiceImpl<ArticleMapper, ArticleEntity> {
baseMapper.updContentById(req);
referenceService.bind(req.getUserId(), req.getId(), req.getName(), req.getReferences());
logService.insert(req.getId(), 0, req.getMarkdown());
// 更新索引
ArticleIndexMsg articleIndexMsg = new ArticleIndexMsg(IndexMsgTypeEnum.ADD, req.getId(), req.getName(), req.getMarkdown());
try {
IndexMsgQueue.add(articleIndexMsg);
} catch (InterruptedException e) {
// 不抛出, 暂时先记录
log.error("索引更新失败" + e.getMessage());
}
return req.getWords();
}
@ -197,6 +234,14 @@ public class ArticleService extends ServiceImpl<ArticleMapper, ArticleEntity> {
referenceService.delete(id);
// 删除访问记录
viewService.delete(id);
// 删除索引
ArticleIndexMsg articleIndexMsg = new ArticleIndexMsg(IndexMsgTypeEnum.DELETE, id);
try {
IndexMsgQueue.add(articleIndexMsg);
} catch (InterruptedException e) {
// 不抛出, 暂时先记录
log.error("索引更新失败" + e.getMessage());
}
}
/**

View File

@ -6,6 +6,9 @@ import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.blossom.backend.base.param.ParamEnum;
import com.blossom.backend.base.param.ParamService;
import com.blossom.backend.base.param.pojo.ParamEntity;
import com.blossom.backend.base.search.message.ArticleIndexMsg;
import com.blossom.backend.base.search.message.IndexMsgTypeEnum;
import com.blossom.backend.base.search.queue.IndexMsgQueue;
import com.blossom.backend.server.article.recycle.pojo.ArticleRecycleEntity;
import com.blossom.backend.server.folder.FolderService;
import com.blossom.backend.server.folder.pojo.FolderEntity;
@ -58,6 +61,13 @@ public class ArticleRecycleService extends ServiceImpl<ArticleRecycleMapper, Art
baseMapper.restore(id, folder.getId());
}
baseMapper.deleteById(id);
ArticleIndexMsg articleIndexMsg = new ArticleIndexMsg(IndexMsgTypeEnum.ADD, article.getId(), article.getName(), article.getMarkdown());
try {
IndexMsgQueue.add(articleIndexMsg);
} catch (InterruptedException e) {
// 不抛出, 暂时先记录
log.error("索引更新失败" + e.getMessage());
}
}
/**

View File

@ -77,4 +77,7 @@ project:
# 请以 /pic 结尾, 如果你在 nginx 中配置有代理, 注意别忘了添加你的代理路径
domain: "http://localhost:9999/pic/"
# 请以 / 开头, / 结尾, 简短的路径在文章中有更好的显示效果, 过长一定程度会使文章内容混乱
default-path: "/home/bl/img/"
default-path: "/home/bl/img/"
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 全文搜索 ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
search:
# Directory of the full-text index; when blank, the search components log that full-text search is disabled.
# NOTE(review): this is the same directory as the picture default-path above; confirm the index is meant to share it.
path: "/home/bl/img/"

View File

@ -79,3 +79,6 @@ project:
domain: "https://www.wangyunf.com/bl/pic/"
# 请以 / 开头, / 结尾, 简短的路径在文章中有更好的显示效果, 过长一定程度会使文章内容混乱
default-path: "/home/bl/img/"
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 全文搜索 ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
search:
# Directory of the full-text index; when blank, the search components log that full-text search is disabled.
# NOTE(review): this is the same directory as the picture default-path above; confirm the index is meant to share it.
path: "/home/bl/img/"

View File

@ -81,3 +81,6 @@ project:
domain: "https://www.wangyunf.com/blall/pic/"
# 请以 / 开头, / 结尾, 简短的路径在文章中有更好的显示效果, 过长一定程度会使文章内容混乱
default-path: "/home/blall/img/"
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ 全文搜索 ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
search:
# Directory of the full-text index; when blank, the search components log that full-text search is disabled.
# NOTE(review): the picture default-path above is "/home/blall/img/" but this points at "/home/bl/img/"; confirm this is intended and not a copy-paste leftover.
path: "/home/bl/img/"

View File

@ -48,6 +48,15 @@
</where>
</select>
<!-- Selects all articles with the id, name and markdown columns; used to build the index in bulk -->
<select id="listAllArticleWithContent" resultType="com.blossom.backend.server.article.draft.pojo.ArticleEntity">
select
id,
`name`,
markdown
from blossom_article
</select>
<!-- 根据ID修改 -->
<update id="updById">