浏览代码

【新增】AI 知识库:文档切片向量化入库

xiaoxin 8 个月之前
父节点
当前提交
8e54eef8af
共有 14 个文件被更改,包括 190 次插入和 34 次删除
  1. 39 0
      yudao-module-ai/yudao-module-ai-api/src/main/java/cn/iocoder/yudao/module/ai/enums/knowledge/AiKnowledgeDocumentStatusEnum.java
  2. 2 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/AiKnowledgeCreateMyReqVO.java
  3. 27 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/AiKnowledgeDocumentCreateReqVO.java
  4. 9 3
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeBaseDO.java
  5. 9 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java
  6. 5 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java
  7. 2 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeBaseMapper.java
  8. 2 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeDocumentMapper.java
  9. 2 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java
  10. 4 3
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingService.java
  11. 4 19
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingServiceImpl.java
  12. 6 3
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeBaseServiceImpl.java
  13. 11 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java
  14. 68 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

+ 39 - 0
yudao-module-ai/yudao-module-ai-api/src/main/java/cn/iocoder/yudao/module/ai/enums/knowledge/AiKnowledgeDocumentStatusEnum.java

@@ -0,0 +1,39 @@
+package cn.iocoder.yudao.module.ai.enums.knowledge;
+
+import cn.iocoder.yudao.framework.common.core.IntArrayValuable;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+
+import java.util.Arrays;
+
+/**
+ * Enum of document statuses in the AI knowledge base
+ *
+ * @author xiaoxin
+ */
+@AllArgsConstructor
+@Getter
+public enum AiKnowledgeDocumentStatusEnum implements IntArrayValuable {
+
+    IN_PROGRESS(10, "索引中"), // indexing in progress
+    SUCCESS(20, "可用"), // available
+    FAIL(30, "失败"); // failed
+
+    /**
+     * Status code
+     */
+    private final Integer status;
+
+    /**
+     * Status name
+     */
+    private final String name;
+    // Status codes of every constant, collected from values() in declaration order
+    public static final int[] ARRAYS = Arrays.stream(values()).mapToInt(AiKnowledgeDocumentStatusEnum::getStatus).toArray();
+
+    @Override
+    public int[] array() {
+        return ARRAYS;
+    }
+
+}

+ 2 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/AiKnowledgeCreateMyReqVO.java

@@ -14,11 +14,11 @@ import java.util.List;
 @Data
 public class AiKnowledgeCreateMyReqVO {
 
-    @Schema(description = "知识库名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "")
+    @Schema(description = "知识库名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "ruoyi-vue-pro 用户指南")
     @NotBlank(message = "知识库名称不能为空")
     private String name;
 
-    @Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "")
+    @Schema(description = "知识库描述", requiredMode = Schema.RequiredMode.REQUIRED, example = "存储 ruoyi-vue-pro 操作文档")
     private String description;
 
     @Schema(description = "可见权限,只能选择哪些人可见", requiredMode = Schema.RequiredMode.REQUIRED, example = "[1]")

+ 27 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/AiKnowledgeDocumentCreateReqVO.java

@@ -0,0 +1,27 @@
+package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import jakarta.validation.constraints.NotBlank;
+import jakarta.validation.constraints.NotNull;
+import lombok.Data;
+
+/**
+ * @author xiaoxin
+ */
+@Schema(description = "管理后台 - AI 知识库【创建文档】 Request VO")
+@Data
+public class AiKnowledgeDocumentCreateReqVO {
+
+    // Request fields for creating a knowledge base document: knowledge base id, document name, document URL
+    @Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
+    @NotNull(message = "知识库编号不能为空")
+    private Long knowledgeId;
+
+    @Schema(description = "文档名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "三方登陆")
+    @NotBlank(message = "文档名称不能为空")
+    private String name;
+
+    @Schema(description = "文档 url", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
+    private String url; // NOTE(review): schema marks this REQUIRED but no validation constraint is applied here — confirm
+
+}

+ 9 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeBaseDO.java

@@ -1,10 +1,13 @@
 package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
 
 
+import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
 import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableField;
 import com.baomidou.mybatisplus.annotation.TableId;
 import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
 import lombok.Data;
 
 import java.util.List;
@@ -40,7 +43,8 @@ public class AiKnowledgeBaseDO extends BaseDO {
     /**
      * 可见权限,只能选择哪些人可见
      */
-    private List<String> visibilityPermissions;
+    @TableField(typeHandler = JacksonTypeHandler.class)
+    private List<Long> visibilityPermissions;
     /**
      * 嵌入模型编号,高质量模式时维护
      */
@@ -50,7 +54,9 @@ public class AiKnowledgeBaseDO extends BaseDO {
      */
     private String model;
     /**
-     * 是否启用
+     * 状态
+     * <p>
+     * 枚举 {@link CommonStatusEnum}
      */
-    private Boolean status;
+    private Integer status;
 }

+ 9 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java

@@ -1,6 +1,8 @@
 package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
 
+import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
+import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
 import com.baomidou.mybatisplus.annotation.IdType;
 import com.baomidou.mybatisplus.annotation.TableId;
 import com.baomidou.mybatisplus.annotation.TableName;
@@ -46,10 +48,15 @@ public class AiKnowledgeDocumentDO extends BaseDO {
     private Integer wordCount;
     /**
      * 切片状态
+     * <p>
+     * 枚举 {@link AiKnowledgeDocumentStatusEnum}
      */
     private Integer sliceStatus;
+
     /**
-     * 是否启用
+     * 状态
+     * <p>
+     * 枚举 {@link CommonStatusEnum}
      */
-    private Boolean status;
+    private Integer status;
 }

+ 5 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java

@@ -1,5 +1,6 @@
 package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
 
+import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
 import com.baomidou.mybatisplus.annotation.IdType;
 import com.baomidou.mybatisplus.annotation.TableId;
@@ -41,8 +42,10 @@ public class AiKnowledgeSegmentDO extends BaseDO {
      */
     private Integer tokens;
     /**
-     * 是否启用
+     * 状态
+     * <p>
+     * 枚举 {@link CommonStatusEnum}
      */
-    private Boolean status;
+    private Integer status;
 
 }

+ 2 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeBaseMapper.java

@@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
 
 import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO;
+import org.apache.ibatis.annotations.Mapper;
 
 /**
  * Mapper for AI knowledge base basic information
  *
  * @author xiaoxin
  */
+@Mapper
 public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeBaseDO> {
 }

+ 2 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeDocumentMapper.java

@@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
 
 import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
+import org.apache.ibatis.annotations.Mapper;
 
 /**
  * Mapper for AI knowledge base documents
  *
  * @author xiaoxin
  */
+@Mapper
 public interface AiKnowledgeDocumentMapper extends BaseMapperX<AiKnowledgeDocumentDO> {
 }

+ 2 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java

@@ -2,11 +2,13 @@ package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
 
 import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
+import org.apache.ibatis.annotations.Mapper;
 
 /**
  * Mapper for AI knowledge base segments
  *
  * @author xiaoxin
  */
+@Mapper
 public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegmentDO> {
 }

+ 4 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingService.java

@@ -1,6 +1,7 @@
 package cn.iocoder.yudao.module.ai.service.knowledge;
 
 import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.SearchRequest;
 
 import java.util.List;
 
@@ -12,9 +13,9 @@ import java.util.List;
 public interface AiEmbeddingService {
 
     /**
-     * 向量化文档
+     * 向量化文档并存储
      */
-    void embeddingDoc();
+    void add(List<Document> documents);
 
 
     /**
@@ -22,5 +23,5 @@ public interface AiEmbeddingService {
      *
      * @param content 查询内容
      */
-    List<Document> similaritySearch(String content);
+    List<Document> similaritySearch(SearchRequest request);
 }

+ 4 - 19
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingServiceImpl.java

@@ -2,11 +2,9 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
 
 import jakarta.annotation.Resource;
 import org.springframework.ai.document.Document;
-import org.springframework.ai.reader.tika.TikaDocumentReader;
-import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.ai.vectorstore.RedisVectorStore;
 import org.springframework.ai.vectorstore.SearchRequest;
-import org.springframework.beans.factory.annotation.Value;
+import org.springframework.scheduling.annotation.Async;
 import org.springframework.stereotype.Service;
 
 import java.util.List;
@@ -21,27 +19,14 @@ public class AiEmbeddingServiceImpl implements AiEmbeddingService {
 
     @Resource
     private RedisVectorStore vectorStore;
-    @Resource
-    private TokenTextSplitter tokenTextSplitter;
-
-    // TODO @xin 临时测试用,后续删
-    @Value("classpath:/webapp/test/Fel.pdf")
-    private org.springframework.core.io.Resource data;
 
     @Override
-    public void embeddingDoc() {
-        // 读取文件
-        TikaDocumentReader loader = new TikaDocumentReader(data);
-        List<Document> documents = loader.get();
-        // 文档分段
-        List<Document> segments = tokenTextSplitter.apply(documents);
-        // 向量化并存储
-        vectorStore.add(segments);
+    public void add(List<Document> documents) {
+        vectorStore.add(documents);
     }
 
     @Override
-    public List<Document> similaritySearch(String content) {
-        SearchRequest request = SearchRequest.query(content);
+    public List<Document> similaritySearch(SearchRequest request) {
         return vectorStore.similaritySearch(request);
     }
 }

+ 6 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeBaseServiceImpl.java

@@ -2,6 +2,7 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
 
 import cn.hutool.core.lang.Assert;
 import cn.hutool.core.util.ObjUtil;
+import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
@@ -25,17 +26,19 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
 @Slf4j
 public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
 
-    @Resource
-    private AiKnowledgeBaseMapper knowledgeBaseMapper;
     @Resource
     private AiChatModelService chatModalService;
 
+    @Resource
+    private AiKnowledgeBaseMapper knowledgeBaseMapper;
+
+
     @Override
     public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
         AiChatModelDO model = validateChatModel(createReqVO.getModelId());
 
         AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class);
-        knowledgeBaseDO.setModel(model.getModel()).setUserId(userId);
+        knowledgeBaseDO.setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
 
         knowledgeBaseMapper.insert(knowledgeBaseDO);
         return knowledgeBaseDO.getId();

+ 11 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java

@@ -1,5 +1,7 @@
 package cn.iocoder.yudao.module.ai.service.knowledge;
 
+import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO;
+
 /**
  * AI 知识库-文档 Service 接口
  *
@@ -7,4 +9,13 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
  */
 public interface AiKnowledgeDocumentService {
 
+
+    /**
+     * Creates a knowledge base document
+     *
+     * @param createReqVO request VO for creating the document
+     * @return id of the document
+     */
+    Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO);
+
 }

+ 68 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

@@ -1,7 +1,25 @@
 package cn.iocoder.yudao.module.ai.service.knowledge;
 
+import cn.hutool.core.collection.CollUtil;
+import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
+import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
+import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
+import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO;
+import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
+import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
+import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
+import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeSegmentMapper;
+import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
+import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.reader.tika.TikaDocumentReader;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.util.List;
 
 /**
  * AI 知识库-文档 Service 实现类
@@ -12,5 +30,55 @@ import org.springframework.stereotype.Service;
 @Slf4j
 public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentService {
 
+    @Resource
+    private AiKnowledgeDocumentMapper documentMapper;
+    @Resource
+    private AiKnowledgeSegmentMapper segmentMapper;
+
+    @Resource
+    private TokenTextSplitter tokenTextSplitter;
+
+    @Resource
+    private AiEmbeddingService embeddingService;
+
+    // TODO @xin temporary, for testing only; remove later
+    @Value("classpath:/webapp/test/Fel.pdf")
+    private org.springframework.core.io.Resource data;
+
+    // Flow: insert document row -> read file via Tika -> split into segments -> batch-insert segments -> add segments to the vector store
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
+        AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class);
+        documentDO
+                // TODO: tokens / wordCount are hard-coded to 0 for now
+                .setTokens(0).setWordCount(0)
+                .setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus()); // NOTE(review): sliceStatus is SUCCESS before splitting/embedding has run — confirm intended
+        documentMapper.insert(documentDO);
+
+        TikaDocumentReader loader = new TikaDocumentReader(data); // NOTE(review): reads the injected test resource `data`, not the url from createReqVO — presumably temporary
+        List<Document> documents = loader.get();
+        Long documentId = documentDO.getId();
+        if (CollUtil.isEmpty(documents)) {
+            log.info("文档内容为空");
+            return documentId; // the document row inserted above is kept even though nothing was read
+        }
+
+        // Split the documents into segments
+        List<Document> segments = tokenTextSplitter.apply(documents);
+
+        List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
+                segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
+                        // TODO: tokens / wordCount are hard-coded to 0 for now
+                        .setTokens(0).setWordCount(0)
+                        .setStatus(CommonStatusEnum.ENABLE.getStatus()));
+
+        // Persist the segment contents to the database
+        segmentMapper.insertBatch(segmentDOList);
+
+        // Vectorize and store; NOTE(review): runs inside the @Transactional method — confirm how a failure here should be handled
+        embeddingService.add(segments);
 
+        return documentId;
+    }
 }