Browse Source

【新增】AI 知识库:支持读取 PDF、DOC/DOCX、PPT/PPTX 和 HTML 等格式

xiaoxin 7 months ago
parent
commit
d698ef70b5

+ 2 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/DocServiceImpl.java

@@ -3,7 +3,7 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
 import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.document.Document;
-import org.springframework.ai.reader.TextReader;
+import org.springframework.ai.reader.tika.TikaDocumentReader;
 import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.ai.vectorstore.RedisVectorStore;
 import org.springframework.beans.factory.annotation.Value;
@@ -34,7 +34,7 @@ public class DocServiceImpl implements DocService {
     public void embeddingDoc() {
         // 读取文件
         org.springframework.core.io.Resource file = data;
-        TextReader loader = new TextReader(file);
+        TikaDocumentReader loader = new TikaDocumentReader(file);
         List<Document> documents = loader.get();
         // 文档分段
         List<Document> segments = tokenTextSplitter.apply(documents);

+ 6 - 0
yudao-module-ai/yudao-spring-boot-starter-ai/pom.xml

@@ -39,11 +39,17 @@
             <artifactId>spring-ai-stability-ai-spring-boot-starter</artifactId>
             <version>${spring-ai.version}</version>
         </dependency>
+
         <dependency>
             <groupId>org.springframework.ai</groupId>
             <artifactId>spring-ai-transformers-spring-boot-starter</artifactId>
             <version>${spring-ai.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.springframework.ai</groupId>
+            <artifactId>spring-ai-tika-document-reader</artifactId>
+            <version>${spring-ai.version}</version>
+        </dependency>
         <dependency>
             <groupId>org.springframework.ai</groupId>
             <artifactId>spring-ai-redis-store</artifactId>

+ 36 - 0
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/config/YudaoAiAutoConfiguration.java

@@ -10,11 +10,18 @@ import cn.iocoder.yudao.framework.ai.core.model.xinghuo.XingHuoChatModel;
 import cn.iocoder.yudao.framework.ai.core.model.xinghuo.XingHuoChatOptions;
 import com.alibaba.cloud.ai.tongyi.TongYiAutoConfiguration;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.autoconfigure.vectorstore.redis.RedisVectorStoreProperties;
+import org.springframework.ai.document.MetadataMode;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.ai.transformers.TransformersEmbeddingModel;
+import org.springframework.ai.vectorstore.RedisVectorStore;
 import org.springframework.boot.autoconfigure.AutoConfiguration;
 import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.boot.autoconfigure.data.redis.RedisProperties;
 import org.springframework.boot.context.properties.EnableConfigurationProperties;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Import;
+import redis.clients.jedis.JedisPooled;
 
 /**
  * 芋道 AI 自动配置
@@ -73,4 +80,33 @@ public class YudaoAiAutoConfiguration {
         return new SunoApi(yudaoAiProperties.getSuno().getBaseUrl());
     }
 
+    // ========== rag 相关 ==========
+    @Bean
+    public TransformersEmbeddingModel transformersEmbeddingClient() {
+        return new TransformersEmbeddingModel(MetadataMode.EMBED);
+    }
+
+    /**
+     * Builds the {@link RedisVectorStore} bean.
+     *
+     * <p>Many embedding models are loaded at startup and it is not yet decided which one to
+     * prefer, so for now a {@link TransformersEmbeddingModel} is used.
+     *
+     * @param transformersEmbeddingModel embedding model handed to the vector store
+     * @param properties                 supplies the index name, the key prefix and the
+     *                                   initialize-schema flag
+     * @param redisProperties            supplies the Redis host, port, username and password
+     * @return the vector store, after {@code afterPropertiesSet()} has been called on it
+     */
+    @Bean
+    public RedisVectorStore vectorStore(TransformersEmbeddingModel transformersEmbeddingModel, RedisVectorStoreProperties properties,
+                                        RedisProperties redisProperties) {
+        var config = RedisVectorStore.RedisVectorStoreConfig.builder()
+                .withIndexName(properties.getIndex())
+                .withPrefix(properties.getPrefix())
+                .build();

+        // Forward the configured credentials too: connecting with host/port only makes every
+        // command fail against a password-protected Redis.
+        // NOTE(review): Jedis is expected to skip AUTH when username and password are both null
+        // - confirm for the Jedis version in use.
+        JedisPooled jedisPooled = new JedisPooled(redisProperties.getHost(), redisProperties.getPort(),
+                redisProperties.getUsername(), redisProperties.getPassword());
+        RedisVectorStore redisVectorStore = new RedisVectorStore(config, transformersEmbeddingModel,
+                jedisPooled, properties.isInitializeSchema());
+        // NOTE(review): if RedisVectorStore is a Spring InitializingBean the container presumably
+        // invokes this callback again - confirm whether the explicit call is still needed.
+        redisVectorStore.afterPropertiesSet();
+        return redisVectorStore;
+    }
+
+    @Bean
+    public TokenTextSplitter tokenTextSplitter() {
+        return new TokenTextSplitter(500, 100, 5, 10000, true);
+    }
+
 }