Skip to content

Commit 0d8fc8d

Browse files
committed
feat: add rag API
1 parent 3f442ea commit 0d8fc8d

File tree

5 files changed: 26 additions (+26) and 26 deletions (-26)

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ COPY --from=build /app/app/target/tiny-engine-app-*.jar /app/tiny-engine-app.jar
1616
COPY --from=build /app/base/target/tiny-engine-base-*.jar /app/tiny-engine-base.jar
1717
# 设置环境变量
1818

19-
ENV FOLDER_PATH = "/app/documents"
19+
ENV FOLDER_PATH="/app/documents"
2020
# 替换为自己的域名接口路径
2121
ENV TINY_ENGINE_URL="https://agent.opentiny.design/material-center/api/resource/download"
2222
ENTRYPOINT ["java", "-jar", "tiny-engine-app.jar", "--spring.profiles.active=alpha"]

base/src/main/java/com/tinyengine/it/controller/AiChatController.java

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,10 @@
1212

1313
package com.tinyengine.it.controller;
1414

15-
import com.tinyengine.it.common.base.Result;
1615
import com.tinyengine.it.common.log.SystemControllerLog;
1716
import com.tinyengine.it.model.dto.ChatRequest;
1817

19-
import com.tinyengine.it.rag.service.StorageService;
20-
import com.tinyengine.it.rag.entity.EmbeddingMatchDto;
2118
import com.tinyengine.it.service.app.v1.AiChatV1Service;
22-
import dev.langchain4j.data.segment.TextSegment;
23-
import dev.langchain4j.store.embedding.EmbeddingMatch;
2419
import io.swagger.v3.oas.annotations.Operation;
2520
import io.swagger.v3.oas.annotations.Parameter;
2621
import io.swagger.v3.oas.annotations.media.Content;
@@ -40,9 +35,6 @@
4035
import org.springframework.web.bind.annotation.RestController;
4136
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
4237

43-
import java.util.List;
44-
import java.util.stream.Collectors;
45-
4638
/**
4739
* The type Ai chat controller.
4840
*

base/src/main/java/com/tinyengine/it/controller/VectorStorageController.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
import io.swagger.v3.oas.annotations.media.Schema;
2727
import io.swagger.v3.oas.annotations.responses.ApiResponse;
2828
import io.swagger.v3.oas.annotations.tags.Tag;
29+
import jakarta.validation.constraints.NotEmpty;
2930
import org.springframework.beans.factory.annotation.Autowired;
3031
import org.springframework.validation.annotation.Validated;
3132
import org.springframework.web.bind.annotation.DeleteMapping;
@@ -136,11 +137,11 @@ public Result<List<EmbeddingMatchDto>> searchInCollection(@RequestBody SearchReq
136137
@ApiResponse(responseCode = "400", description = "请求失败")
137138
})
138139
@SystemControllerLog(description = "跨集合搜索")
139-
@GetMapping("/vector-storage/all-collections")
140+
@PostMapping("/vector-storage/all-collections")
140141
public Result<Map<String, List<EmbeddingMatchDto>>> searchAllCollections(
141142
@RequestBody SearchRequest searchDto) {
142143
Map<String, List<EmbeddingMatchDto>> results =
143-
vectorStorageService.searchAcrossCollections(searchDto);
144+
vectorStorageService.searchAcrossCollections(searchDto);
144145
return Result.success(results);
145146
}
146147

@@ -219,7 +220,7 @@ public Result<DeleteResult> deleteByFilePath(@RequestParam String filePath, @Req
219220
@SystemControllerLog(description = "通过路径和集合名称批量删除知识库文档")
220221
@DeleteMapping("/vector-storage/batch/{collection}")
221222
public Result<BatchDeleteResult> deleteMultipleFiles(@PathVariable String collection,
222-
@RequestBody List<String> filePaths) {
223+
@RequestBody @NotEmpty List<@NotEmpty String> filePaths) {
223224
BatchDeleteResult result = vectorStorageService.deleteMultipleFiles(filePaths,collection);
224225
return Result.success(result);
225226
}

base/src/main/java/com/tinyengine/it/rag/config/RAGConfig.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ public class RAGConfig {
3434
private String chromaBaseUrl = "http://localhost:8000";
3535
private String chromaCollectionName = "tinyengine_documents";
3636
private String modelPath = "./all-MiniLM-L6-v2/model.onnx";
37-
private String tokenizerPath = ".//all-MiniLM-L6-v2/tokenizer.json";
37+
private String tokenizerPath = "./all-MiniLM-L6-v2/tokenizer.json";
3838

3939
// 连接配置
4040
private int timeoutSeconds = 30;

base/src/main/java/com/tinyengine/it/rag/service/StorageService.java

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,9 @@ private boolean isValidCollection(String collectionName) {
132132
public VectorDocument autoAddFolderToKnowledgeBase() {
133133
try {
134134
String folderPath = System.getenv("FOLDER_PATH");
135+
if (folderPath == null || folderPath.isBlank()) {
136+
throw new ServiceException(ExceptionEnum.CM329.getResultCode(), "FOLDER_PATH does not exist: " + folderPath);
137+
}
135138
// 验证文件夹路径
136139
Path folder = Paths.get(folderPath);
137140
if (!Files.exists(folder) || !Files.isDirectory(folder)) {
@@ -585,26 +588,30 @@ public DeleteResult deleteByFilePath(String filePath, String collectionName) {
585588
*/
586589
private List<EmbeddingMatch<TextSegment>> searchBySource(String sourcePath, String collectionName) {
587590
try {
591+
// 使用更合理的查询文本
592+
String queryText = "document content analysis";
593+
588594
EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
589-
.queryEmbedding(embeddingModel.embed("test").content())
590-
.maxResults(10000)
591-
.minScore(0.0)
592-
.build();
595+
.queryEmbedding(embeddingModel.embed(queryText).content())
596+
.maxResults(1000)
597+
.minScore(0.1)
598+
.build();
593599

594600
List<EmbeddingMatch<TextSegment>> allMatches = embeddingStore.search(searchRequest).matches();
595601

596-
// 根据源文件路径和集合名称过滤
602+
// 在应用层过滤
597603
return allMatches.stream()
598-
.filter(match -> {
599-
String source = match.embedded().metadata().getString("source");
600-
String collection = match.embedded().metadata().getString("collection");
604+
.filter(match -> {
605+
String source = match.embedded().metadata().getString("source");
606+
String collection = match.embedded().metadata().getString("collection");
601607

602-
boolean sourceMatch = source != null && source.equals(sourcePath);
603-
boolean collectionMatch = collectionName == null ||
604-
(collection != null && collection.equals(collectionName));
608+
boolean sourceMatch = source != null && source.equals(sourcePath);
609+
boolean collectionMatch = collectionName == null ||
610+
(collection != null && collection.equals(collectionName));
605611

606-
return sourceMatch && collectionMatch;
607-
}).collect(Collectors.toList());
612+
return sourceMatch && collectionMatch;
613+
})
614+
.collect(Collectors.toList());
608615

609616
} catch (Exception e) {
610617
log.error("Failed to search vectors by source: {} in collection: {}", sourcePath, collectionName, e);

0 commit comments

Comments
 (0)