diff --git a/.gitignore b/.gitignore
index 5ff6309..1af12b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,4 +35,6 @@ build/
 .vscode/
 
 ### Mac OS ###
-.DS_Store
\ No newline at end of file
+.DS_Store
+/ai-rag-app/cloned-repo/
+/.idea/
diff --git a/ai-rag-api/src/main/java/com/storm/dev/api/IRAGService.java b/ai-rag-api/src/main/java/com/storm/dev/api/IRAGService.java
index c4c9685..080520d 100644
--- a/ai-rag-api/src/main/java/com/storm/dev/api/IRAGService.java
+++ b/ai-rag-api/src/main/java/com/storm/dev/api/IRAGService.java
@@ -31,4 +31,6 @@ public interface IRAGService {
 
     ChatResponse generateStreamRag(String model, String ragTag, String message);
 
+    Response analyzeGitRepository(String repoUrl, String userName, String token) throws Exception;
+
 }
diff --git a/ai-rag-app/src/test/java/com/storm/dev/text/GitTest.java b/ai-rag-app/src/test/java/com/storm/dev/text/GitTest.java
new file mode 100644
index 0000000..e7844a0
--- /dev/null
+++ b/ai-rag-app/src/test/java/com/storm/dev/text/GitTest.java
@@ -0,0 +1,125 @@
+package com.storm.dev.text;
+
+import jakarta.annotation.Resource;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.io.FileUtils;
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.ollama.OllamaChatClient;
+import org.springframework.ai.reader.tika.TikaDocumentReader;
+import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.ai.vectorstore.PgVectorStore;
+import org.springframework.ai.vectorstore.SimpleVectorStore;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.core.io.PathResource;
+import org.springframework.test.context.junit4.SpringRunner;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.List;
+
+/**
+ * Manual integration tests that clone a Git repository with JGit and upload its files,
+ * parsed by Tika and split into chunks, to the PgVector store.
+ *
+ * @author: lyd
+ * @date: 2026/1/18 14:55
+ */
+@Slf4j
+@RunWith(SpringRunner.class)
+@SpringBootTest
+public class GitTest {
+    /** Name of the environment variable that supplies the Git account name. */
+    private static final String USERNAME_ENV = "GIT_TEST_USERNAME";
+    /** Name of the environment variable that supplies the Git password or access token. */
+    private static final String TOKEN_ENV = "GIT_TEST_TOKEN";
+    /** Checkout directory, resolved against the working directory of the test run. */
+    public static final String LOCALPATH = "./cloned-repo";
+
+    @Resource
+    private OllamaChatClient ollamaChatClient;
+    @Resource
+    private TokenTextSplitter tokenTextSplitter;
+    @Resource
+    private SimpleVectorStore simpleVectorStore;
+    @Resource
+    private PgVectorStore pgVectorStore;
+
+    /**
+     * Clones the sample repository into {@link #LOCALPATH}, replacing any earlier checkout.
+     * The test is skipped when the credential environment variables are not set.
+     *
+     * @throws Exception if the old checkout cannot be deleted or the clone fails
+     */
+    @Test
+    public void test() throws Exception {
+        String repoUrl = "https://gitee.com/liyongde/java-trial.git";
+        // Credentials are read from the environment so that no secret is committed with the source.
+        String username = System.getenv(USERNAME_ENV);
+        String password = System.getenv(TOKEN_ENV);
+        Assume.assumeTrue("Set " + USERNAME_ENV + " and " + TOKEN_ENV + " to run the clone test",
+                username != null && password != null);
+
+        File cloneDir = new File(LOCALPATH);
+        log.info("克隆路径:{}", cloneDir.getAbsolutePath());
+
+        // Remove any previous checkout before cloning into the same directory.
+        FileUtils.deleteDirectory(cloneDir);
+
+        Git git = Git.cloneRepository()
+                .setURI(repoUrl)
+                .setDirectory(cloneDir)
+                .setCredentialsProvider(new UsernamePasswordCredentialsProvider(username, password))
+                .call();
+
+        git.close();
+    }
+
+    /**
+     * Walks the checkout at {@link #LOCALPATH}, parses each file with Tika, splits it into chunks
+     * tagged with the {@code java-trial} knowledge label and stores the chunks in the PgVector store.
+     * The test is skipped when the checkout does not exist; run {@link #test()} first.
+     *
+     * @throws IOException if the directory walk fails
+     */
+    @Test
+    public void test_file() throws IOException {
+        Path root = Path.of(LOCALPATH);
+        Assume.assumeTrue("No checkout at " + LOCALPATH + "; run test() first", Files.isDirectory(root));
+
+        Files.walkFileTree(root, new SimpleFileVisitor<>() {
+            @Override
+            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
+                // Git's own bookkeeping under .git is not project content; keep it out of the store.
+                if (dir.endsWith(".git")) {
+                    return FileVisitResult.SKIP_SUBTREE;
+                }
+                return super.preVisitDirectory(dir, attrs);
+            }
+
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                log.info("文件路径:{}", file.toString());
+                PathResource resource = new PathResource(file);
+                TikaDocumentReader reader = new TikaDocumentReader(resource);
+
+                List<Document> documents = reader.get();
+                List<Document> documentSplitterList = tokenTextSplitter.apply(documents);
+                // Only the chunks are stored, so only the chunks need the knowledge tag.
+                documentSplitterList.forEach(doc -> doc.getMetadata().put("knowledge", "java-trial"));
+
+                pgVectorStore.accept(documentSplitterList);
+                return super.visitFile(file, attrs);
+            }
+        });
+    }
+}
diff --git a/ai-rag-trigger/src/main/java/com/storm/dev/trigger/http/RAGController.java
b/ai-rag-trigger/src/main/java/com/storm/dev/trigger/http/RAGController.java
index 22d85c0..f4e088a 100644
--- a/ai-rag-trigger/src/main/java/com/storm/dev/trigger/http/RAGController.java
+++ b/ai-rag-trigger/src/main/java/com/storm/dev/trigger/http/RAGController.java
@@ -5,6 +5,9 @@ import com.storm.dev.api.IRAGService;
 import com.storm.dev.api.response.Response;
 import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.io.FileUtils;
+import org.eclipse.jgit.api.Git;
+import org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider;
 import org.redisson.api.RList;
 import org.redisson.api.RedissonClient;
 import org.springframework.ai.chat.ChatResponse;
@@ -20,10 +23,15 @@ import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.ai.vectorstore.PgVectorStore;
 import org.springframework.ai.vectorstore.SearchRequest;
 import org.springframework.ai.vectorstore.SimpleVectorStore;
+import org.springframework.core.io.PathResource;
 import org.springframework.web.bind.annotation.*;
 import org.springframework.web.multipart.MultipartFile;
 import reactor.core.publisher.Flux;
 
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.*;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -113,4 +121,117 @@ public class RAGController implements IRAGService {
         log.info("测试结果:{}", call);
         return call;
     }
+
+    /**
+     * Clones a Git repository, uploads its files to the vector store and registers the project
+     * name as a knowledge tag.
+     *
+     * <p>Every file of the checkout outside the {@code .git} directory is parsed with Tika, split
+     * into chunks, tagged with the project name derived from {@code repoUrl} and stored in the
+     * PgVector store. A file that cannot be parsed or stored is logged and skipped. The checkout
+     * is removed again before the method returns, whether or not the upload succeeded.
+     *
+     * @param repoUrl  URL of the repository to clone; its last path segment names the knowledge tag
+     * @param userName account name used to authenticate the clone
+     * @param token    password or access token used to authenticate the clone
+     * @return a response with code {@code 0000} once the upload has finished
+     * @throws IllegalArgumentException if no project name can be derived from {@code repoUrl}
+     * @throws Exception                if the clone or the directory walk fails
+     */
+    @RequestMapping(value = "analyze_git_repository", method = RequestMethod.POST)
+    @Override
+    public Response analyzeGitRepository(@RequestParam String repoUrl, @RequestParam String userName, @RequestParam String token) throws Exception {
+        String repoProjectName = extractProjectName(repoUrl);
+        // A private directory per request keeps concurrent calls from deleting each other's checkout.
+        Path clonePath = Files.createTempDirectory("git-cloned-repo-");
+        log.info("克隆路径:{}", clonePath.toAbsolutePath());
+
+        // The repository is closed before the finally block deletes its files.
+        try (Git git = Git.cloneRepository()
+                .setURI(repoUrl)
+                .setDirectory(clonePath.toFile())
+                .setCredentialsProvider(new UsernamePasswordCredentialsProvider(userName, token))
+                .call()) {
+            uploadRepositoryFiles(clonePath, repoProjectName);
+        } finally {
+            if (!FileUtils.deleteQuietly(clonePath.toFile())) {
+                log.warn("Failed to delete clone directory: {}", clonePath);
+            }
+        }
+
+        RList<String> elements = redissonClient.getList("ragTag");
+        if (!elements.contains(repoProjectName)) {
+            elements.add(repoProjectName);
+        }
+
+        log.info("遍历解析路径,上传完成:{}", repoUrl);
+
+        return Response.builder().code("0000").info("调用成功").build();
+    }
+
+    /**
+     * Walks {@code root} and stores every parsable file outside {@code .git} under {@code knowledgeTag}.
+     */
+    private void uploadRepositoryFiles(Path root, String knowledgeTag) throws IOException {
+        Files.walkFileTree(root, new SimpleFileVisitor<>() {
+            @Override
+            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
+                // Git's own bookkeeping under .git is not project content; keep it out of the store.
+                return dir.endsWith(".git") ? FileVisitResult.SKIP_SUBTREE : FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                log.info("{} 遍历解析路径,上传知识库:{}", knowledgeTag, file.getFileName());
+                try {
+                    TikaDocumentReader reader = new TikaDocumentReader(new PathResource(file));
+                    List<Document> documents = reader.get();
+                    List<Document> documentSplitterList = tokenTextSplitter.apply(documents);
+
+                    // Only the chunks are stored, so only the chunks need the knowledge tag.
+                    documentSplitterList.forEach(doc -> doc.getMetadata().put("knowledge", knowledgeTag));
+
+                    pgVectorStore.accept(documentSplitterList);
+                } catch (Exception e) {
+                    // Best effort: one unreadable file must not abort the upload, but keep the cause in the log.
+                    log.error("遍历解析路径,上传知识库失败:{}", file.getFileName(), e);
+                }
+
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
+                log.warn("Failed to access file: {} - {}", file.toString(), exc.getMessage());
+                return FileVisitResult.CONTINUE;
+            }
+        });
+    }
+
+    /**
+     * Derives the project name from a repository URL: the last path segment without a trailing
+     * {@code .git}.
+     *
+     * @throws IllegalArgumentException if {@code repoUrl} is null or has no usable last segment
+     */
+    private String extractProjectName(String repoUrl) {
+        if (repoUrl == null) {
+            throw new IllegalArgumentException("repoUrl must not be null");
+        }
+        String trimmed = repoUrl.strip();
+        // Ignore trailing slashes so that "host/owner/repo.git/" still yields "repo".
+        int end = trimmed.length();
+        while (end > 0 && trimmed.charAt(end - 1) == '/') {
+            end--;
+        }
+        String lastSegment = trimmed.substring(trimmed.lastIndexOf('/', end - 1) + 1, end);
+        // Strip only a trailing ".git"; replacing every occurrence would mangle names like "my.github.io".
+        String projectName = lastSegment.endsWith(".git")
+                ? lastSegment.substring(0, lastSegment.length() - ".git".length())
+                : lastSegment;
+        if (projectName.isEmpty()) {
+            throw new IllegalArgumentException("Cannot derive a project name from repository URL: " + repoUrl);
+        }
+        return projectName;
+    }
 }