Skip to content

Commit ea47a94

Browse files
ICKostiantyn.IvanovICKostiantyn.Ivanov
ICKostiantyn.Ivanov
authored and
ICKostiantyn.Ivanov
committed
BAEL-9198 - Building an AI Chatbot in Java with Langchain4j and MongoDB Atlas
1 parent ddff9b1 commit ea47a94

File tree

11 files changed

+988
-0
lines changed

11 files changed

+988
-0
lines changed

libraries-llms-2/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
## Relevant Articles

libraries-llms-2/pom.xml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<modelVersion>4.0.0</modelVersion>
6+
<artifactId>libraries-llms-2</artifactId>
7+
<name>libraries-llms-2</name>
8+
9+
<parent>
10+
<groupId>com.baeldung</groupId>
11+
<artifactId>parent-modules</artifactId>
12+
<version>1.0.0-SNAPSHOT</version>
13+
</parent>
14+
15+
<dependencies>
16+
<dependency>
17+
<groupId>org.springframework.boot</groupId>
18+
<artifactId>spring-boot-starter-web</artifactId>
19+
<version>${spring-boot.version}</version>
20+
</dependency>
21+
<!-- dependencies for langchain4j -->
22+
<dependency>
23+
<groupId>dev.langchain4j</groupId>
24+
<artifactId>langchain4j-mongodb-atlas</artifactId>
25+
<version>${langchain4j.version}</version>
26+
</dependency>
27+
<dependency>
28+
<groupId>dev.langchain4j</groupId>
29+
<artifactId>langchain4j</artifactId>
30+
<version>${langchain4j.version}</version>
31+
</dependency>
32+
<dependency>
33+
<groupId>dev.langchain4j</groupId>
34+
<artifactId>langchain4j-open-ai</artifactId>
35+
<version>${langchain4j.version}</version>
36+
</dependency>
37+
<dependency>
38+
<groupId>org.springframework.boot</groupId>
39+
<artifactId>spring-boot-starter-test</artifactId>
40+
<version>${spring-boot.version}</version>
41+
<scope>test</scope>
42+
</dependency>
43+
</dependencies>
44+
45+
<properties>
46+
<langchain4j.version>1.0.0-beta1</langchain4j.version>
47+
<spring-boot.version>3.3.2</spring-boot.version>
48+
</properties>
49+
50+
</project>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package com.baeldung.chatbot.mongodb.assistants;
2+
3+
/**
 * Contract of the question-answering assistant used by the chatbot.
 *
 * <p>This module contains no hand-written implementation; the configuration class hands this
 * interface to {@code AiServices.builder(...)} together with a chat model and a content
 * retriever to obtain an instance.
 */
public interface ArticleBasedAssistant {

    /**
     * Answers a free-text question.
     *
     * @param question the question asked by the user
     * @return the assistant's reply
     */
    String answer(String question);
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
package com.baeldung.chatbot.mongodb.configuration;
2+
3+
import com.baeldung.chatbot.mongodb.assistants.ArticleBasedAssistant;
4+
import com.mongodb.client.MongoClient;
5+
import com.mongodb.client.MongoClients;
6+
import com.mongodb.client.model.CreateCollectionOptions;
7+
import dev.langchain4j.data.segment.TextSegment;
8+
import dev.langchain4j.model.chat.ChatLanguageModel;
9+
import dev.langchain4j.model.embedding.EmbeddingModel;
10+
import dev.langchain4j.model.openai.OpenAiChatModel;
11+
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
12+
import dev.langchain4j.rag.content.retriever.ContentRetriever;
13+
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
14+
import dev.langchain4j.service.AiServices;
15+
import dev.langchain4j.store.embedding.EmbeddingStore;
16+
import dev.langchain4j.store.embedding.mongodb.IndexMapping;
17+
import dev.langchain4j.store.embedding.mongodb.MongoDbEmbeddingStore;
18+
import org.bson.conversions.Bson;
19+
import org.springframework.beans.factory.annotation.Value;
20+
import org.springframework.context.annotation.Bean;
21+
import org.springframework.context.annotation.Configuration;
22+
23+
import java.util.HashSet;
24+
25+
import static dev.langchain4j.model.openai.OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL;
26+
27+
@Configuration
28+
public class ChatBotConfiguration {
29+
30+
@Value("${app.mongodb.url}")
31+
private String mongodbUrl;
32+
33+
@Value("${app.mongodb.db-name}")
34+
private String databaseName;
35+
36+
@Value("${app.openai.apiKey}")
37+
private String apiKey;
38+
39+
40+
@Bean
41+
public MongoClient mongoClient() {
42+
return MongoClients.create(mongodbUrl);
43+
}
44+
45+
@Bean
46+
public EmbeddingStore<TextSegment> embeddingStore(MongoClient mongoClient) {
47+
String collectionName = "embeddings";
48+
String indexName = "embedding";
49+
Long maxResultRatio = 10L;
50+
CreateCollectionOptions createCollectionOptions = new CreateCollectionOptions();
51+
Bson filter = null;
52+
IndexMapping indexMapping = IndexMapping.builder()
53+
.dimension(TEXT_EMBEDDING_3_SMALL.dimension())
54+
.metadataFieldNames(new HashSet<>())
55+
.build();
56+
Boolean createIndex = true;
57+
58+
return new MongoDbEmbeddingStore(
59+
mongoClient,
60+
databaseName,
61+
collectionName,
62+
indexName,
63+
maxResultRatio,
64+
createCollectionOptions,
65+
filter,
66+
indexMapping,
67+
createIndex
68+
);
69+
}
70+
71+
@Bean
72+
public EmbeddingModel embeddingModel() {
73+
return OpenAiEmbeddingModel.builder()
74+
.apiKey(apiKey)
75+
.modelName(TEXT_EMBEDDING_3_SMALL)
76+
.build();
77+
}
78+
79+
@Bean
80+
public ContentRetriever contentRetriever(EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel) {
81+
return EmbeddingStoreContentRetriever.builder()
82+
.embeddingStore(embeddingStore)
83+
.embeddingModel(embeddingModel)
84+
.maxResults(10)
85+
.minScore(0.8)
86+
.build();
87+
}
88+
89+
@Bean
90+
public ChatLanguageModel chatModel() {
91+
return OpenAiChatModel.builder()
92+
.apiKey(apiKey)
93+
.modelName("gpt-4o-mini")
94+
.build();
95+
}
96+
97+
@Bean
98+
public ArticleBasedAssistant articleBasedAssistant(ChatLanguageModel chatModel, ContentRetriever contentRetriever) {
99+
return AiServices.builder(ArticleBasedAssistant.class)
100+
.chatLanguageModel(chatModel)
101+
.contentRetriever(contentRetriever)
102+
.build();
103+
}
104+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.baeldung.chatbot.mongodb.controllers;
2+
3+
import com.baeldung.chatbot.mongodb.assistants.ArticleBasedAssistant;
4+
import org.springframework.beans.factory.annotation.Autowired;
5+
import org.springframework.web.bind.annotation.*;
6+
7+
@RestController
8+
public class ChatBotController {
9+
private final ArticleBasedAssistant assistant;
10+
11+
@Autowired
12+
public ChatBotController(ArticleBasedAssistant assistant) {
13+
this.assistant = assistant;
14+
}
15+
16+
@GetMapping("/chat-bot")
17+
public String answer(@RequestParam("question") String question) {
18+
return assistant.answer(question);
19+
}
20+
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package com.baeldung.chatbot.mongodb.repositories;
2+
3+
import com.fasterxml.jackson.databind.JsonNode;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import dev.langchain4j.data.document.Document;
6+
import dev.langchain4j.data.document.DocumentSplitter;
7+
import dev.langchain4j.data.document.Metadata;
8+
import dev.langchain4j.data.document.splitter.DocumentSplitters;
9+
import dev.langchain4j.data.embedding.Embedding;
10+
import dev.langchain4j.data.segment.TextSegment;
11+
import dev.langchain4j.model.embedding.EmbeddingModel;
12+
import dev.langchain4j.model.openai.OpenAiEmbeddingModelName;
13+
import dev.langchain4j.model.openai.OpenAiTokenizer;
14+
import dev.langchain4j.store.embedding.EmbeddingStore;
15+
import org.slf4j.Logger;
16+
import org.slf4j.LoggerFactory;
17+
import org.springframework.beans.factory.annotation.Autowired;
18+
import org.springframework.beans.factory.annotation.Value;
19+
import org.springframework.stereotype.Component;
20+
21+
import java.io.*;
22+
import java.util.ArrayList;
23+
import java.util.Iterator;
24+
import java.util.List;
25+
26+
@Component
27+
public class ArticlesRepository {
28+
private static final Logger log = LoggerFactory.getLogger(ArticlesRepository.class);
29+
30+
private final EmbeddingStore<TextSegment> embeddingStore;
31+
private final EmbeddingModel embeddingModel;
32+
private final ObjectMapper objectMapper = new ObjectMapper();
33+
34+
@Autowired
35+
public ArticlesRepository(@Value("${app.load-articles}") Boolean shouldLoadArticles,
36+
EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel) throws IOException {
37+
this.embeddingStore = embeddingStore;
38+
this.embeddingModel = embeddingModel;
39+
40+
if (shouldLoadArticles) {
41+
loadArticles();
42+
}
43+
}
44+
45+
private void loadArticles() throws IOException {
46+
String resourcePath = "articles.json";
47+
int maxTokensPerChunk = 8000;
48+
int overlapTokens = 800;
49+
50+
List<TextSegment> documents = loadJsonDocuments(resourcePath, maxTokensPerChunk, overlapTokens);
51+
52+
log.info("Documents to store: " + documents.size());
53+
54+
for (TextSegment document : documents) {
55+
Embedding embedding = embeddingModel.embed(document.text()).content();
56+
embeddingStore.add(embedding, document);
57+
}
58+
59+
log.info("Documents are uploaded");
60+
}
61+
62+
private List<TextSegment> loadJsonDocuments(String resourcePath, int maxTokensPerChunk, int overlapTokens) throws IOException {
63+
64+
InputStream inputStream = ArticlesRepository.class.getClassLoader().getResourceAsStream(resourcePath);
65+
66+
if (inputStream == null) {
67+
throw new FileNotFoundException("Resource not found: " + resourcePath);
68+
}
69+
70+
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
71+
72+
int batchSize = 500;
73+
List<Document> batch = new ArrayList<>();
74+
List<TextSegment> textSegments = new ArrayList<>();
75+
76+
String line;
77+
while ((line = reader.readLine()) != null) {
78+
JsonNode jsonNode = objectMapper.readTree(line);
79+
80+
String title = jsonNode.path("title").asText(null);
81+
String body = jsonNode.path("body").asText(null);
82+
JsonNode metadataNode = jsonNode.path("metadata");
83+
84+
if (body != null) {
85+
addDocumentToBatch(title, body, metadataNode, batch);
86+
87+
if (batch.size() >= batchSize) {
88+
textSegments.addAll(splitIntoChunks(batch, maxTokensPerChunk, overlapTokens));
89+
batch.clear();
90+
}
91+
}
92+
}
93+
94+
if (!batch.isEmpty()) {
95+
textSegments.addAll(splitIntoChunks(batch, maxTokensPerChunk, overlapTokens));
96+
}
97+
98+
return textSegments;
99+
}
100+
101+
private void addDocumentToBatch(String title, String body, JsonNode metadataNode, List<Document> batch) {
102+
String text = (title != null ? title + "\n\n" + body : body);
103+
104+
Metadata metadata = new Metadata();
105+
if (metadataNode != null && metadataNode.isObject()) {
106+
Iterator<String> fieldNames = metadataNode.fieldNames();
107+
while (fieldNames.hasNext()) {
108+
String fieldName = fieldNames.next();
109+
metadata.put(fieldName, metadataNode.path(fieldName).asText());
110+
}
111+
}
112+
113+
Document document = Document.from(text, metadata);
114+
batch.add(document);
115+
}
116+
117+
private List<TextSegment> splitIntoChunks(List<Document> documents, int maxTokensPerChunk, int overlapTokens) {
118+
OpenAiTokenizer tokenizer = new OpenAiTokenizer(OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL);
119+
120+
DocumentSplitter splitter = DocumentSplitters.recursive(
121+
maxTokensPerChunk,
122+
overlapTokens,
123+
tokenizer
124+
);
125+
126+
List<TextSegment> allSegments = new ArrayList<>();
127+
for (Document document : documents) {
128+
List<TextSegment> segments = splitter.split(document);
129+
allSegments.addAll(segments);
130+
}
131+
132+
return allSegments;
133+
}
134+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
app.mongodb.url=mongodb://wikiuser:password@localhost:27017/admin
2+
app.mongodb.db-name=chatbot_db
3+
4+
app.openai.apiKey=${OPENAI_API_KEY}
5+
app.load-articles=false
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
version: '3.1'
2+
3+
services:
4+
my-mongodb:
5+
image: mongodb/mongodb-atlas-local:7.0.9
6+
container_name: my-mongodb
7+
environment:
8+
- MONGODB_INITDB_ROOT_USERNAME=wikiuser
9+
- MONGODB_INITDB_ROOT_PASSWORD=password
10+
ports:
11+
- 27017:27017
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package com.baeldung.chatbot.mongodb;
2+
3+
import com.baeldung.chatbot.mongodb.configuration.ChatBotConfiguration;
4+
import com.baeldung.chatbot.mongodb.controllers.ChatBotController;
5+
import com.baeldung.chatbot.mongodb.repositories.ArticlesRepository;
6+
import org.junit.jupiter.api.Assertions;
7+
import org.junit.jupiter.api.Test;
8+
import org.slf4j.Logger;
9+
import org.slf4j.LoggerFactory;
10+
import org.springframework.beans.factory.annotation.Autowired;
11+
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
12+
import org.springframework.boot.test.context.SpringBootTest;
13+
import org.springframework.test.web.servlet.MockMvc;
14+
15+
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
16+
17+
@AutoConfigureMockMvc
18+
@SpringBootTest(classes = {ChatBotConfiguration.class, ArticlesRepository.class, ChatBotController.class})
19+
class ChatBotLiveTest {
20+
21+
Logger log = LoggerFactory.getLogger(ChatBotLiveTest.class);
22+
23+
@Autowired
24+
private MockMvc mockMvc;
25+
26+
@Test
27+
void givenChatBotApi_whenCallingGetEndpointWithQuestion_thenExpectedAnswersIsPresent() throws Exception {
28+
String chatResponse = mockMvc
29+
.perform(get("/chat-bot")
30+
.param("question", "Steps to implement Spring boot app and MongoDB"))
31+
.andReturn()
32+
.getResponse()
33+
.getContentAsString();
34+
35+
log.info(chatResponse);
36+
Assertions.assertTrue(chatResponse.contains("Step 1"));
37+
}
38+
}

0 commit comments

Comments
 (0)