Skip to content

Commit 9fc0f18

Browse files
codebase/testing-llm-responses-using-spring-ai-evaluators [BAEL-9143] (#18248)
* add codebase * remove QA advisor for evaluator ChatClient * add test case to check wrong answer relevant to topic via RelevancyEvaluator * fix: grammar in test class * delete unusable TestApplication * remove unnecessary documents
1 parent f8f9e6a commit 9fc0f18

File tree

8 files changed

+297
-0
lines changed

8 files changed

+297
-0
lines changed

spring-ai-2/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@
4343
<groupId>org.springframework.boot</groupId>
4444
<artifactId>spring-boot-starter-web</artifactId>
4545
</dependency>
46+
<dependency>
47+
<groupId>org.springframework.ai</groupId>
48+
<artifactId>spring-ai-markdown-document-reader</artifactId>
49+
</dependency>
4650
<dependency>
4751
<groupId>org.springframework.ai</groupId>
4852
<artifactId>spring-ai-ollama-spring-boot-starter</artifactId>
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package com.baeldung.springai.evaluator;
2+
3+
import org.springframework.ai.autoconfigure.anthropic.AnthropicAutoConfiguration;
4+
import org.springframework.ai.autoconfigure.bedrock.converse.BedrockConverseProxyChatAutoConfiguration;
5+
import org.springframework.ai.autoconfigure.openai.OpenAiAutoConfiguration;
6+
import org.springframework.ai.autoconfigure.vectorstore.chroma.ChromaVectorStoreAutoConfiguration;
7+
import org.springframework.boot.SpringApplication;
8+
import org.springframework.boot.autoconfigure.SpringBootApplication;
9+
import org.springframework.context.annotation.PropertySource;
10+
11+
/**
12+
* Excluding the below auto-configurations to avoid start up
13+
* failure. Their corresponding starters are present on the classpath but are
14+
* only needed by other articles in the shared codebase.
15+
*/
16+
@SpringBootApplication(exclude = {
17+
OpenAiAutoConfiguration.class,
18+
AnthropicAutoConfiguration.class,
19+
ChromaVectorStoreAutoConfiguration.class,
20+
BedrockConverseProxyChatAutoConfiguration.class
21+
})
22+
@PropertySource("classpath:application-evaluator.properties")
23+
public class Application {
24+
25+
public static void main(String[] args) {
26+
SpringApplication.run(Application.class, args);
27+
}
28+
29+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package com.baeldung.springai.evaluator;
2+
3+
import org.springframework.ai.chat.client.ChatClient;
4+
import org.springframework.ai.chat.client.advisor.QuestionAnswerAdvisor;
5+
import org.springframework.ai.chat.model.ChatModel;
6+
import org.springframework.ai.embedding.EmbeddingModel;
7+
import org.springframework.ai.evaluation.FactCheckingEvaluator;
8+
import org.springframework.ai.evaluation.RelevancyEvaluator;
9+
import org.springframework.ai.ollama.OllamaChatModel;
10+
import org.springframework.ai.ollama.api.OllamaApi;
11+
import org.springframework.ai.ollama.api.OllamaOptions;
12+
import org.springframework.ai.ollama.management.ModelManagementOptions;
13+
import org.springframework.ai.ollama.management.PullModelStrategy;
14+
import org.springframework.ai.vectorstore.SimpleVectorStore;
15+
import org.springframework.ai.vectorstore.VectorStore;
16+
import org.springframework.beans.factory.annotation.Qualifier;
17+
import org.springframework.beans.factory.annotation.Value;
18+
import org.springframework.context.annotation.Bean;
19+
import org.springframework.context.annotation.Configuration;
20+
21+
@Configuration
22+
public class LLMConfiguration {
23+
24+
@Bean
25+
public VectorStore vectorStore(EmbeddingModel embeddingModel) {
26+
return SimpleVectorStore
27+
.builder(embeddingModel)
28+
.build();
29+
}
30+
31+
@Bean
32+
public ChatClient contentGenerator(ChatModel chatModel, VectorStore vectorStore) {
33+
return ChatClient.builder(chatModel)
34+
.defaultAdvisors(new QuestionAnswerAdvisor(vectorStore))
35+
.build();
36+
}
37+
38+
@Bean
39+
public ChatClient contentEvaluator(
40+
OllamaApi olamaApi,
41+
@Value("${com.baeldung.evaluation.model}") String evaluationModel) {
42+
ChatModel chatModel = OllamaChatModel.builder()
43+
.ollamaApi(olamaApi)
44+
.defaultOptions(OllamaOptions.builder()
45+
.model(evaluationModel)
46+
.build())
47+
.modelManagementOptions(ModelManagementOptions.builder()
48+
.pullModelStrategy(PullModelStrategy.WHEN_MISSING)
49+
.build())
50+
.build();
51+
return ChatClient.builder(chatModel)
52+
.build();
53+
}
54+
55+
@Bean
56+
public FactCheckingEvaluator factCheckingEvaluator(@Qualifier("contentEvaluator") ChatClient chatClient) {
57+
return new FactCheckingEvaluator(chatClient.mutate());
58+
}
59+
60+
@Bean
61+
public RelevancyEvaluator relevancyEvaluator(@Qualifier("contentEvaluator") ChatClient chatClient) {
62+
return new RelevancyEvaluator(chatClient.mutate());
63+
}
64+
65+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package com.baeldung.springai.evaluator;
2+
3+
import org.springframework.ai.document.Document;
4+
import org.springframework.ai.reader.markdown.MarkdownDocumentReader;
5+
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
6+
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
7+
import org.springframework.ai.vectorstore.VectorStore;
8+
import org.springframework.boot.ApplicationArguments;
9+
import org.springframework.boot.ApplicationRunner;
10+
import org.springframework.core.io.Resource;
11+
import org.springframework.core.io.support.ResourcePatternResolver;
12+
import org.springframework.stereotype.Component;
13+
14+
import java.io.IOException;
15+
import java.util.ArrayList;
16+
import java.util.Arrays;
17+
import java.util.List;
18+
19+
@Component
20+
class VectorStoreInitializer implements ApplicationRunner {
21+
22+
private final VectorStore vectorStore;
23+
private final ResourcePatternResolver resourcePatternResolver;
24+
25+
public VectorStoreInitializer(VectorStore vectorStore, ResourcePatternResolver resourcePatternResolver) {
26+
this.vectorStore = vectorStore;
27+
this.resourcePatternResolver = resourcePatternResolver;
28+
}
29+
30+
@Override
31+
public void run(ApplicationArguments args) throws IOException {
32+
List<Document> documents = new ArrayList<>();
33+
Resource[] resources = resourcePatternResolver.getResources("classpath:documents/*.md");
34+
Arrays.stream(resources).forEach(resource -> {
35+
MarkdownDocumentReader markdownDocumentReader = new MarkdownDocumentReader(resource, MarkdownDocumentReaderConfig.defaultConfig());
36+
documents.addAll(markdownDocumentReader.read());
37+
});
38+
vectorStore.add(new TokenTextSplitter().split(documents));
39+
}
40+
41+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
com.baeldung.evaluation.model=bespoke-minicheck
2+
3+
spring.ai.ollama.chat.options.model=llama3.3
4+
spring.ai.ollama.embedding.options.model=nomic-embed-text
5+
spring.ai.ollama.init.pull-model-strategy=when_missing
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Leave Management Policy
2+
3+
We offer comprehensive leave benefits to support work-life balance.
4+
5+
## Types of Leave
6+
- Annual Leave: 20 days per year, accrued monthly
7+
- Sick Leave: 14 days per year, requires medical certificate for 3+ consecutive days
8+
- Parental Leave: 16 weeks for primary caregivers, 4 weeks for secondary
9+
- Bereavement Leave: 5 days for immediate family
10+
11+
## Application Process
12+
1. Submit request through HR portal minimum 2 weeks in advance (except sick leave)
13+
2. Manager reviews within 48 hours
14+
3. HR processes approved requests within 24 hours
15+
16+
## Leave Balance
17+
Leave balance resets annually on January 1st. Maximum 5 days carry-forward allowed.
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package com.baeldung.springai.evaluator;
2+
3+
import org.junit.jupiter.api.Test;
4+
import org.springframework.ai.chat.client.ChatClient;
5+
import org.springframework.ai.chat.client.advisor.QuestionAnswerAdvisor;
6+
import org.springframework.ai.chat.model.ChatResponse;
7+
import org.springframework.ai.document.Document;
8+
import org.springframework.ai.evaluation.EvaluationRequest;
9+
import org.springframework.ai.evaluation.EvaluationResponse;
10+
import org.springframework.ai.evaluation.FactCheckingEvaluator;
11+
import org.springframework.ai.evaluation.RelevancyEvaluator;
12+
import org.springframework.beans.factory.annotation.Autowired;
13+
import org.springframework.boot.test.context.SpringBootTest;
14+
import org.springframework.context.annotation.Import;
15+
16+
import java.util.List;
17+
18+
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
19+
20+
@SpringBootTest
21+
@Import(TestcontainersConfiguration.class)
22+
class LLMResponseEvaluatorLiveTest {
23+
24+
@Autowired
25+
private ChatClient contentGenerator;
26+
27+
@Autowired
28+
private RelevancyEvaluator relevancyEvaluator;
29+
30+
@Autowired
31+
private FactCheckingEvaluator factCheckingEvaluator;
32+
33+
@Test
34+
void whenChatClientProvidesAnswerRelevantToTopic_thenRelevancyEvaluationSucceeds() {
35+
String question = "How many days sick leave can I take?";
36+
ChatResponse chatResponse = contentGenerator.prompt()
37+
.user(question)
38+
.call()
39+
.chatResponse();
40+
41+
String answer = chatResponse.getResult().getOutput().getContent();
42+
List<Document> documents = chatResponse.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
43+
EvaluationRequest evaluationRequest = new EvaluationRequest(question, documents, answer);
44+
45+
EvaluationResponse evaluationResponse = relevancyEvaluator.evaluate(evaluationRequest);
46+
assertThat(evaluationResponse.isPass()).isTrue();
47+
}
48+
49+
@Test
50+
void whenChatClientProvidesAnswerIrrelevantToTopic_thenRelevancyEvaluationFails() {
51+
String question = "How many days sick leave can I take?";
52+
ChatResponse chatResponse = contentGenerator.prompt()
53+
.user(question)
54+
.call()
55+
.chatResponse();
56+
57+
String nonRelevantAnswer = "A lion is the king of the jungle";
58+
List<Document> documents = chatResponse.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
59+
EvaluationRequest evaluationRequest = new EvaluationRequest(question, documents, nonRelevantAnswer);
60+
61+
EvaluationResponse evaluationResponse = relevancyEvaluator.evaluate(evaluationRequest);
62+
assertThat(evaluationResponse.isPass()).isFalse();
63+
}
64+
65+
@Test
66+
void whenChatClientProvidesWrongAnswerRelevantToTopic_thenRelevancyEvaluationFails() {
67+
String question = "How many days sick leave can I take?";
68+
ChatResponse chatResponse = contentGenerator.prompt()
69+
.user(question)
70+
.call()
71+
.chatResponse();
72+
73+
String wrongAnswer = "You can take no leaves. Get back to work!";
74+
List<Document> documents = chatResponse.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
75+
EvaluationRequest evaluationRequest = new EvaluationRequest(question, documents, wrongAnswer);
76+
77+
EvaluationResponse evaluationResponse = relevancyEvaluator.evaluate(evaluationRequest);
78+
assertThat(evaluationResponse.isPass()).isFalse();
79+
}
80+
81+
@Test
82+
void whenChatClientProvidesFactuallyCorrectAnswer_thenFactCheckingEvaluationSucceeds() {
83+
String question = "How many days sick leave can I take?";
84+
ChatResponse chatResponse = contentGenerator.prompt()
85+
.user(question)
86+
.call()
87+
.chatResponse();
88+
89+
String answer = chatResponse.getResult().getOutput().getContent();
90+
List<Document> documents = chatResponse.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
91+
EvaluationRequest evaluationRequest = new EvaluationRequest(question, documents, answer);
92+
93+
EvaluationResponse evaluationResponse = factCheckingEvaluator.evaluate(evaluationRequest);
94+
assertThat(evaluationResponse.isPass()).isTrue();
95+
}
96+
97+
@Test
98+
void whenChatClientProvidesFactuallyIncorrectAnswer_thenFactCheckingEvaluationFails() {
99+
String question = "How many days sick leave can I take?";
100+
ChatResponse chatResponse = contentGenerator.prompt()
101+
.user(question)
102+
.call()
103+
.chatResponse();
104+
105+
String wrongAnswer = "You can take no leaves. Get back to work!";
106+
List<Document> documents = chatResponse.getMetadata().get(QuestionAnswerAdvisor.RETRIEVED_DOCUMENTS);
107+
EvaluationRequest evaluationRequest = new EvaluationRequest(question, documents, wrongAnswer);
108+
109+
EvaluationResponse evaluationResponse = factCheckingEvaluator.evaluate(evaluationRequest);
110+
assertThat(evaluationResponse.isPass()).isFalse();
111+
}
112+
113+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package com.baeldung.springai.evaluator;
2+
3+
import org.springframework.boot.test.context.TestConfiguration;
4+
import org.springframework.context.annotation.Bean;
5+
import org.springframework.test.context.DynamicPropertyRegistrar;
6+
import org.testcontainers.ollama.OllamaContainer;
7+
8+
@TestConfiguration(proxyBeanMethods = false)
9+
class TestcontainersConfiguration {
10+
11+
@Bean
12+
public OllamaContainer ollamaContainer() {
13+
return new OllamaContainer("ollama/ollama:0.5.7");
14+
}
15+
16+
@Bean
17+
public DynamicPropertyRegistrar dynamicPropertyRegistrar(OllamaContainer ollamaContainer) {
18+
return registry -> {
19+
registry.add("spring.ai.ollama.base-url", ollamaContainer::getEndpoint);
20+
};
21+
}
22+
23+
}

0 commit comments

Comments
 (0)