From cc25b149cf3bf04d354b16a70d2789528c671980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=A5=9A=E5=AD=94=E5=93=8D?= <1014853731@qq.com> Date: Thu, 4 Sep 2025 11:58:56 +0800 Subject: [PATCH 1/5] Google genai response multimodality support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Author: 楚孔响 <1014853731@qq.com> Signed-off-by: 楚孔响 <1014853731@qq.com> Signed-off-by: ckx521 <1014853731@qq.com> --- .../ai/google/genai/GoogleGenAiChatModel.java | 21 +++++++++-- .../google/genai/GoogleGenAiChatOptions.java | 36 +++++++++++++++++-- .../genai/GoogleGenAiChatOptionsTest.java | 11 ++++++ 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java index 668c1e5a0d7..f64e4590fa8 100644 --- a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java +++ b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java @@ -91,6 +91,7 @@ import org.springframework.util.Assert; import org.springframework.util.CollectionUtils; import org.springframework.util.StringUtils; +import org.springframework.util.MimeType; /** * Google GenAI Chat Model implementation that provides access to Google's Gemini language @@ -626,7 +627,19 @@ protected List responseCandidateToGeneration(Candidate candidate) { .parts() .orElse(List.of()) .stream() - .map(part -> new AssistantMessage(part.text().orElse(""), messageMetadata)) + .map(part -> { + // Multimodality Response Support + List media = part.inlineData() + .filter(blob -> blob.data().isPresent() && blob.mimeType().isPresent()) + .map(blob -> Media + .builder() + .mimeType(MimeType.valueOf(blob.mimeType().get())) + .data(blob.data().get()) + .build()) + .map(List::of) + .orElse(List.of()); + return new AssistantMessage(part.text().orElse(""), messageMetadata, List.of(), media); + }) .map(assistantMessage -> new Generation(assistantMessage, chatGenerationMetadata)) .toList(); } @@ -725,6 +738,10 @@ GeminiRequest createGeminiRequest(Prompt prompt) { configBuilder.systemInstruction(systemContents.get(0)); } + if (!CollectionUtils.isEmpty(requestOptions.getResponseModalities())) { + configBuilder.responseModalities(requestOptions.getResponseModalities()); + } + GenerateContentConfig config = configBuilder.build(); // Create message contents @@ -850,7 +867,7 @@ public static final class Builder { private GoogleGenAiChatOptions defaultOptions = GoogleGenAiChatOptions.builder() .temperature(0.7) .topP(1.0) - .model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH) + .model(ChatModel.GEMINI_2_0_FLASH) .build(); private ToolCallingManager toolCallingManager; diff --git a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java index 7e05e5fc921..464565d70c7 100644 --- a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java +++ b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java @@ -113,6 +113,12 @@ public class GoogleGenAiChatOptions implements ToolCallingChatOptions { */ private @JsonProperty("thinkingBudget") Integer thinkingBudget; + /** + * Optional. Response Modalities. + * @see com.google.genai.types.Modality.Known + */ + private @JsonProperty("responseModalities") List responseModalities = new ArrayList<>(); + /** * Collection of {@link ToolCallback}s to be used for tool calling in the chat * completion requests. @@ -174,6 +180,7 @@ public static GoogleGenAiChatOptions fromOptions(GoogleGenAiChatOptions fromOpti options.setToolContext(fromOptions.getToolContext()); options.setThinkingBudget(fromOptions.getThinkingBudget()); options.setLabels(fromOptions.getLabels()); + options.setResponseModalities(fromOptions.getResponseModalities()); return options; } @@ -355,6 +362,15 @@ public void setToolContext(Map toolContext) { this.toolContext = toolContext; } + public List getResponseModalities() { + return responseModalities; + } + + public void setResponseModalities(List responseModalities) { + Assert.notNull(responseModalities, "responseModalities cannot be null"); + this.responseModalities = responseModalities; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -376,7 +392,8 @@ public boolean equals(Object o) { && Objects.equals(this.toolNames, that.toolNames) && Objects.equals(this.safetySettings, that.safetySettings) && Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled) - && Objects.equals(this.toolContext, that.toolContext) && Objects.equals(this.labels, that.labels); + && Objects.equals(this.toolContext, that.toolContext) && Objects.equals(this.labels, that.labels) + && Objects.equals(this.responseModalities, that.responseModalities); } @Override @@ -384,7 +401,8 @@ public int hashCode() { return Objects.hash(this.stopSequences, this.temperature, this.topP, this.topK, this.candidateCount, this.frequencyPenalty, this.presencePenalty, this.thinkingBudget, this.maxOutputTokens, this.model, this.responseMimeType, this.toolCallbacks, this.toolNames, this.googleSearchRetrieval, - this.safetySettings, this.internalToolExecutionEnabled, this.toolContext, this.labels); + this.safetySettings, this.internalToolExecutionEnabled, this.toolContext, this.labels, + this.responseModalities); } @Override @@ -396,7 +414,7 @@ public String toString() { + this.model + '\'' + ", responseMimeType='" + this.responseMimeType + '\'' + ", toolCallbacks=" + this.toolCallbacks + ", toolNames=" + this.toolNames + ", googleSearchRetrieval=" + this.googleSearchRetrieval + ", safetySettings=" + this.safetySettings + ", labels=" + this.labels - + '}'; + + ", responseModalities=" + this.responseModalities + '}'; } @Override @@ -530,6 +548,18 @@ public Builder labels(Map labels) { return this; } + public Builder responseModalities(List responseModalities) { + Assert.notNull(responseModalities, "responseModalities must not be null"); + this.options.responseModalities = responseModalities; + return this; + } + + public Builder responseModalitie(String responseModalitie) { + Assert.hasText(responseModalitie, "responseModalitie must not be empty"); + this.options.responseModalities.add(responseModalitie); + return this; + } + public GoogleGenAiChatOptions build() { return this.options; } diff --git a/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java b/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java index 3521213bfb5..3b6b99a68ec 100644 --- a/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java +++ b/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java @@ -16,8 +16,10 @@ package org.springframework.ai.google.genai; +import java.util.List; import java.util.Map; +import com.google.genai.types.Modality; import org.junit.jupiter.api.Test; import static org.assertj.core.api.Assertions.assertThat; @@ -153,4 +155,13 @@ public void testToStringWithLabels() { assertThat(toString).contains("test-model"); } + @Test + public void testResponseMultimodality() { + GoogleGenAiChatOptions options = GoogleGenAiChatOptions.builder() + .responseModalities(List.of(Modality.Known.TEXT.name(), Modality.Known.IMAGE.name())) + .build(); + String toString = options.toString(); + assertThat(toString).contains("responseModalities=[TEXT, IMAGE]"); + } + } From e70987422070c28eb1bffca0129e8a7e2f71660c Mon Sep 17 00:00:00 2001 From: ckx521 <1014853731@qq.com> Date: Thu, 4 Sep 2025 12:44:07 +0800 Subject: [PATCH 2/5] apply format Signed-off-by: ckx521 <1014853731@qq.com> --- .../ai/google/genai/GoogleGenAiChatModel.java | 34 +++++++------------ .../google/genai/GoogleGenAiChatOptions.java | 2 +- .../genai/GoogleGenAiChatOptionsTest.java | 4 +-- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java index f64e4590fa8..53be6289824 100644 --- a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java +++ b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java @@ -90,8 +90,8 @@ import org.springframework.retry.support.RetryTemplate; import org.springframework.util.Assert; import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; import org.springframework.util.MimeType; +import org.springframework.util.StringUtils; /** * Google GenAI Chat Model implementation that provides access to Google's Gemini language @@ -622,26 +622,18 @@ protected List responseCandidateToGeneration(Candidate candidate) { return List.of(new Generation(assistantMessage, chatGenerationMetadata)); } else { - return candidate.content() - .get() - .parts() - .orElse(List.of()) - .stream() - .map(part -> { - // Multimodality Response Support - List media = part.inlineData() - .filter(blob -> blob.data().isPresent() && blob.mimeType().isPresent()) - .map(blob -> Media - .builder() - .mimeType(MimeType.valueOf(blob.mimeType().get())) - .data(blob.data().get()) - .build()) - .map(List::of) - .orElse(List.of()); - return new AssistantMessage(part.text().orElse(""), messageMetadata, List.of(), media); - }) - .map(assistantMessage -> new Generation(assistantMessage, chatGenerationMetadata)) - .toList(); + return candidate.content().get().parts().orElse(List.of()).stream().map(part -> { + // Multimodality Response Support + List media = part.inlineData() + .filter(blob -> blob.data().isPresent() && blob.mimeType().isPresent()) + .map(blob -> Media.builder() + .mimeType(MimeType.valueOf(blob.mimeType().get())) + .data(blob.data().get()) + .build()) + .map(List::of) + .orElse(List.of()); + return new AssistantMessage(part.text().orElse(""), messageMetadata, List.of(), media); + }).map(assistantMessage -> new Generation(assistantMessage, chatGenerationMetadata)).toList(); } } diff --git a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java index 464565d70c7..28bc6fe5a7a 100644 --- a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java +++ b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java @@ -363,7 +363,7 @@ public void setToolContext(Map toolContext) { } public List getResponseModalities() { - return responseModalities; + return this.responseModalities; } public void setResponseModalities(List responseModalities) { diff --git a/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java b/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java index 3b6b99a68ec..8592a182b7a 100644 --- a/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java +++ b/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatOptionsTest.java @@ -158,8 +158,8 @@ public void testToStringWithLabels() { @Test public void testResponseMultimodality() { GoogleGenAiChatOptions options = GoogleGenAiChatOptions.builder() - .responseModalities(List.of(Modality.Known.TEXT.name(), Modality.Known.IMAGE.name())) - .build(); + .responseModalities(List.of(Modality.Known.TEXT.name(), Modality.Known.IMAGE.name())) + .build(); String toString = options.toString(); assertThat(toString).contains("responseModalities=[TEXT, IMAGE]"); } From 763afeddbf37c6e9ccb411aa5d6081f9b56bf614 Mon Sep 17 00:00:00 2001 From: ckx521 <1014853731@qq.com> Date: Thu, 4 Sep 2025 15:57:05 +0800 Subject: [PATCH 3/5] chat memory message aggregator media support Signed-off-by: ckx521 <1014853731@qq.com> --- .../ai/chat/model/MessageAggregator.java | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/spring-ai-model/src/main/java/org/springframework/ai/chat/model/MessageAggregator.java b/spring-ai-model/src/main/java/org/springframework/ai/chat/model/MessageAggregator.java index 498c35b8d17..6f9120a8a7c 100644 --- a/spring-ai-model/src/main/java/org/springframework/ai/chat/model/MessageAggregator.java +++ b/spring-ai-model/src/main/java/org/springframework/ai/chat/model/MessageAggregator.java @@ -34,6 +34,7 @@ import org.springframework.ai.chat.metadata.PromptMetadata; import org.springframework.ai.chat.metadata.RateLimit; import org.springframework.ai.chat.metadata.Usage; +import org.springframework.ai.content.Media; import org.springframework.util.CollectionUtils; import org.springframework.util.StringUtils; @@ -60,6 +61,7 @@ public Flux aggregate(Flux fluxChatResponse, AtomicReference messageTextContentRef = new AtomicReference<>(new StringBuilder()); AtomicReference> messageMetadataMapRef = new AtomicReference<>(); AtomicReference> toolCallsRef = new AtomicReference<>(new ArrayList<>()); + AtomicReference> mediasRef = new AtomicReference<>(new ArrayList<>()); // ChatGeneration Metadata AtomicReference generationMetadataRef = new AtomicReference<>( @@ -80,6 +82,7 @@ public Flux aggregate(Flux fluxChatResponse, messageTextContentRef.set(new StringBuilder()); messageMetadataMapRef.set(new HashMap<>()); toolCallsRef.set(new ArrayList<>()); + mediasRef.set(new ArrayList<>()); metadataIdRef.set(""); metadataModelRef.set(""); metadataUsagePromptTokensRef.set(0); @@ -105,7 +108,9 @@ public Flux aggregate(Flux fluxChatResponse, if (!CollectionUtils.isEmpty(outputMessage.getToolCalls())) { toolCallsRef.get().addAll(outputMessage.getToolCalls()); } - + if (!CollectionUtils.isEmpty(outputMessage.getMedia())) { + mediasRef.get().addAll(outputMessage.getMedia()); + } } if (chatResponse.getMetadata() != null) { if (chatResponse.getMetadata().getUsage() != null) { @@ -137,6 +142,12 @@ public Flux aggregate(Flux fluxChatResponse, List toolCallsList = (List) toolCallsFromMetadata; toolCallsRef.get().addAll(toolCallsList); } + Object mediasFromMetadata = chatResponse.getMetadata().get("medias"); + if (mediasFromMetadata instanceof List) { + @SuppressWarnings("unchecked") + List mediasList = (List) mediasFromMetadata; + mediasRef.get().addAll(mediasList); + } } }).doOnComplete(() -> { @@ -152,18 +163,12 @@ public Flux aggregate(Flux fluxChatResponse, .promptMetadata(metadataPromptMetadataRef.get()) .build(); - AssistantMessage finalAssistantMessage; List collectedToolCalls = toolCallsRef.get(); + List collectedMedias = mediasRef.get(); - if (!CollectionUtils.isEmpty(collectedToolCalls)) { + AssistantMessage finalAssistantMessage = new AssistantMessage(messageTextContentRef.get().toString(), + messageMetadataMapRef.get(), collectedToolCalls, collectedMedias); - finalAssistantMessage = new AssistantMessage(messageTextContentRef.get().toString(), - messageMetadataMapRef.get(), collectedToolCalls); - } - else { - finalAssistantMessage = new AssistantMessage(messageTextContentRef.get().toString(), - messageMetadataMapRef.get()); - } onAggregationComplete.accept(new ChatResponse(List.of(new Generation(finalAssistantMessage, generationMetadataRef.get())), chatResponseMetadata)); @@ -171,6 +176,7 @@ public Flux aggregate(Flux fluxChatResponse, messageTextContentRef.set(new StringBuilder()); messageMetadataMapRef.set(new HashMap<>()); toolCallsRef.set(new ArrayList<>()); + mediasRef.set(new ArrayList<>()); metadataIdRef.set(""); metadataModelRef.set(""); metadataUsagePromptTokensRef.set(0); From b05864d4119ee91fcb96e631e806918cdd87b86f Mon Sep 17 00:00:00 2001 From: ckx521 <1014853731@qq.com> Date: Thu, 4 Sep 2025 22:07:41 +0800 Subject: [PATCH 4/5] Google genai response empty content bug fix Signed-off-by: ckx521 <1014853731@qq.com> --- .../springframework/ai/google/genai/GoogleGenAiChatModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java index 53be6289824..7bc334917ba 100644 --- a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java +++ b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java @@ -622,7 +622,7 @@ protected List responseCandidateToGeneration(Candidate candidate) { return List.of(new Generation(assistantMessage, chatGenerationMetadata)); } else { - return candidate.content().get().parts().orElse(List.of()).stream().map(part -> { + return candidate.content().flatMap(Content::parts).orElse(List.of()).stream().map(part -> { // Multimodality Response Support List media = part.inlineData() .filter(blob -> blob.data().isPresent() && blob.mimeType().isPresent()) From e0ad29795e6961ef6be084ef2247035c5d65c54b Mon Sep 17 00:00:00 2001 From: ckx521 <1014853731@qq.com> Date: Mon, 8 Sep 2025 21:54:38 +0800 Subject: [PATCH 5/5] Google genai update sdk to 1.15.0 Signed-off-by: ckx521 <1014853731@qq.com> --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5031dc90eed..9b00b5ea5d5 100644 --- a/pom.xml +++ b/pom.xml @@ -282,7 +282,7 @@ 1.19.2 3.63.1 26.60.0 - 1.10.0 + 1.15.0 9.20.0 4.37.0 2.2.30