diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index 40aeb2b85d7..e45647ba69e 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -65,6 +65,7 @@ package com.google.firebase.ai { } @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenModel { + method public suspend Object? editImage(String prompt, com.google.firebase.ai.type.ImagenEditingConfig config, kotlin.coroutines.Continuation>); method public suspend Object? generateImages(String prompt, kotlin.coroutines.Continuation>); } @@ -104,6 +105,7 @@ package com.google.firebase.ai.java { } @com.google.firebase.ai.type.PublicPreviewAPI public abstract class ImagenModelFutures { + method public abstract com.google.common.util.concurrent.ListenableFuture> editImage(String prompt, com.google.firebase.ai.type.ImagenEditingConfig config); method public static final com.google.firebase.ai.java.ImagenModelFutures from(com.google.firebase.ai.ImagenModel model); method public abstract com.google.common.util.concurrent.ListenableFuture> generateImages(String prompt); method public abstract com.google.firebase.ai.ImagenModel getImageModel(); @@ -484,6 +486,47 @@ package com.google.firebase.ai.type { public static final class ImagenAspectRatio.Companion { } + public final class ImagenEditMode { + field public static final com.google.firebase.ai.type.ImagenEditMode.Companion Companion; + } + + public static final class ImagenEditMode.Companion { + method public com.google.firebase.ai.type.ImagenEditMode getINPAINT_INSERTION(); + method public com.google.firebase.ai.type.ImagenEditMode getINPAINT_REMOVAL(); + method public com.google.firebase.ai.type.ImagenEditMode getOUTPAINT(); + property public final com.google.firebase.ai.type.ImagenEditMode INPAINT_INSERTION; + property public final com.google.firebase.ai.type.ImagenEditMode INPAINT_REMOVAL; + property public final com.google.firebase.ai.type.ImagenEditMode OUTPAINT; + } + + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenEditingConfig { + ctor public ImagenEditingConfig(com.google.firebase.ai.type.ImagenInlineImage image, com.google.firebase.ai.type.ImagenEditMode editMode, com.google.firebase.ai.type.ImagenInlineImage? mask = null, Double? maskDilation = null, Integer? editSteps = null); + field public static final com.google.firebase.ai.type.ImagenEditingConfig.Companion Companion; + } + + public static final class ImagenEditingConfig.Builder { + ctor public ImagenEditingConfig.Builder(); + method public com.google.firebase.ai.type.ImagenEditingConfig build(); + method public com.google.firebase.ai.type.ImagenEditingConfig.Builder setEditMode(com.google.firebase.ai.type.ImagenEditMode editMode); + method public com.google.firebase.ai.type.ImagenEditingConfig.Builder setEditSteps(int editSteps); + method public com.google.firebase.ai.type.ImagenEditingConfig.Builder setImage(com.google.firebase.ai.type.ImagenInlineImage image); + method public com.google.firebase.ai.type.ImagenEditingConfig.Builder setMask(com.google.firebase.ai.type.ImagenInlineImage mask); + method public com.google.firebase.ai.type.ImagenEditingConfig.Builder setMaskDilation(double maskDilation); + field public com.google.firebase.ai.type.ImagenEditMode? editMode; + field public Integer? editSteps; + field public com.google.firebase.ai.type.ImagenInlineImage? image; + field public com.google.firebase.ai.type.ImagenInlineImage? mask; + field public Double? maskDilation; + } + + public static final class ImagenEditingConfig.Companion { + method public com.google.firebase.ai.type.ImagenEditingConfig.Builder builder(); + } + + public final class ImagenEditingConfigKt { + method @com.google.firebase.ai.type.PublicPreviewAPI public static com.google.firebase.ai.type.ImagenEditingConfig imagenEditingConfig(kotlin.jvm.functions.Function1 init); + } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenGenerationConfig { ctor public ImagenGenerationConfig(String? negativePrompt = null, Integer? numberOfImages = 1, com.google.firebase.ai.type.ImagenAspectRatio? aspectRatio = null, com.google.firebase.ai.type.ImagenImageFormat? imageFormat = null, Boolean? addWatermark = null); method public Boolean? getAddWatermark(); @@ -552,6 +595,10 @@ package com.google.firebase.ai.type { property public final String mimeType; } + public final class ImagenInlineImageKt { + method @com.google.firebase.ai.type.PublicPreviewAPI public static com.google.firebase.ai.type.ImagenInlineImage toImagenInlineImage(android.graphics.Bitmap); + } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenPersonFilterLevel { field public static final com.google.firebase.ai.type.ImagenPersonFilterLevel ALLOW_ADULT; field public static final com.google.firebase.ai.type.ImagenPersonFilterLevel ALLOW_ALL; diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt index 4d88d09b1e1..6e7d9cd6f35 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt @@ -22,6 +22,7 @@ import com.google.firebase.ai.common.AppCheckHeaderProvider import com.google.firebase.ai.common.ContentBlockedException import com.google.firebase.ai.common.GenerateImageRequest import com.google.firebase.ai.type.FirebaseAIException +import com.google.firebase.ai.type.ImagenEditingConfig import com.google.firebase.ai.type.ImagenGenerationConfig import com.google.firebase.ai.type.ImagenGenerationResponse import com.google.firebase.ai.type.ImagenInlineImage @@ -75,30 +76,92 @@ internal constructor( public suspend fun generateImages(prompt: String): ImagenGenerationResponse = try { controller - .generateImage(constructRequest(prompt, null, generationConfig)) + .generateImage(constructGenerateImageRequest(prompt, generationConfig)) .validate() .toPublicInline() } catch (e: Throwable) { throw FirebaseAIException.from(e) } - private fun constructRequest( + public suspend fun editImage( prompt: String, - gcsUri: String?, - config: ImagenGenerationConfig?, + config: ImagenEditingConfig + ): ImagenGenerationResponse = + try { + controller.generateImage(constructEditRequest(prompt, config)).validate().toPublicInline() + } catch (e: Throwable) { + throw FirebaseAIException.from(e) + } + + private fun constructGenerateImageRequest( + prompt: String, + generationConfig: ImagenGenerationConfig? = null, ): GenerateImageRequest { return GenerateImageRequest( listOf(GenerateImageRequest.ImagenPrompt(prompt)), GenerateImageRequest.ImagenParameters( - sampleCount = config?.numberOfImages ?: 1, + sampleCount = generationConfig?.numberOfImages ?: 1, + includeRaiReason = true, + addWatermark = generationConfig?.addWatermark, + personGeneration = safetySettings?.personFilterLevel?.internalVal, + negativePrompt = generationConfig?.negativePrompt, + safetySetting = safetySettings?.safetyFilterLevel?.internalVal, + storageUri = null, + aspectRatio = generationConfig?.aspectRatio?.internalVal, + imageOutputOptions = generationConfig?.imageFormat?.toInternal(), + editMode = null, + editConfig = null + ), + ) + } + + private fun constructEditRequest( + prompt: String, + editConfig: ImagenEditingConfig, + ): GenerateImageRequest { + return GenerateImageRequest( + listOf( + GenerateImageRequest.ImagenPrompt( + prompt = prompt, + referenceImages = + buildList { + add( + GenerateImageRequest.ReferenceImage( + referenceType = GenerateImageRequest.ReferenceType.RAW, + referenceId = 1, + referenceImage = editConfig.image.toInternal(), + maskImageConfig = null + ) + ) + if (editConfig.mask != null) { + add( + GenerateImageRequest.ReferenceImage( + referenceType = GenerateImageRequest.ReferenceType.MASK, + referenceId = 2, + referenceImage = editConfig.mask.toInternal(), + maskImageConfig = + GenerateImageRequest.MaskImageConfig( + maskMode = GenerateImageRequest.MaskMode.USER_PROVIDED, + dilation = editConfig.maskDilation + ) + ) + ) + } + } + ) + ), + GenerateImageRequest.ImagenParameters( + sampleCount = generationConfig?.numberOfImages ?: 1, includeRaiReason = true, addWatermark = generationConfig?.addWatermark, personGeneration = safetySettings?.personFilterLevel?.internalVal, - negativePrompt = config?.negativePrompt, + negativePrompt = generationConfig?.negativePrompt, safetySetting = safetySettings?.safetyFilterLevel?.internalVal, - storageUri = gcsUri, - aspectRatio = config?.aspectRatio?.internalVal, + storageUri = null, + aspectRatio = generationConfig?.aspectRatio?.internalVal, imageOutputOptions = generationConfig?.imageFormat?.toInternal(), + editMode = editConfig.editMode.value, + editConfig = editConfig.toInternal() ), ) } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt index ebc3db7f282..e34eda4a31f 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt @@ -21,7 +21,9 @@ import com.google.firebase.ai.common.util.fullModelName import com.google.firebase.ai.common.util.trimmedModelName import com.google.firebase.ai.type.Content import com.google.firebase.ai.type.GenerationConfig +import com.google.firebase.ai.type.ImagenEditingConfig import com.google.firebase.ai.type.ImagenImageFormat +import com.google.firebase.ai.type.ImagenInlineImage import com.google.firebase.ai.type.PublicPreviewAPI import com.google.firebase.ai.type.SafetySetting import com.google.firebase.ai.type.Tool @@ -75,11 +77,17 @@ internal data class CountTokensRequest( } @Serializable +@PublicPreviewAPI internal data class GenerateImageRequest( val instances: List, val parameters: ImagenParameters, ) : Request { - @Serializable internal data class ImagenPrompt(val prompt: String) + @Serializable + internal data class ImagenPrompt( + val prompt: String? = null, + val image: ImagenInlineImage.Internal? = null, + val referenceImages: List? = null + ) @OptIn(PublicPreviewAPI::class) @Serializable @@ -93,5 +101,38 @@ internal data class GenerateImageRequest( val personGeneration: String?, val addWatermark: Boolean?, val imageOutputOptions: ImagenImageFormat.Internal?, + val editMode: String?, + val editConfig: ImagenEditingConfig.Internal?, + ) + + @Serializable + internal enum class ReferenceType { + @SerialName("REFERENCE_TYPE_UNSPECIFIED") UNSPECIFIED, + @SerialName("REFERENCE_TYPE_RAW") RAW, + @SerialName("REFERENCE_TYPE_MASK") MASK, + @SerialName("REFERENCE_TYPE_CONTROL") CONTROL, + @SerialName("REFERENCE_TYPE_STYLE") STYLE, + @SerialName("REFERENCE_TYPE_SUBJECT") SUBJECT, + @SerialName("REFERENCE_TYPE_MASKED_SUBJECT") MASKED_SUBJECT, + @SerialName("REFERENCE_TYPE_PRODUCT") PRODUCT + } + + @Serializable + internal enum class MaskMode { + @SerialName("MASK_MODE_DEFAULT") DEFAULT, + @SerialName("MASK_MODE_USER_PROVIDED") USER_PROVIDED, + @SerialName("MASK_MODE_BACKGROUND") BACKGROUND, + @SerialName("MASK_MODE_FOREGROUND") FOREGROUND, + @SerialName("MASK_MODE_SEMANTIC") SEMANTIC + } + + @Serializable internal data class MaskImageConfig(val maskMode: MaskMode, val dilation: Double?) + + @Serializable + internal data class ReferenceImage( + val referenceType: ReferenceType, + val referenceId: Int, + val referenceImage: ImagenInlineImage.Internal, + val maskImageConfig: MaskImageConfig? ) } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt index 99d42d32732..e2703b71b01 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt @@ -19,6 +19,7 @@ package com.google.firebase.ai.java import androidx.concurrent.futures.SuspendToFutureAdapter import com.google.common.util.concurrent.ListenableFuture import com.google.firebase.ai.ImagenModel +import com.google.firebase.ai.type.ImagenEditingConfig import com.google.firebase.ai.type.ImagenGenerationResponse import com.google.firebase.ai.type.ImagenInlineImage import com.google.firebase.ai.type.PublicPreviewAPI @@ -39,6 +40,11 @@ public abstract class ImagenModelFutures internal constructor() { prompt: String, ): ListenableFuture> + public abstract fun editImage( + prompt: String, + config: ImagenEditingConfig + ): ListenableFuture> + /** Returns the [ImagenModel] object wrapped by this object. */ public abstract fun getImageModel(): ImagenModel @@ -48,6 +54,12 @@ public abstract class ImagenModelFutures internal constructor() { ): ListenableFuture> = SuspendToFutureAdapter.launchFuture { model.generateImages(prompt) } + override fun editImage( + prompt: String, + config: ImagenEditingConfig + ): ListenableFuture> = + SuspendToFutureAdapter.launchFuture { model.editImage(prompt, config) } + override fun getImageModel(): ImagenModel = model } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditMode.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditMode.kt new file mode 100644 index 00000000000..339ac440ea1 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditMode.kt @@ -0,0 +1,10 @@ +package com.google.firebase.ai.type + +public class ImagenEditMode private constructor(internal val value: String) { + + public companion object { + public val INPAINT_INSERTION: ImagenEditMode = ImagenEditMode("EDIT_MODE_INPAINT_INSERTION") + public val INPAINT_REMOVAL: ImagenEditMode = ImagenEditMode("EDIT_MODE_INPAINT_REMOVAL") + public val OUTPAINT: ImagenEditMode = ImagenEditMode("EDIT_MODE_OUTPAINT") + } +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditingConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditingConfig.kt new file mode 100644 index 00000000000..507d0d85704 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditingConfig.kt @@ -0,0 +1,68 @@ +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable + +@PublicPreviewAPI +public class ImagenEditingConfig( + internal val image: ImagenInlineImage, + internal val editMode: ImagenEditMode, + internal val mask: ImagenInlineImage? = null, + internal val maskDilation: Double? = null, + internal val editSteps: Int? = null, +) { + public companion object { + public fun builder(): Builder = Builder() + } + + public class Builder { + @JvmField public var image: ImagenInlineImage? = null + @JvmField public var editMode: ImagenEditMode? = null + @JvmField public var mask: ImagenInlineImage? = null + @JvmField public var maskDilation: Double? = null + @JvmField public var editSteps: Int? = null + + public fun setImage(image: ImagenInlineImage): Builder = apply { this.image = image } + + public fun setEditMode(editMode: ImagenEditMode): Builder = apply { this.editMode = editMode } + + public fun setMask(mask: ImagenInlineImage): Builder = apply { this.mask = mask } + + public fun setMaskDilation(maskDilation: Double): Builder = apply { + this.maskDilation = maskDilation + } + + public fun setEditSteps(editSteps: Int): Builder = apply { this.editSteps = editSteps } + + public fun build(): ImagenEditingConfig { + if (image == null) { + throw IllegalStateException("ImagenEditingConfig must contain an image") + } + if (editMode == null) { + throw IllegalStateException("ImagenEditingConfig must contain an editMode") + } + return ImagenEditingConfig( + image = image!!, + editMode = editMode!!, + mask = mask, + maskDilation = maskDilation, + editSteps = editSteps, + ) + } + } + + internal fun toInternal(): Internal { + return Internal(baseSteps = editSteps) + } + + @Serializable + internal data class Internal( + val baseSteps: Int?, + ) +} + +@PublicPreviewAPI +public fun imagenEditingConfig(init: ImagenEditingConfig.Builder.() -> Unit): ImagenEditingConfig { + val builder = ImagenEditingConfig.builder() + builder.init() + return builder.build() +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt index 5fa1d0e183b..be09cb69c73 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt @@ -18,6 +18,9 @@ package com.google.firebase.ai.type import android.graphics.Bitmap import android.graphics.BitmapFactory +import android.util.Base64 +import java.io.ByteArrayOutputStream +import kotlinx.serialization.Serializable /** * Represents an Imagen-generated image that is returned as inline data. @@ -36,4 +39,19 @@ internal constructor(public val data: ByteArray, public val mimeType: String) { public fun asBitmap(): Bitmap { return BitmapFactory.decodeByteArray(data, 0, data.size) } + + @Serializable internal data class Internal(val bytesBase64Encoded: String) + + internal fun toInternal(): Internal { + val base64 = Base64.encodeToString(data, Base64.NO_WRAP) + return Internal(base64) + } +} + +@PublicPreviewAPI +public fun Bitmap.toImagenInlineImage(): ImagenInlineImage { + val byteArrayOutputStream = ByteArrayOutputStream() + this.compress(Bitmap.CompressFormat.PNG, 100, byteArrayOutputStream) + val byteArray = byteArrayOutputStream.toByteArray() + return ImagenInlineImage(data = byteArray, mimeType = "image/png") }