diff --git a/firebase-ai/CHANGELOG.md b/firebase-ai/CHANGELOG.md index 36e2d4f3309..7f86ca48d2b 100644 --- a/firebase-ai/CHANGELOG.md +++ b/firebase-ai/CHANGELOG.md @@ -1,5 +1,6 @@ # Unreleased - +* [feature] Added support for Imagen Editing, including inpainting, outpainting, control, style + transfer, and subject references (#7075) # 17.0.0 * [feature] Added support for configuring the "thinking" budget when using Gemini diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt index 70b35587515..b979c361b52 100644 --- a/firebase-ai/api.txt +++ b/firebase-ai/api.txt @@ -68,7 +68,10 @@ package com.google.firebase.ai { } @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenModel { + method public suspend Object? editImage(java.util.List referenceImages, String prompt, com.google.firebase.ai.type.ImagenEditingConfig? config = null, kotlin.coroutines.Continuation>); method public suspend Object? generateImages(String prompt, kotlin.coroutines.Continuation>); + method public suspend Object? inpaintImage(com.google.firebase.ai.type.ImagenInlineImage image, String prompt, com.google.firebase.ai.type.ImagenMaskReference mask, com.google.firebase.ai.type.ImagenEditingConfig config, kotlin.coroutines.Continuation>); + method public suspend Object? outpaintImage(com.google.firebase.ai.type.ImagenInlineImage image, com.google.firebase.ai.type.Dimensions newDimensions, com.google.firebase.ai.type.ImagenImagePlacement newPosition = com.google.firebase.ai.type.ImagenImagePlacement.CENTER, String prompt = "", com.google.firebase.ai.type.ImagenEditingConfig? 
config = null, kotlin.coroutines.Continuation>); } @com.google.firebase.ai.type.PublicPreviewAPI public final class LiveGenerativeModel { @@ -107,9 +110,13 @@ package com.google.firebase.ai.java { } @com.google.firebase.ai.type.PublicPreviewAPI public abstract class ImagenModelFutures { + method public abstract com.google.common.util.concurrent.ListenableFuture> editImage(java.util.List referenceImages, String prompt); + method public abstract com.google.common.util.concurrent.ListenableFuture> editImage(java.util.List referenceImages, String prompt, com.google.firebase.ai.type.ImagenEditingConfig? config = null); method public static final com.google.firebase.ai.java.ImagenModelFutures from(com.google.firebase.ai.ImagenModel model); method public abstract com.google.common.util.concurrent.ListenableFuture> generateImages(String prompt); method public abstract com.google.firebase.ai.ImagenModel getImageModel(); + method public abstract com.google.common.util.concurrent.ListenableFuture> inpaintImage(com.google.firebase.ai.type.ImagenInlineImage image, String prompt, com.google.firebase.ai.type.ImagenMaskReference mask, com.google.firebase.ai.type.ImagenEditingConfig config); + method public abstract com.google.common.util.concurrent.ListenableFuture> outpaintImage(com.google.firebase.ai.type.ImagenInlineImage image, com.google.firebase.ai.type.Dimensions newDimensions, com.google.firebase.ai.type.ImagenImagePlacement newPosition = com.google.firebase.ai.type.ImagenImagePlacement.CENTER, String prompt = "", com.google.firebase.ai.type.ImagenEditingConfig? 
config = null); field public static final com.google.firebase.ai.java.ImagenModelFutures.Companion Companion; } @@ -262,6 +269,14 @@ package com.google.firebase.ai.type { property public final int totalTokens; } + public final class Dimensions { + ctor public Dimensions(int width, int height); + method public int getHeight(); + method public int getWidth(); + property public final int height; + property public final int width; + } + public final class FileDataPart implements com.google.firebase.ai.type.Part { ctor public FileDataPart(String uri, String mimeType); method public String getMimeType(); @@ -533,6 +548,43 @@ package com.google.firebase.ai.type { public static final class ImagenAspectRatio.Companion { } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenBackgroundMask extends com.google.firebase.ai.type.ImagenMaskReference { + ctor public ImagenBackgroundMask(Double? dilation = null); + } + + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenControlReference extends com.google.firebase.ai.type.ImagenReferenceImage { + ctor public ImagenControlReference(com.google.firebase.ai.type.ImagenControlType type, com.google.firebase.ai.type.ImagenInlineImage? image = null, Integer? referenceId = null, Boolean? enableComputation = null, Integer? superpixelRegionSize = null, Integer? 
superpixelRuler = null); + } + + public final class ImagenControlType { + field public static final com.google.firebase.ai.type.ImagenControlType CANNY; + field public static final com.google.firebase.ai.type.ImagenControlType COLOR_SUPERPIXEL; + field public static final com.google.firebase.ai.type.ImagenControlType.Companion Companion; + field public static final com.google.firebase.ai.type.ImagenControlType FACE_MESH; + field public static final com.google.firebase.ai.type.ImagenControlType SCRIBBLE; + } + + public static final class ImagenControlType.Companion { + } + + public final class ImagenEditMode { + field public static final com.google.firebase.ai.type.ImagenEditMode.Companion Companion; + field public static final com.google.firebase.ai.type.ImagenEditMode INPAINT_INSERTION; + field public static final com.google.firebase.ai.type.ImagenEditMode INPAINT_REMOVAL; + field public static final com.google.firebase.ai.type.ImagenEditMode OUTPAINT; + } + + public static final class ImagenEditMode.Companion { + } + + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenEditingConfig { + ctor public ImagenEditingConfig(com.google.firebase.ai.type.ImagenEditMode? editMode = null, Integer? editSteps = null); + } + + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenForegroundMask extends com.google.firebase.ai.type.ImagenMaskReference { + ctor public ImagenForegroundMask(Double? dilation = null); + } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenGenerationConfig { ctor public ImagenGenerationConfig(String? negativePrompt = null, Integer? numberOfImages = 1, com.google.firebase.ai.type.ImagenAspectRatio? aspectRatio = null, com.google.firebase.ai.type.ImagenImageFormat? imageFormat = null, Boolean? addWatermark = null); method public Boolean? 
getAddWatermark(); @@ -593,6 +645,28 @@ package com.google.firebase.ai.type { method public com.google.firebase.ai.type.ImagenImageFormat png(); } + public final class ImagenImagePlacement { + method public static com.google.firebase.ai.type.ImagenImagePlacement fromCoordinate(int x, int y); + method public Integer? getX(); + method public Integer? getY(); + property public final Integer? x; + property public final Integer? y; + field public static final com.google.firebase.ai.type.ImagenImagePlacement BOTTOM_CENTER; + field public static final com.google.firebase.ai.type.ImagenImagePlacement BOTTOM_LEFT; + field public static final com.google.firebase.ai.type.ImagenImagePlacement BOTTOM_RIGHT; + field public static final com.google.firebase.ai.type.ImagenImagePlacement CENTER; + field public static final com.google.firebase.ai.type.ImagenImagePlacement.Companion Companion; + field public static final com.google.firebase.ai.type.ImagenImagePlacement LEFT_CENTER; + field public static final com.google.firebase.ai.type.ImagenImagePlacement RIGHT_CENTER; + field public static final com.google.firebase.ai.type.ImagenImagePlacement TOP_CENTER; + field public static final com.google.firebase.ai.type.ImagenImagePlacement TOP_LEFT; + field public static final com.google.firebase.ai.type.ImagenImagePlacement TOP_RIGHT; + } + + public static final class ImagenImagePlacement.Companion { + method public com.google.firebase.ai.type.ImagenImagePlacement fromCoordinate(int x, int y); + } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenInlineImage { method public android.graphics.Bitmap asBitmap(); method public byte[] getData(); @@ -601,6 +675,21 @@ package com.google.firebase.ai.type { property public final String mimeType; } + public final class ImagenInlineImageKt { + method @com.google.firebase.ai.type.PublicPreviewAPI public static com.google.firebase.ai.type.ImagenInlineImage toImagenInlineImage(android.graphics.Bitmap); + } + + 
@com.google.firebase.ai.type.PublicPreviewAPI public abstract class ImagenMaskReference extends com.google.firebase.ai.type.ImagenReferenceImage { + method public static final java.util.List generateMaskAndPadForOutpainting(com.google.firebase.ai.type.ImagenInlineImage image, com.google.firebase.ai.type.Dimensions newDimensions); + method public static final java.util.List generateMaskAndPadForOutpainting(com.google.firebase.ai.type.ImagenInlineImage image, com.google.firebase.ai.type.Dimensions newDimensions, com.google.firebase.ai.type.ImagenImagePlacement newPosition = com.google.firebase.ai.type.ImagenImagePlacement.CENTER); + field public static final com.google.firebase.ai.type.ImagenMaskReference.Companion Companion; + } + + public static final class ImagenMaskReference.Companion { + method public java.util.List generateMaskAndPadForOutpainting(com.google.firebase.ai.type.ImagenInlineImage image, com.google.firebase.ai.type.Dimensions newDimensions); + method public java.util.List generateMaskAndPadForOutpainting(com.google.firebase.ai.type.ImagenInlineImage image, com.google.firebase.ai.type.Dimensions newDimensions, com.google.firebase.ai.type.ImagenImagePlacement newPosition = com.google.firebase.ai.type.ImagenImagePlacement.CENTER); + } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenPersonFilterLevel { field public static final com.google.firebase.ai.type.ImagenPersonFilterLevel ALLOW_ADULT; field public static final com.google.firebase.ai.type.ImagenPersonFilterLevel ALLOW_ALL; @@ -611,6 +700,21 @@ package com.google.firebase.ai.type { public static final class ImagenPersonFilterLevel.Companion { } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenRawImage extends com.google.firebase.ai.type.ImagenReferenceImage { + ctor public ImagenRawImage(com.google.firebase.ai.type.ImagenInlineImage image); + } + + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenRawMask extends 
com.google.firebase.ai.type.ImagenMaskReference { + ctor public ImagenRawMask(com.google.firebase.ai.type.ImagenInlineImage mask, Double? dilation = null); + } + + @com.google.firebase.ai.type.PublicPreviewAPI public abstract class ImagenReferenceImage { + method public final com.google.firebase.ai.type.ImagenInlineImage? getImage(); + method public final Integer? getReferenceId(); + property public final com.google.firebase.ai.type.ImagenInlineImage? image; + property public final Integer? referenceId; + } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenSafetyFilterLevel { field public static final com.google.firebase.ai.type.ImagenSafetyFilterLevel BLOCK_LOW_AND_ABOVE; field public static final com.google.firebase.ai.type.ImagenSafetyFilterLevel BLOCK_MEDIUM_AND_ABOVE; @@ -626,6 +730,28 @@ package com.google.firebase.ai.type { ctor public ImagenSafetySettings(com.google.firebase.ai.type.ImagenSafetyFilterLevel safetyFilterLevel, com.google.firebase.ai.type.ImagenPersonFilterLevel personFilterLevel); } + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenSemanticMask extends com.google.firebase.ai.type.ImagenMaskReference { + ctor public ImagenSemanticMask(java.util.List classes, Double? dilation = null); + } + + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenStyleReference extends com.google.firebase.ai.type.ImagenReferenceImage { + ctor public ImagenStyleReference(com.google.firebase.ai.type.ImagenInlineImage image, Integer? referenceId = null, String? description = null); + } + + @com.google.firebase.ai.type.PublicPreviewAPI public final class ImagenSubjectReference extends com.google.firebase.ai.type.ImagenReferenceImage { + ctor public ImagenSubjectReference(com.google.firebase.ai.type.ImagenInlineImage image, Integer? referenceId = null, String? description = null, com.google.firebase.ai.type.ImagenSubjectReferenceType? 
subjectType = null); + } + + public final class ImagenSubjectReferenceType { + field public static final com.google.firebase.ai.type.ImagenSubjectReferenceType ANIMAL; + field public static final com.google.firebase.ai.type.ImagenSubjectReferenceType.Companion Companion; + field public static final com.google.firebase.ai.type.ImagenSubjectReferenceType PERSON; + field public static final com.google.firebase.ai.type.ImagenSubjectReferenceType PRODUCT; + } + + public static final class ImagenSubjectReferenceType.Companion { + } + public final class InlineDataPart implements com.google.firebase.ai.type.Part { ctor public InlineDataPart(byte[] inlineData, String mimeType); method public byte[] getInlineData(); diff --git a/firebase-ai/gradle.properties b/firebase-ai/gradle.properties index ca9f42eb826..40274ac5e9a 100644 --- a/firebase-ai/gradle.properties +++ b/firebase-ai/gradle.properties @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -version=17.0.1 +version=17.1.0 latestReleasedVersion=17.0.0 diff --git a/firebase-ai/src/androidTest/kotlin/com/google/firebase/ai/ImagenTests.kt b/firebase-ai/src/androidTest/kotlin/com/google/firebase/ai/ImagenTests.kt new file mode 100644 index 00000000000..dbc35a71dad --- /dev/null +++ b/firebase-ai/src/androidTest/kotlin/com/google/firebase/ai/ImagenTests.kt @@ -0,0 +1,45 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai + +import com.google.firebase.ai.AIModels.Companion.app +import com.google.firebase.ai.type.ImagenBackgroundMask +import com.google.firebase.ai.type.ImagenEditMode +import com.google.firebase.ai.type.ImagenEditingConfig +import com.google.firebase.ai.type.ImagenRawImage +import com.google.firebase.ai.type.PublicPreviewAPI +import kotlinx.coroutines.runBlocking +import org.junit.Test + +@OptIn(PublicPreviewAPI::class) +class ImagenTests { + @Test + fun testGenerateAndEditImage() { + val imageGenerationModel = FirebaseAI.getInstance(app()).imagenModel("imagen-3.0-generate-002") + val imageEditingModel = FirebaseAI.getInstance(app()).imagenModel("imagen-3.0-capability-001") + + runBlocking { + val catImage = imageGenerationModel.generateImages("A cat").images.first() + val editedCatImage = + imageEditingModel.editImage( + listOf(ImagenRawImage(catImage), ImagenBackgroundMask()), + "A cat flying through space", + ImagenEditingConfig(ImagenEditMode.INPAINT_INSERTION) + ) + check(editedCatImage.images.size == 1) { "Expected exactly one edited image" } + } + } +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt index 4d88d09b1e1..399da4c77cf 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/ImagenModel.kt @@ -21,10 +21,17 @@ import com.google.firebase.ai.common.APIController import com.google.firebase.ai.common.AppCheckHeaderProvider import com.google.firebase.ai.common.ContentBlockedException import com.google.firebase.ai.common.GenerateImageRequest +import com.google.firebase.ai.type.Dimensions import com.google.firebase.ai.type.FirebaseAIException +import com.google.firebase.ai.type.ImagenEditMode +import com.google.firebase.ai.type.ImagenEditingConfig import 
com.google.firebase.ai.type.ImagenGenerationConfig import com.google.firebase.ai.type.ImagenGenerationResponse +import com.google.firebase.ai.type.ImagenImagePlacement import com.google.firebase.ai.type.ImagenInlineImage +import com.google.firebase.ai.type.ImagenMaskReference +import com.google.firebase.ai.type.ImagenRawImage +import com.google.firebase.ai.type.ImagenReferenceImage import com.google.firebase.ai.type.ImagenSafetySettings import com.google.firebase.ai.type.PublicPreviewAPI import com.google.firebase.ai.type.RequestOptions @@ -75,30 +82,130 @@ internal constructor( public suspend fun generateImages(prompt: String): ImagenGenerationResponse = try { controller - .generateImage(constructRequest(prompt, null, generationConfig)) + .generateImage(constructGenerateImageRequest(prompt, generationConfig)) .validate() .toPublicInline() } catch (e: Throwable) { throw FirebaseAIException.from(e) } - private fun constructRequest( + /** + * Generates an image from a single or set of base images, returning the result directly to the + * caller. + * + * @param referenceImages the image inputs given to the model as a prompt + * @param prompt the text input given to the model as a prompt + * @param config the editing configuration settings + */ + public suspend fun editImage( + referenceImages: List, + prompt: String, + config: ImagenEditingConfig? = null, + ): ImagenGenerationResponse = + try { + controller + .generateImage(constructEditRequest(referenceImages, prompt, config)) + .validate() + .toPublicInline() + } catch (e: Throwable) { + throw FirebaseAIException.from(e) + } + + /** + * Generates an image by inpainting a masked off part of a base image. Inpainting is the process + * of filling in missing or masked off parts of the image using context from the original image + * and prompt. 
+ * + * @param image the base image + * @param prompt the text input given to the model as a prompt + * @param mask the mask which defines where in the image can be painted by Imagen. + * @param config the editing configuration settings, it should include an [ImagenEditMode] + */ + public suspend fun inpaintImage( + image: ImagenInlineImage, prompt: String, - gcsUri: String?, - config: ImagenGenerationConfig?, + mask: ImagenMaskReference, + config: ImagenEditingConfig, + ): ImagenGenerationResponse { + return editImage(listOf(ImagenRawImage(image), mask), prompt, config) + } + + /** + * Generates an image by outpainting the given image, extending its content beyond the original + * borders using context from the original image, and optionally, the prompt. + * + * @param image the base image + * @param newDimensions the new dimensions for the image, *must* be larger than the original + * image. + * @param newPosition the placement of the base image within the new image. This can either be + * coordinates (0,0 is the top left corner) or an alignment (ex: + * [ImagenImagePlacement.BOTTOM_CENTER]) + * @param prompt optional, can be used to specify the background generated if context is + * insufficient + * @param config the editing configuration settings + * @see [ImagenMaskReference.generateMaskAndPadForOutpainting] + */ + public suspend fun outpaintImage( + image: ImagenInlineImage, + newDimensions: Dimensions, + newPosition: ImagenImagePlacement = ImagenImagePlacement.CENTER, + prompt: String = "", + config: ImagenEditingConfig? = null, + ): ImagenGenerationResponse { + return editImage( + ImagenMaskReference.generateMaskAndPadForOutpainting(image, newDimensions, newPosition), + prompt, + ImagenEditingConfig(ImagenEditMode.OUTPAINT, config?.editSteps) + ) + } + + private fun constructGenerateImageRequest( + prompt: String, + generationConfig: ImagenGenerationConfig? 
= null, ): GenerateImageRequest { return GenerateImageRequest( - listOf(GenerateImageRequest.ImagenPrompt(prompt)), + listOf(GenerateImageRequest.ImagenPrompt(prompt, null)), + GenerateImageRequest.ImagenParameters( + sampleCount = generationConfig?.numberOfImages ?: 1, + includeRaiReason = true, + addWatermark = generationConfig?.addWatermark, + personGeneration = safetySettings?.personFilterLevel?.internalVal, + negativePrompt = generationConfig?.negativePrompt, + safetySetting = safetySettings?.safetyFilterLevel?.internalVal, + storageUri = null, + aspectRatio = generationConfig?.aspectRatio?.internalVal, + imageOutputOptions = generationConfig?.imageFormat?.toInternal(), + editMode = null, + editConfig = null, + ), + ) + } + + private fun constructEditRequest( + referenceImages: List, + prompt: String, + editConfig: ImagenEditingConfig?, + ): GenerateImageRequest { + var maxRefId = referenceImages.mapNotNull { it.referenceId }.maxOrNull() ?: 1 + return GenerateImageRequest( + listOf( + GenerateImageRequest.ImagenPrompt( + prompt = prompt, + referenceImages = referenceImages.map { it.toInternal(++maxRefId) }, + ) + ), GenerateImageRequest.ImagenParameters( - sampleCount = config?.numberOfImages ?: 1, + sampleCount = generationConfig?.numberOfImages ?: 1, includeRaiReason = true, addWatermark = generationConfig?.addWatermark, personGeneration = safetySettings?.personFilterLevel?.internalVal, - negativePrompt = config?.negativePrompt, + negativePrompt = generationConfig?.negativePrompt, safetySetting = safetySettings?.safetyFilterLevel?.internalVal, - storageUri = gcsUri, - aspectRatio = config?.aspectRatio?.internalVal, + storageUri = null, + aspectRatio = generationConfig?.aspectRatio?.internalVal, imageOutputOptions = generationConfig?.imageFormat?.toInternal(), + editMode = editConfig?.editMode?.value, + editConfig = editConfig?.toInternal(), ), ) } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt 
b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt index ebc3db7f282..3f8e0ae079d 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/common/Request.kt @@ -21,7 +21,9 @@ import com.google.firebase.ai.common.util.fullModelName import com.google.firebase.ai.common.util.trimmedModelName import com.google.firebase.ai.type.Content import com.google.firebase.ai.type.GenerationConfig +import com.google.firebase.ai.type.ImagenEditingConfig import com.google.firebase.ai.type.ImagenImageFormat +import com.google.firebase.ai.type.ImagenReferenceImage import com.google.firebase.ai.type.PublicPreviewAPI import com.google.firebase.ai.type.SafetySetting import com.google.firebase.ai.type.Tool @@ -75,11 +77,16 @@ internal data class CountTokensRequest( } @Serializable +@PublicPreviewAPI internal data class GenerateImageRequest( val instances: List, val parameters: ImagenParameters, ) : Request { - @Serializable internal data class ImagenPrompt(val prompt: String) + @Serializable + internal data class ImagenPrompt( + val prompt: String?, + val referenceImages: List? 
+ ) @OptIn(PublicPreviewAPI::class) @Serializable @@ -93,5 +100,19 @@ internal data class GenerateImageRequest( val personGeneration: String?, val addWatermark: Boolean?, val imageOutputOptions: ImagenImageFormat.Internal?, + val editMode: String?, + val editConfig: ImagenEditingConfig.Internal?, ) + + @Serializable + internal enum class ReferenceType { + @SerialName("REFERENCE_TYPE_UNSPECIFIED") UNSPECIFIED, + @SerialName("REFERENCE_TYPE_RAW") RAW, + @SerialName("REFERENCE_TYPE_MASK") MASK, + @SerialName("REFERENCE_TYPE_CONTROL") CONTROL, + @SerialName("REFERENCE_TYPE_STYLE") STYLE, + @SerialName("REFERENCE_TYPE_SUBJECT") SUBJECT, + @SerialName("REFERENCE_TYPE_MASKED_SUBJECT") MASKED_SUBJECT, + @SerialName("REFERENCE_TYPE_PRODUCT") PRODUCT + } } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt index 99d42d32732..2f0299da406 100644 --- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/ImagenModelFutures.kt @@ -19,8 +19,14 @@ package com.google.firebase.ai.java import androidx.concurrent.futures.SuspendToFutureAdapter import com.google.common.util.concurrent.ListenableFuture import com.google.firebase.ai.ImagenModel +import com.google.firebase.ai.type.Dimensions +import com.google.firebase.ai.type.ImagenEditMode +import com.google.firebase.ai.type.ImagenEditingConfig import com.google.firebase.ai.type.ImagenGenerationResponse +import com.google.firebase.ai.type.ImagenImagePlacement import com.google.firebase.ai.type.ImagenInlineImage +import com.google.firebase.ai.type.ImagenMaskReference +import com.google.firebase.ai.type.ImagenReferenceImage import com.google.firebase.ai.type.PublicPreviewAPI /** @@ -39,6 +45,69 @@ public abstract class ImagenModelFutures internal constructor() { prompt: String, ): ListenableFuture> + /** + * Generates an image from a 
single or set of base images, returning the result directly to the + * caller. + * + * @param referenceImages the image inputs given to the model as a prompt + * @param prompt the text input given to the model as a prompt + * @param config the editing configuration settings + */ + public abstract fun editImage( + referenceImages: List, + prompt: String, + config: ImagenEditingConfig? = null + ): ListenableFuture> + + /** + * Generates an image from a single or set of base images, returning the result directly to the + * caller. + * + * @param referenceImages the image inputs given to the model as a prompt + * @param prompt the text input given to the model as a prompt + */ + public abstract fun editImage( + referenceImages: List, + prompt: String, + ): ListenableFuture> + + /** + * Generates an image by inpainting a masked off part of a base image. + * + * @param image the base image + * @param prompt the text input given to the model as a prompt + * @param mask the mask which defines where in the image can be painted by Imagen. + * @param config the editing configuration settings, it should include an [ImagenEditMode] + */ + public abstract fun inpaintImage( + image: ImagenInlineImage, + prompt: String, + mask: ImagenMaskReference, + config: ImagenEditingConfig, + ): ListenableFuture> + + /** + * Generates an image by outpainting the image, extending its borders + * + * @param image the base image + * @param newDimensions the new dimensions for the image, *must* be larger than the original + * image. + * @param newPosition the placement of the base image within the new image. 
This can either be + * coordinates (0,0 is the top left corner) or an alignment (ex: + * [ImagenImagePlacement.BOTTOM_CENTER]) + * @param prompt optional, but can be used to specify the background generated if context is + * insufficient + * @param config the editing configuration settings + * @see [ImagenMaskReference.generateMaskAndPadForOutpainting] + */ + public abstract fun outpaintImage( + image: ImagenInlineImage, + newDimensions: Dimensions, + newPosition: ImagenImagePlacement = ImagenImagePlacement.CENTER, + prompt: String = "", + config: ImagenEditingConfig? = null, + ): ListenableFuture> + /** Returns the [ImagenModel] object wrapped by this object. */ public abstract fun getImageModel(): ImagenModel @@ -48,6 +117,38 @@ public abstract class ImagenModelFutures internal constructor() { ): ListenableFuture> = SuspendToFutureAdapter.launchFuture { model.generateImages(prompt) } + override fun editImage( + referenceImages: List, + prompt: String, + config: ImagenEditingConfig? + ): ListenableFuture> = + SuspendToFutureAdapter.launchFuture { model.editImage(referenceImages, prompt, config) } + + override fun editImage( + referenceImages: List, + prompt: String, + ): ListenableFuture> = + editImage(referenceImages, prompt, null) + + override fun inpaintImage( + image: ImagenInlineImage, + prompt: String, + mask: ImagenMaskReference, + config: ImagenEditingConfig + ): ListenableFuture> = + SuspendToFutureAdapter.launchFuture { model.inpaintImage(image, prompt, mask, config) } + + override fun outpaintImage( + image: ImagenInlineImage, + newDimensions: Dimensions, + newPosition: ImagenImagePlacement, + prompt: String, + config: ImagenEditingConfig? 
+ ): ListenableFuture> = + SuspendToFutureAdapter.launchFuture { + model.outpaintImage(image, newDimensions, newPosition, prompt, config) + } + override fun getImageModel(): ImagenModel = model } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Dimensions.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Dimensions.kt new file mode 100644 index 00000000000..98f256f39b2 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Dimensions.kt @@ -0,0 +1,23 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +/** + * Represents the dimensions of an image in pixels + * @param width the width of the image in pixels + * @param height the height of the image in pixels + */ +public class Dimensions(public val width: Int, public val height: Int) {} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenControlConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenControlConfig.kt new file mode 100644 index 00000000000..c6f3b01af6c --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenControlConfig.kt @@ -0,0 +1,43 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable + +internal class ImagenControlConfig( + internal val controlType: ImagenControlType, + internal val enableComputation: Boolean? = null, + internal val superpixelRegionSize: Int? = null, + internal val superpixelRuler: Int? = null +) { + + fun toInternal(): Internal { + return Internal( + controlType = controlType.value, + enableControlImageComputation = enableComputation, + superpixelRegionSize = superpixelRegionSize, + superpixelRuler = superpixelRuler + ) + } + + @Serializable + internal class Internal( + val controlType: String?, + val enableControlImageComputation: Boolean?, + val superpixelRegionSize: Int?, + val superpixelRuler: Int? + ) +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenControlType.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenControlType.kt new file mode 100644 index 00000000000..0732b9a0553 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenControlType.kt @@ -0,0 +1,39 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +/** Represents a control type for controlled Imagen generation/editing */ +public class ImagenControlType internal constructor(internal val value: String) { + public companion object { + + /** Use edge detection to ensure the new image follows the same outlines */ + @JvmField public val CANNY: ImagenControlType = ImagenControlType("CONTROL_TYPE_CANNY") + + /** Use enhanced edge detection to ensure the new image follows similar outlines */ + @JvmField public val SCRIBBLE: ImagenControlType = ImagenControlType("CONTROL_TYPE_SCRIBBLE") + + /** Use face mesh control to ensure that the new image has the same facial expressions */ + @JvmField public val FACE_MESH: ImagenControlType = ImagenControlType("CONTROL_TYPE_FACE_MESH") + + /** + * Use color superpixels to ensure that the new image is similar in shape and color to the + * original + */ + @JvmField + public val COLOR_SUPERPIXEL: ImagenControlType = + ImagenControlType("CONTROL_TYPE_COLOR_SUPERPIXEL") + } +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditMode.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditMode.kt new file mode 100644 index 00000000000..a852ea88a2a --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditMode.kt @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +/** Represents the edit mode for Imagen */ +public class ImagenEditMode private constructor(internal val value: String) { + + public companion object { + /** Inserts a new element into an image */ + @JvmField + public val INPAINT_INSERTION: ImagenEditMode = ImagenEditMode("EDIT_MODE_INPAINT_INSERTION") + /** Removes an element from an image */ + @JvmField + public val INPAINT_REMOVAL: ImagenEditMode = ImagenEditMode("EDIT_MODE_INPAINT_REMOVAL") + /** Extends the borders of an image outwards */ + @JvmField public val OUTPAINT: ImagenEditMode = ImagenEditMode("EDIT_MODE_OUTPAINT") + } +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditingConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditingConfig.kt new file mode 100644 index 00000000000..60e54261c1e --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenEditingConfig.kt @@ -0,0 +1,38 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable + +/** + * Contains the editing settings which are not specific to a reference image + * @param editMode holds the editing mode if the request is for inpainting or outpainting + * @param editSteps the number of intermediate steps to include in the editing process + */ +@PublicPreviewAPI +public class ImagenEditingConfig( + internal val editMode: ImagenEditMode? = null, + internal val editSteps: Int? = null, +) { + internal fun toInternal(): Internal { + return Internal(baseSteps = editSteps) + } + + @Serializable + internal data class Internal( + val baseSteps: Int?, + ) +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenImagePlacement.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenImagePlacement.kt new file mode 100644 index 00000000000..db003a15344 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenImagePlacement.kt @@ -0,0 +1,108 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +/** + * Represents where the placement of an image is within a new, larger image, usually in the context + * of an outpainting request. + */ +public class ImagenImagePlacement +private constructor(public val x: Int? = null, public val y: Int? 
= null) { + + /** + * If this placement is represented by coordinates this is a no-op, if it's one of the enumerated + * types below, then the position is calculated based on its description + */ + internal fun normalizeToDimensions( + imageDimensions: Dimensions, + canvasDimensions: Dimensions, + ): ImagenImagePlacement { + if (this.x != null && this.y != null) { + return this + } + val halfCanvasHeight = canvasDimensions.height / 2 + val halfCanvasWidth = canvasDimensions.width / 2 + val halfImageHeight = imageDimensions.height / 2 + val halfImageWidth = imageDimensions.width / 2 + return when (this) { + CENTER -> + ImagenImagePlacement(halfCanvasWidth - halfImageWidth, halfCanvasHeight - halfImageHeight) + TOP_CENTER -> ImagenImagePlacement(halfCanvasWidth - halfImageWidth, 0) + BOTTOM_CENTER -> + ImagenImagePlacement( + halfCanvasWidth - halfImageWidth, + canvasDimensions.height - imageDimensions.height, + ) + LEFT_CENTER -> ImagenImagePlacement(0, halfCanvasHeight - halfImageHeight) + RIGHT_CENTER -> + ImagenImagePlacement( + canvasDimensions.width - imageDimensions.width, + halfCanvasHeight - halfImageHeight, + ) + TOP_RIGHT -> ImagenImagePlacement(canvasDimensions.width - imageDimensions.width, 0) + BOTTOM_LEFT -> ImagenImagePlacement(0, canvasDimensions.height - imageDimensions.height) + BOTTOM_RIGHT -> + ImagenImagePlacement( + canvasDimensions.width - imageDimensions.width, + canvasDimensions.height - imageDimensions.height, + ) + else -> { + throw IllegalStateException("Unknown ImagenImagePlacement instance, cannot normalize") + } + } + } + + public companion object { + /** + * Creates an [ImagenImagePlacement] that represents a placement in an image described by two + * coordinates. The coordinate system has 0,0 in the top left corner, and the x and y + * coordinates represent the location of the top left corner of the original image.
+ * @param x the x coordinate of the top left corner of the original image + * @param y the y coordinate of the top left corner of the original image + */ + @JvmStatic + public fun fromCoordinate(x: Int, y: Int): ImagenImagePlacement { + return ImagenImagePlacement(x, y) + } + + /** Center the image horizontally and vertically within the larger image */ + @JvmField public val CENTER: ImagenImagePlacement = ImagenImagePlacement() + + /** Center the image horizontally and aligned with the top edge of the larger image */ + @JvmField public val TOP_CENTER: ImagenImagePlacement = ImagenImagePlacement() + + /** Center the image horizontally and aligned with the bottom edge of the larger image */ + @JvmField public val BOTTOM_CENTER: ImagenImagePlacement = ImagenImagePlacement() + + /** Center the image vertically and aligned with the left edge of the larger image */ + @JvmField public val LEFT_CENTER: ImagenImagePlacement = ImagenImagePlacement() + + /** Center the image vertically and aligned with the right edge of the larger image */ + @JvmField public val RIGHT_CENTER: ImagenImagePlacement = ImagenImagePlacement() + + /** Align the image with the top left corner of the larger image */ + @JvmField public val TOP_LEFT: ImagenImagePlacement = ImagenImagePlacement(0, 0) + + /** Align the image with the top right corner of the larger image */ + @JvmField public val TOP_RIGHT: ImagenImagePlacement = ImagenImagePlacement() + + /** Align the image with the bottom left corner of the larger image */ + @JvmField public val BOTTOM_LEFT: ImagenImagePlacement = ImagenImagePlacement() + + /** Align the image with the bottom right corner of the larger image */ + @JvmField public val BOTTOM_RIGHT: ImagenImagePlacement = ImagenImagePlacement() + } +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt index 5fa1d0e183b..fc6033d5390 100644 --- 
a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenInlineImage.kt @@ -18,6 +18,9 @@ package com.google.firebase.ai.type import android.graphics.Bitmap import android.graphics.BitmapFactory +import android.util.Base64 +import java.io.ByteArrayOutputStream +import kotlinx.serialization.Serializable /** * Represents an Imagen-generated image that is returned as inline data. @@ -36,4 +39,19 @@ internal constructor(public val data: ByteArray, public val mimeType: String) { public fun asBitmap(): Bitmap { return BitmapFactory.decodeByteArray(data, 0, data.size) } + + @Serializable internal data class Internal(val bytesBase64Encoded: String) + + internal fun toInternal(): Internal { + val base64 = Base64.encodeToString(data, Base64.NO_WRAP) + return Internal(base64) + } +} + +@PublicPreviewAPI +public fun Bitmap.toImagenInlineImage(): ImagenInlineImage { + val byteArrayOutputStream = ByteArrayOutputStream() + this.compress(Bitmap.CompressFormat.JPEG, 80, byteArrayOutputStream) + val byteArray = byteArrayOutputStream.toByteArray() + return ImagenInlineImage(data = byteArray, mimeType = "image/jpeg") } diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenMaskConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenMaskConfig.kt new file mode 100644 index 00000000000..64a9a4376e3 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenMaskConfig.kt @@ -0,0 +1,35 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable + +internal class ImagenMaskConfig( + internal val maskType: ImagenMaskMode, + internal val dilation: Double? = null, + internal val classes: List? = null +) { + internal fun toInternal(): Internal { + return Internal(maskType.value, dilation, classes) + } + + @Serializable + internal data class Internal( + val maskMode: String, + val dilation: Double?, + val maskClasses: List? + ) +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenMaskMode.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenMaskMode.kt new file mode 100644 index 00000000000..b83a20381e2 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenMaskMode.kt @@ -0,0 +1,25 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.firebase.ai.type + +internal class ImagenMaskMode private constructor(internal val value: String) { + companion object { + val USER_PROVIDED: ImagenMaskMode = ImagenMaskMode("MASK_MODE_USER_PROVIDED") + val BACKGROUND: ImagenMaskMode = ImagenMaskMode("MASK_MODE_BACKGROUND") + val FOREGROUND: ImagenMaskMode = ImagenMaskMode("MASK_MODE_FOREGROUND") + val SEMANTIC: ImagenMaskMode = ImagenMaskMode("MASK_MODE_SEMANTIC") + } +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenReferenceImage.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenReferenceImage.kt new file mode 100644 index 00000000000..b9ba2ff48b2 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenReferenceImage.kt @@ -0,0 +1,282 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +import android.graphics.Bitmap +import android.graphics.Canvas +import android.graphics.Color +import android.graphics.Paint +import android.graphics.Rect +import com.google.firebase.ai.common.GenerateImageRequest +import kotlinx.serialization.Serializable + +/** Represents a reference image for an Imagen editing request */ +@PublicPreviewAPI +public abstract class ImagenReferenceImage +internal constructor( + internal val maskConfig: ImagenMaskConfig? = null, + internal val subjectConfig: ImagenSubjectConfig?
= null, + internal val styleConfig: ImagenStyleConfig? = null, + internal val controlConfig: ImagenControlConfig? = null, + public val image: ImagenInlineImage? = null, + public val referenceId: Int? = null, +) { + + internal fun toInternal(optionalReferenceId: Int): Internal { + val referenceType = + when (this) { + is ImagenRawImage -> GenerateImageRequest.ReferenceType.RAW + is ImagenMaskReference -> GenerateImageRequest.ReferenceType.MASK + is ImagenSubjectReference -> GenerateImageRequest.ReferenceType.SUBJECT + is ImagenStyleReference -> GenerateImageRequest.ReferenceType.STYLE + is ImagenControlReference -> GenerateImageRequest.ReferenceType.CONTROL + else -> { + throw IllegalStateException( + "${this.javaClass.simpleName} is not a known subtype of ImagenReferenceImage" + ) + } + } + return Internal( + referenceType = referenceType, + referenceImage = image?.toInternal(), + referenceId = referenceId ?: optionalReferenceId, + subjectImageConfig = subjectConfig?.toInternal(), + maskImageConfig = maskConfig?.toInternal(), + styleImageConfig = styleConfig?.toInternal(), + controlConfig = controlConfig?.toInternal(), + ) + } + + @Serializable + internal data class Internal( + val referenceType: GenerateImageRequest.ReferenceType, + val referenceImage: ImagenInlineImage.Internal?, + val referenceId: Int, + val subjectImageConfig: ImagenSubjectConfig.Internal?, + val maskImageConfig: ImagenMaskConfig.Internal?, + val styleImageConfig: ImagenStyleConfig.Internal?, + val controlConfig: ImagenControlConfig.Internal? + ) +} + +/** + * Represents a reference image (provided or generated) to bound the created image via controlled + * generation. 
+ * @param image the image provided, required if [enableComputation] is false + * @param type the type of control reference image + * @param referenceId the reference ID for this image, to be referenced in the prompt + * @param enableComputation requests that the reference image be generated serverside instead of + * provided + * @param superpixelRegionSize if type is COLOR_SUPERPIXEL and [enableComputation] is true, this + * will control the size of each superpixel region in pixels for the generated referenced image + * @param superpixelRuler if type is COLOR_SUPERPIXEL and [enableComputation] is true, this will + * control the superpixel smoothness factor for the generated referenced image + */ +@PublicPreviewAPI +public class ImagenControlReference( + type: ImagenControlType, + image: ImagenInlineImage? = null, + referenceId: Int? = null, + enableComputation: Boolean? = null, + superpixelRegionSize: Int? = null, + superpixelRuler: Int? = null, +) : + ImagenReferenceImage( + controlConfig = + ImagenControlConfig(type, enableComputation, superpixelRegionSize, superpixelRuler), + image = image, + referenceId = referenceId, + ) {} + +/** + * Represents a reference image for Imagen editing which will mask off a region to be edited. This + * image (generated or provided) should contain only black and white pixels, with black representing + * parts of the image which should not change. + */ +@PublicPreviewAPI +public abstract class ImagenMaskReference +internal constructor(maskConfig: ImagenMaskConfig, image: ImagenInlineImage? = null) : + ImagenReferenceImage(maskConfig = maskConfig, image = image) { + + public companion object { + /** + * Generates these two reference images in order: + * * One [ImagenRawImage] containing the original image, padded out to the new dimensions with + * black pixels, with the original image placed at the given placement + * * One [ImagenRawMask] of the same dimensions containing white everywhere except at the + * placement of the original image.
+ * + * This is the format expected by Imagen for outpainting requests. + * + * @param image the original image + * @param newDimensions the new dimensions for outpainting. Neither dimension may be smaller + * than that of the original image. + * @param newPosition the placement of the original image within the new outpainted image. + */ + @JvmOverloads + @JvmStatic + public fun generateMaskAndPadForOutpainting( + image: ImagenInlineImage, + newDimensions: Dimensions, + newPosition: ImagenImagePlacement = ImagenImagePlacement.CENTER, + ): List { + val originalBitmap = image.asBitmap() + if ( + originalBitmap.width > newDimensions.width || originalBitmap.height > newDimensions.height + ) { + throw IllegalArgumentException( + "New Dimensions must be at least as large as original image dimensions. Original image " + + "is:${originalBitmap.width}x${originalBitmap.height}, new dimensions are " + + "${newDimensions.width}x${newDimensions.height}" + ) + } + val normalizedPosition = + newPosition.normalizeToDimensions( + Dimensions(originalBitmap.width, originalBitmap.height), + newDimensions, + ) + + if (normalizedPosition.x == null || normalizedPosition.y == null) { + throw IllegalStateException("Error normalizing position for mask and padding.") + } + + val normalizedImageRectangle = + Rect( + normalizedPosition.x, + normalizedPosition.y, + normalizedPosition.x + originalBitmap.width, + normalizedPosition.y + originalBitmap.height, + ) + + val maskBitmap = + Bitmap.createBitmap(newDimensions.width, newDimensions.height, Bitmap.Config.RGB_565) + val newImageBitmap = + Bitmap.createBitmap(newDimensions.width, newDimensions.height, Bitmap.Config.RGB_565) + + val maskCanvas = Canvas(maskBitmap) + val newImageCanvas = Canvas(newImageBitmap) + + val blackPaint = Paint().apply { color = Color.BLACK } + val whitePaint = Paint().apply { color = Color.WHITE } + + // Fill the mask with white, then draw a black rectangle where the image is.
+ maskCanvas.drawPaint(whitePaint) + maskCanvas.drawRect(normalizedImageRectangle, blackPaint) + + // fill the image with black, and then draw the bitmap into the corresponding spot + newImageCanvas.drawPaint(blackPaint) + newImageCanvas.drawBitmap(originalBitmap, null, normalizedImageRectangle, null) + return listOf( + ImagenRawImage(newImageBitmap.toImagenInlineImage()), + ImagenRawMask(maskBitmap.toImagenInlineImage()), + ) + } + } +} + +/** + * A generated mask image which will auto-detect and mask out the background. The background will be + * white, and the foreground black + * @param dilation the amount to dilate the mask, this can help smooth the borders of an edit and + * make it seem more convincing. For example, 0.05 would dilate the mask 5%. + */ +@PublicPreviewAPI +public class ImagenBackgroundMask(dilation: Double? = null) : + ImagenMaskReference(maskConfig = ImagenMaskConfig(ImagenMaskMode.BACKGROUND, dilation)) {} + +/** + * A generated mask image which will auto-detect and mask out the foreground. The background will be + * black, and the foreground white + * @param dilation the amount to dilate the mask, this can help smooth the borders of an edit and + * make it seem more convincing. For example, 0.05 would dilate the mask 5%. + */ +@PublicPreviewAPI +public class ImagenForegroundMask(dilation: Double? = null) : + ImagenMaskReference(maskConfig = ImagenMaskConfig(ImagenMaskMode.FOREGROUND, dilation)) {} + +/** + * Represents a reference image for Imagen editing which will mask off a region to be edited. This + * image should contain only black and white pixels, with black representing parts of the image + * which should not change. + * + * @param mask the mask image + * @param dilation the amount to dilate the mask, this can help smooth the borders of an edit and + * make it seem more convincing. For example, 0.05 would dilate the mask 5%. + */ +@PublicPreviewAPI +public class ImagenRawMask(mask: ImagenInlineImage, dilation: Double?
= null) : + ImagenMaskReference( + maskConfig = ImagenMaskConfig(ImagenMaskMode.USER_PROVIDED, dilation), + image = mask, + ) {} + +/** + * Represents a generated mask for Imagen editing which masks out certain objects using object + * detection. + * @param classes the list of segmentation IDs for objects to detect and mask out. See + * [here](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api-edit#segment-ids) + * for a list of segmentation IDs + * @param dilation the amount to dilate the mask, this can help smooth the borders of an edit and + * make it seem more convincing. For example, 0.05 would dilate the mask 5%. + */ +@PublicPreviewAPI +public class ImagenSemanticMask(classes: List, dilation: Double? = null) : + ImagenMaskReference(maskConfig = ImagenMaskConfig(ImagenMaskMode.SEMANTIC, dilation, classes)) {} + +/** + * Represents a base image for Imagen editing + * @param image the image + */ +@PublicPreviewAPI +public class ImagenRawImage(image: ImagenInlineImage) : ImagenReferenceImage(image = image) {} + +/** + * A reference image for style transfer + * @param image the image representing the style you want to transfer to your original images + * @param referenceId the reference ID you can use to reference this style in your prompt + * @param description the description you can use to reference this style in your prompt + */ +@PublicPreviewAPI +public class ImagenStyleReference( + image: ImagenInlineImage, + referenceId: Int? = null, + description: String? 
= null, +) : + ImagenReferenceImage( + image = image, + referenceId = referenceId, + styleConfig = ImagenStyleConfig(description) + ) {} + +/** + * A reference image for generating an image with a specific subject + * @param image the image of the subject + * @param referenceId the reference ID you can use to reference this subject in your prompt + * @param description the description you can use to reference this subject in your prompt + * @param subjectType the type of the subject + */ +@PublicPreviewAPI +public class ImagenSubjectReference( + image: ImagenInlineImage, + referenceId: Int? = null, + description: String? = null, + subjectType: ImagenSubjectReferenceType? = null, +) : + ImagenReferenceImage( + image = image, + referenceId = referenceId, + subjectConfig = ImagenSubjectConfig(description, subjectType), + ) {} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenStyleConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenStyleConfig.kt new file mode 100644 index 00000000000..222f9b3f5a3 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenStyleConfig.kt @@ -0,0 +1,27 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable + +internal class ImagenStyleConfig(val description: String?) 
{ + + fun toInternal(): Internal { + return Internal(description) + } + + @Serializable internal data class Internal(val styleDescription: String?) +} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenSubjectConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenSubjectConfig.kt new file mode 100644 index 00000000000..603a580a25e --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenSubjectConfig.kt @@ -0,0 +1,31 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +import kotlinx.serialization.Serializable + +internal class ImagenSubjectConfig( + val description: String?, + val type: ImagenSubjectReferenceType?, +) { + + internal fun toInternal(): Internal { + return Internal(description, type?.value) + } + + @Serializable + internal data class Internal(val subjectDescription: String?, val subjectType: String?) 
+} diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenSubjectReferenceType.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenSubjectReferenceType.kt new file mode 100644 index 00000000000..dfe77f9adf5 --- /dev/null +++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/ImagenSubjectReferenceType.kt @@ -0,0 +1,38 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.firebase.ai.type + +/** Represents a type for a subject reference, specifying how it should be interpreted. 
*/ +public class ImagenSubjectReferenceType private constructor(internal val value: String) { + + public companion object { + + /** Marks the reference type as being of a person */ + @JvmField + public val PERSON: ImagenSubjectReferenceType = + ImagenSubjectReferenceType("SUBJECT_TYPE_PERSON") + + /** Marks the reference type as being of an animal */ + @JvmField + public val ANIMAL: ImagenSubjectReferenceType = + ImagenSubjectReferenceType("SUBJECT_TYPE_ANIMAL") + + /** Marks the reference type as being of a product */ + @JvmField + public val PRODUCT: ImagenSubjectReferenceType = + ImagenSubjectReferenceType("SUBJECT_TYPE_PRODUCT") + } +} diff --git a/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt b/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt index 8e904a3f315..177fde19a8b 100644 --- a/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt +++ b/firebase-ai/src/test/java/com/google/firebase/ai/SerializationTests.kt @@ -25,7 +25,9 @@ import com.google.firebase.ai.type.GroundingAttribution import com.google.firebase.ai.type.GroundingChunk import com.google.firebase.ai.type.GroundingMetadata import com.google.firebase.ai.type.GroundingSupport +import com.google.firebase.ai.type.ImagenReferenceImage import com.google.firebase.ai.type.ModalityTokenCount +import com.google.firebase.ai.type.PublicPreviewAPI import com.google.firebase.ai.type.Schema import com.google.firebase.ai.type.SearchEntryPoint import com.google.firebase.ai.type.Segment @@ -34,6 +36,7 @@ import com.google.firebase.ai.type.WebGroundingChunk import io.kotest.assertions.json.shouldEqualJson import org.junit.Test +@OptIn(PublicPreviewAPI::class) internal class SerializationTests { @Test fun `test countTokensResponse serialization as Json`() { @@ -385,6 +388,43 @@ internal class SerializationTests { expectedJsonAsString shouldEqualJson actualJson.toString() } + @Test + fun `test ReferenceImage serialization as Json`() { + val
expectedJsonAsString = + """ + { + "id": "ImagenReferenceImage", + "type": "object", + "properties": { + "referenceType": { + "type": "string" + }, + "referenceImage": { + "${'$'}ref": "ImagenInlineImage" + }, + "referenceId": { + "type": "integer" + }, + "subjectImageConfig": { + "${'$'}ref": "ImagenSubjectConfig" + }, + "maskImageConfig": { + "${'$'}ref": "ImagenMaskConfig" + }, + "styleImageConfig": { + "${'$'}ref": "ImagenStyleConfig" + }, + "controlConfig": { + "${'$'}ref": "ImagenControlConfig" + } + } + } + """ + .trimIndent() + val actualJson = descriptorToJson(ImagenReferenceImage.Internal.serializer().descriptor) + expectedJsonAsString shouldEqualJson actualJson.toString() + } + @Test fun `test Tool serialization as Json`() { val expectedJsonAsString = diff --git a/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java b/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java index 559c4ac8a04..3422c00e195 100644 --- a/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java +++ b/firebase-ai/src/testUtil/java/com/google/firebase/ai/JavaCompileTests.java @@ -21,9 +21,11 @@ import com.google.common.util.concurrent.ListenableFuture; import com.google.firebase.ai.FirebaseAI; import com.google.firebase.ai.GenerativeModel; +import com.google.firebase.ai.ImagenModel; import com.google.firebase.ai.LiveGenerativeModel; import com.google.firebase.ai.java.ChatFutures; import com.google.firebase.ai.java.GenerativeModelFutures; +import com.google.firebase.ai.java.ImagenModelFutures; import com.google.firebase.ai.java.LiveModelFutures; import com.google.firebase.ai.java.LiveSessionFutures; import com.google.firebase.ai.type.BlockReason; @@ -33,6 +35,7 @@ import com.google.firebase.ai.type.Content; import com.google.firebase.ai.type.ContentModality; import com.google.firebase.ai.type.CountTokensResponse; +import com.google.firebase.ai.type.Dimensions; import com.google.firebase.ai.type.FileDataPart; import 
com.google.firebase.ai.type.FinishReason; import com.google.firebase.ai.type.FunctionCallPart; @@ -43,6 +46,11 @@ import com.google.firebase.ai.type.HarmProbability; import com.google.firebase.ai.type.HarmSeverity; import com.google.firebase.ai.type.ImagePart; +import com.google.firebase.ai.type.ImagenBackgroundMask; +import com.google.firebase.ai.type.ImagenEditMode; +import com.google.firebase.ai.type.ImagenEditingConfig; +import com.google.firebase.ai.type.ImagenInlineImage; +import com.google.firebase.ai.type.ImagenMaskReference; import com.google.firebase.ai.type.InlineDataPart; import com.google.firebase.ai.type.LiveGenerationConfig; import com.google.firebase.ai.type.LiveServerContent; @@ -65,6 +73,7 @@ import com.google.firebase.concurrent.FirebaseExecutors; import java.util.ArrayList; import java.util.Calendar; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.Executor; @@ -141,6 +150,17 @@ private LiveGenerationConfig getLiveConfig() { .build(); } + private void testImagen() { + ImagenModel modelSuspend = FirebaseAI.getInstance().imagenModel(""); + ImagenModelFutures model = ImagenModelFutures.from(modelSuspend); + model.editImage( + Collections.singletonList(new ImagenBackgroundMask()), + "", + new ImagenEditingConfig(ImagenEditMode.OUTPAINT, 25)); + ImagenMaskReference.generateMaskAndPadForOutpainting( + new ImagenInlineImage(new byte[0], ""), new Dimensions(0, 0)); + } + private void testFutures(GenerativeModelFutures futures) throws Exception { Content content = new Content.Builder()