From 06b66719fe2cefa6c5a8338e1d0f0dd274a7c455 Mon Sep 17 00:00:00 2001 From: Shruc <42489293+P3il4@users.noreply.github.com> Date: Mon, 9 Mar 2026 09:25:11 +0300 Subject: [PATCH] fix gemini image provider implementation (#2569) * implement proper calls to gemini image * remove hardcoded image prices * fix model selection * add flash 3.1 image, fix pricing * also handle uris with data prefix * more efficient uri parsing * fix token calculation * re-add some costs for models page --- .../src/services/ai/AIInterfaceService.js | 2 + .../ai/image/AIImageGenerationService.ts | 27 ++- .../GeminiImageGenerationProvider.ts | 195 +++++++++++++----- .../GeminiImageGenerationProvider/models.ts | 133 +++--------- .../src/services/ai/image/providers/types.ts | 3 + 5 files changed, 193 insertions(+), 167 deletions(-) diff --git a/src/backend/src/services/ai/AIInterfaceService.js b/src/backend/src/services/ai/AIInterfaceService.js index 5e4b32820..a62ef103c 100644 --- a/src/backend/src/services/ai/AIInterfaceService.js +++ b/src/backend/src/services/ai/AIInterfaceService.js @@ -125,6 +125,7 @@ class AIInterfaceService extends BaseService { prompt: { type: 'string' }, quality: { type: 'string' }, model: { type: 'string' }, + provider: { type: 'string', optional: true }, ratio: { type: 'json' }, width: { type: 'number', optional: true }, height: { type: 'number', optional: true }, @@ -135,6 +136,7 @@ class AIInterfaceService extends BaseService { n: { type: 'number', optional: true }, input_image: { type: 'string', optional: true }, input_image_mime_type: { type: 'string', optional: true }, + input_images: { type: 'json', optional: true }, image_url: { type: 'string', optional: true }, image_base64: { type: 'string', optional: true }, mask_image_url: { type: 'string', optional: true }, diff --git a/src/backend/src/services/ai/image/AIImageGenerationService.ts b/src/backend/src/services/ai/image/AIImageGenerationService.ts index 06ba1ba49..59b43aedf 100644 --- a/src/backend/src/services/ai/image/AIImageGenerationService.ts +++ b/src/backend/src/services/ai/image/AIImageGenerationService.ts @@ -68,13 +68,13 @@ export class AIImageGenerationService extends BaseService { /** Driver interfaces */ static IMPLEMENTS = { - ['driver-capabilities']: { + 'driver-capabilities': { supports_test_mode (iface: string, method_name: string) { return iface === 'puter-image-generation' && method_name === 'generate'; }, }, - ['puter-image-generation']: { + 'puter-image-generation': { async generate (...parameters: Parameters) { return (this as unknown as AIImageGenerationService).generate(...parameters); @@ -88,11 +88,18 @@ export class AIImageGenerationService extends BaseService { return undefined; } - if ( ! provider ) { - return models[0]; + if ( provider ) { + const model = models.find(m => m.provider === provider); + return model ?? models[0]; } - const model = models.find(m => m.provider === provider); - return model ?? models[0]; + + // If no provider is specified, prefer a model whose puterId exactly matches the requested modelId. + const exactPuterIdMatch = models.find(m => m.puterId === modelId); + if ( exactPuterIdMatch ) { + return exactPuterIdMatch; + } + + return models[0]; } private async registerProviders () { @@ -152,9 +159,11 @@ export class AIImageGenerationService extends BaseService { const provider = this.#providers[providerName]; // alias all driver requests to go here to support legacy routing - this.driverService.register_service_alias(AIImageGenerationService.SERVICE_NAME, - providerName, - { iface: 'puter-image-generation' }); + this.driverService.register_service_alias( + AIImageGenerationService.SERVICE_NAME, + providerName, + { iface: 'puter-image-generation' }, + ); // build model id map for ( const model of await provider.models() ) { diff --git a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts index eca25e210..656e23d08 100644 --- a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts +++ b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts @@ -22,17 +22,20 @@ import APIError from '../../../../../api/APIError.js'; import { ErrorService } from '../../../../../modules/core/ErrorService.js'; import { Context } from '../../../../../util/context.js'; import { MeteringService } from '../../../../MeteringService/MeteringService.js'; -import { GEMINI_DEFAULT_RATIO, GEMINI_IMAGE_GENERATION_MODELS } from './models.js'; +import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS } from './models.js'; import { IGenerateParams, IImageModel, IImageProvider } from '../types.js'; -type GeminiGenerateParams = IGenerateParams & { - input_image?: string; - input_image_mime_type?: string; +const MIME_SIGNATURES: Record = { + '/9j/': 'image/jpeg', + 'iVBOR': 'image/png', + 'UklGR': 'image/webp', }; interface GeminiUsageMetadata { promptTokenCount: number; candidatesTokenCount: number; + candidatesTextTokenCount: number; + candidatesImageTokenCount: number; thoughtsTokenCount: number; } @@ -59,9 +62,8 @@ export class GeminiImageGenerationProvider implements IImageProvider { } async generate (params: IGenerateParams): Promise { - const { prompt, test_mode } = params; - let { model, ratio, quality } = params; - const { input_image, input_image_mime_type } = params as GeminiGenerateParams; + const { prompt, test_mode, input_image, input_image_mime_type, model, quality } = params; + let { ratio, input_images } = params; const selectedModel = this.models().find(m => m.id === model) || this.models().find(m => m.id === this.getDefaultModel())!; @@ -76,28 +78,19 @@ export class GeminiImageGenerationProvider implements IImageProvider { const allowedRatios = selectedModel.allowedRatios ?? [GEMINI_DEFAULT_RATIO]; ratio = ratio && this.#isValidRatio(ratio, allowedRatios) ? ratio : allowedRatios[0]; - if ( input_image && !input_image_mime_type ) { - throw new Error('`input_image_mime_type` is required when `input_image` is provided'); + // Backwards compat: merge singular input_image into input_images + if ( input_image && (!input_images || input_images.length === 0) ) { + input_images = [input_image]; } - if ( input_image_mime_type && !input_image ) { - throw new Error('`input_image` is required when `input_image_mime_type` is provided'); - } - - if ( input_image_mime_type && !this.#isValidImageMimeType(input_image_mime_type) ) { - throw new Error('`input_image_mime_type` must be a valid image MIME type (image/png, image/jpeg, image/webp)'); - } - - const priceKey = `${quality ? `${quality}:` : ''}${ratio.w}x${ratio.h}`; - const priceInCents = selectedModel.costs[priceKey]; - if ( priceInCents === undefined ) { - const availableSizes = Object.keys(selectedModel.costs) - .filter(key => key !== 'input' && key !== 'output'); - throw APIError.create('field_invalid', undefined, { - key: 'size/quality combination', - expected: `one of: ${ availableSizes.join(', ')}`, - got: priceKey, - }); + // Validate input images have detectable MIME types + if ( input_images?.length ) { + for ( const img of input_images ) { + const mime = this.#detectMimeType(img) ?? input_image_mime_type; + if ( ! mime ) { + throw new Error('Could not detect MIME type for an input image. Provide a known image format (JPEG, PNG, WebP) or set `input_image_mime_type`.'); + } + } } const actor = Context.get('actor'); @@ -110,9 +103,22 @@ export class GeminiImageGenerationProvider implements IImageProvider { }); } - const estimatedPromptTokenCount = this.#estimatePromptTokenCount(prompt); + // --- Pre-flight cost estimation --- + const inputImageCount = input_images?.length ?? 0; + const estimatedImageInputTokens = inputImageCount * 560; // https://ai.google.dev/gemini-api/docs/pricing#gemini-3-pro-image-preview + const estimatedPromptTokenCount = this.#estimatePromptTokenCount(prompt) + estimatedImageInputTokens; const estimatedInputCostInCents = this.#calculateTokenCostInCents(estimatedPromptTokenCount, selectedModel.costs.input); - const estimatedOutputCostInCents = priceInCents; + + // Estimate output image tokens + const imageTokenKey = quality ? `${selectedModel.id}:${quality}` : selectedModel.id; + const estimatedOutputImageTokens = GEMINI_ESTIMATED_IMAGE_TOKENS[imageTokenKey] ?? GEMINI_ESTIMATED_IMAGE_TOKENS[selectedModel.id]; + if ( estimatedOutputImageTokens === undefined ) { + throw new Error(`No estimated image token count configured for '${imageTokenKey}'.`); + } + const estimatedOutputImageCostInCents = this.#calculateTokenCostInCents(estimatedOutputImageTokens, selectedModel.costs.output_image); + const estimatedOutputTextCostInCents = this.#calculateTokenCostInCents(50, selectedModel.costs.output); // small text overhead estimate + const estimatedOutputCostInCents = estimatedOutputImageCostInCents + estimatedOutputTextCostInCents; + const estimatedTotalCostInMicroCents = this.#toMicroCents(estimatedInputCostInCents + estimatedOutputCostInCents); const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, estimatedTotalCostInMicroCents); @@ -120,21 +126,38 @@ export class GeminiImageGenerationProvider implements IImageProvider { throw APIError.create('insufficient_funds'); } - const contents = this.#buildContents(prompt, ratio, input_image, input_image_mime_type); + // --- API call --- + const contents = this.#buildContents(prompt, input_images, input_image_mime_type); + const aspectRatio = `${ratio.w}:${ratio.h}`; + + const imageConfig: Record = { aspectRatio }; + if ( quality && selectedModel.allowedQualityLevels?.includes(quality) ) { + imageConfig.imageSize = quality; + } + const response = await this.#client.models.generateContent({ model: selectedModel.id, contents, + config: { + responseModalities: ['TEXT', 'IMAGE'], + imageConfig, + }, }); + // --- Actual cost calculation from response usage --- const usage = this.#extractUsageMetadata(response); const inputTokenCount = usage.promptTokenCount || estimatedPromptTokenCount; - const outputTokenCount = usage.candidatesTokenCount + usage.thoughtsTokenCount; + + const outputTextTokenCount = usage.candidatesTextTokenCount + usage.thoughtsTokenCount; + const outputImageTokenCount = usage.candidatesImageTokenCount || estimatedOutputImageTokens; const inputCostInCents = this.#calculateTokenCostInCents(inputTokenCount, selectedModel.costs.input); - const outputTextCostInCents = this.#calculateTokenCostInCents(outputTokenCount, selectedModel.costs.output); - const outputCostInCents = priceInCents + outputTextCostInCents; + const outputTextCostInCents = this.#calculateTokenCostInCents(outputTextTokenCount, selectedModel.costs.output); + const outputImageCostInCents = this.#calculateTokenCostInCents(outputImageTokenCount, selectedModel.costs.output_image); + const outputCostInCents = outputTextCostInCents + outputImageCostInCents; - const usagePrefix = `gemini:${selectedModel.id}:${priceKey}`; + const totalOutputTokenCount = outputTextTokenCount + outputImageTokenCount; + const usagePrefix = `gemini:${selectedModel.id}`; this.#meteringService.batchIncrementUsages(actor, [ { usageType: `${usagePrefix}:input`, @@ -142,9 +165,14 @@ export class GeminiImageGenerationProvider implements IImageProvider { costOverride: this.#toMicroCents(inputCostInCents), }, { - usageType: `${usagePrefix}:output`, - usageAmount: Math.max(outputTokenCount, 1), - costOverride: this.#toMicroCents(outputCostInCents), + usageType: `${usagePrefix}:output:text`, + usageAmount: Math.max(outputTextTokenCount, 1), + costOverride: this.#toMicroCents(outputTextCostInCents), + }, + { + usageType: `${usagePrefix}:output:image`, + usageAmount: Math.max(outputImageTokenCount, 1), + costOverride: this.#toMicroCents(outputImageCostInCents), }, ]); @@ -155,7 +183,9 @@ export class GeminiImageGenerationProvider implements IImageProvider { inputCostInCents, outputCostInCents, inputTokenCount, - outputTokenCount, + outputTokenCount: totalOutputTokenCount, + outputTextTokenCount, + outputImageTokenCount, }); const url = this.#extractImageUrl(response); @@ -167,20 +197,24 @@ export class GeminiImageGenerationProvider implements IImageProvider { return url; } - #buildContents (prompt: string, ratio: { w: number; h: number }, input_image?: string, input_image_mime_type?: string) { - if ( input_image && input_image_mime_type ) { - return [ - { text: `Generate a picture of dimensions ${parseInt(`${ratio.w}`)}x${parseInt(`${ratio.h}`)} with the prompt: ${prompt}` }, - { + #buildContents (prompt: string, input_images?: string[], input_image_mime_type?: string) { + const parts: Record[] = [{ text: prompt }]; + + if ( input_images?.length ) { + for ( const img of input_images ) { + const parsed = this.#parseDataUri(img); + const mimeType = parsed?.mimeType ?? this.#detectMimeType(img) ?? input_image_mime_type ?? 'image/png'; + const rawBase64 = parsed?.base64 ?? img; + parts.push({ inlineData: { - mimeType: input_image_mime_type, - data: input_image, + mimeType, + data: rawBase64, }, - }, - ]; + }); + } } - return `Generate a picture of dimensions ${parseInt(`${ratio.w}`)}x${parseInt(`${ratio.h}`)} with the prompt: ${prompt}`; + return parts; } #setResponseCostMetadata ({ @@ -191,6 +225,8 @@ export class GeminiImageGenerationProvider implements IImageProvider { outputCostInCents, inputTokenCount, outputTokenCount, + outputTextTokenCount, + outputImageTokenCount, }: { model: string; quality?: string; @@ -199,6 +235,8 @@ export class GeminiImageGenerationProvider implements IImageProvider { outputCostInCents: number; inputTokenCount: number; outputTokenCount: number; + outputTextTokenCount: number; + outputImageTokenCount: number; }) { const clientDriverCall = Context.get('client_driver_call') as { response_metadata?: Record } | undefined; const responseMetadata = clientDriverCall?.response_metadata; @@ -218,6 +256,8 @@ export class GeminiImageGenerationProvider implements IImageProvider { ratio: `${ratio.w}x${ratio.h}`, input_tokens: inputTokenCount, output_tokens: outputTokenCount, + output_text_tokens: outputTextTokenCount, + output_image_tokens: outputImageTokenCount, input_microcents: this.#toMicroCents(inputCostInCents), output_microcents: this.#toMicroCents(outputCostInCents), total_microcents: this.#toMicroCents(totalCostInCents), @@ -226,9 +266,27 @@ export class GeminiImageGenerationProvider implements IImageProvider { #extractUsageMetadata (response: GenerateContentResponse): GeminiUsageMetadata { const usage = (response as GenerateContentResponse & { usageMetadata?: Record }).usageMetadata; + + let candidatesImageTokenCount = 0; + + const details = usage?.candidatesTokensDetails; + if ( Array.isArray(details) ) { + for ( const entry of details ) { + if ( entry?.modality === 'IMAGE' ) { + candidatesImageTokenCount += this.#toSafeCount(entry.tokenCount); + } + } + } + + // api only returns modality image, so calculate text tokens as candidates (output) - image tokens + const candidatesTokenCount = this.#toSafeCount(usage?.candidatesTokenCount); + const candidatesTextTokenCount = Math.max(0, candidatesTokenCount - candidatesImageTokenCount); + return { promptTokenCount: this.#toSafeCount(usage?.promptTokenCount), - candidatesTokenCount: this.#toSafeCount(usage?.candidatesTokenCount), + candidatesTokenCount, + candidatesTextTokenCount, + candidatesImageTokenCount, thoughtsTokenCount: this.#toSafeCount(usage?.thoughtsTokenCount), }; } @@ -266,19 +324,44 @@ export class GeminiImageGenerationProvider implements IImageProvider { for ( const part of parts ) { if ( part?.inlineData?.data ) { - return `data:image/png;base64,${ part.inlineData.data}`; + const mimeType = part.inlineData.mimeType ?? 'image/png'; + return `data:${mimeType};base64,${ part.inlineData.data}`; } } return undefined; } + #detectMimeType (data: string): string | undefined { + // Handle data URIs like "data:image/jpeg;base64,..." + const parsed = this.#parseDataUri(data); + if ( parsed ) { + return parsed.mimeType; + } + + for ( const [signature, mimeType] of Object.entries(MIME_SIGNATURES) ) { + if ( data.startsWith(signature) ) { + return mimeType; + } + } + return undefined; + } + + #parseDataUri (data: string): { mimeType: string; base64: string } | undefined { + if ( ! data.startsWith('data:image/') ) return undefined; + + const commaIdx = data.indexOf(','); + if ( commaIdx === -1 ) return undefined; + + const header = data.substring(5, commaIdx); // after "data:" up to "," + if ( ! header.endsWith(';base64') ) return undefined; + + const mimeType = header.substring(0, header.length - 7); // strip ";base64" + if ( mimeType.length === 0 ) return undefined; + + return { mimeType, base64: data.substring(commaIdx + 1) }; + } + #isValidRatio (ratio: { w: number; h: number }, allowedRatios: { w: number; h: number }[]) { return allowedRatios.some(r => r.w === ratio.w && r.h === ratio.h); } - - #isValidImageMimeType (mimeType?: string) { - if ( ! mimeType ) return false; - const supportedTypes = ['image/png', 'image/jpeg', 'image/jpg', 'image/webp']; - return supportedTypes.includes(mimeType.toLowerCase()); - } } diff --git a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts index 783bf7b26..fb6be1472 100644 --- a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts +++ b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts @@ -21,6 +21,22 @@ import { IImageModel } from '../types'; export const GEMINI_DEFAULT_RATIO = { w: 1024, h: 1024 }; +// Estimated image output token counts for pre-flight cost checks. +// These are based on Google's published pricing equivalences. +// https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size +export const GEMINI_ESTIMATED_IMAGE_TOKENS: Record = { + 'gemini-2.5-flash-image': 1290, + + 'gemini-3-pro-image-preview:1K': 1120, + 'gemini-3-pro-image-preview:2K': 1120, + 'gemini-3-pro-image-preview:4K': 2000, + + 'gemini-3.1-flash-image-preview:0.5K': 747, + 'gemini-3.1-flash-image-preview:1K': 1120, + 'gemini-3.1-flash-image-preview:2K': 1680, + 'gemini-3.1-flash-image-preview:4K': 2520, +}; + export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ { puterId: 'google:google/gemini-2.5-flash-image', @@ -37,20 +53,13 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ version: '1.0', costs_currency: 'usd-cents', index_cost_key: '1x1', - index_input_cost_key: 'prompt_tokens', + index_input_cost_key: 'input', allowedQualityLevels: [''], costs: { - input: 30, // $0.30 per 1M prompt tokens, however google counts them - '1x1': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '2x3': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '3x2': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '3x4': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '4x3': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '4x5': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '5x4': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '9x16': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '16x9': 3.9, // $0.039 per image, just used for extiamte input allowed usage - '21x9': 3.9, // $0.039 per image, just used for extiamte input allowed usage + input: 30, // $0.30 per 1M input tokens (text/image) + output: 250, // $2.50 per 1M output tokens (text and thinking) + output_image: 3000, // $30.00 per 1M output image tokens + '1x1': 3.9, }, allowedRatios: [ { w: 1, h: 1 }, @@ -72,6 +81,7 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ version: '1.0', costs_currency: 'usd-cents', index_cost_key: '1K:1x1', + index_input_cost_key: 'input', aliases: [ 'gemini-3-pro-image-preview', 'gemini-3-pro-image', @@ -93,38 +103,10 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ { w: 21, h: 9 }, ], costs: { - input: 200, // $2.00 per 1M input tokens, however google counts them - output: 1200, // $12.00 per 1M output tokens, however google counts them - '1K:1x1': 13.4, // $0.134 per image - '1K:2x3': 13.4, // $0.134 per image - '1K:3x2': 13.4, // $0.134 per image - '1K:3x4': 13.4, // $0.134 per image - '1K:4x3': 13.4, // $0.134 per image - '1K:4x5': 13.4, // $0.134 per image - '1K:5x4': 13.4, // $0.134 per image - '1K:9x16': 13.4, // $0.134 per image - '1K:16x9': 13.4, // $0.134 per image - '1K:21x9': 13.4, // $0.134 per image - '2K:1x1': 13.4, // $0.134 per image - '2K:2x3': 13.4, // $0.134 per image - '2K:3x2': 13.4, // $0.134 per image - '2K:3x4': 13.4, // $0.134 per image - '2K:4x3': 13.4, // $0.134 per image - '2K:4x5': 13.4, // $0.134 per image - '2K:5x4': 13.4, // $0.134 per image - '2K:9x16': 13.4, // $0.134 per image - '2K:16x9': 13.4, // $0.134 per image - '2K:21x9': 13.4, // $0.134 per image - '4K:1x1': 24, // $0.24 per image - '4K:2x3': 24, // $0.24 per image - '4K:3x2': 24, // $0.24 per image - '4K:3x4': 24, // $0.24 per image - '4K:4x3': 24, // $0.24 per image - '4K:4x5': 24, // $0.24 per image - '4K:5x4': 24, // $0.24 per image - '4K:9x16': 24, // $0.24 per image - '4K:16x9': 24, // $0.24 per image - '4K:21x9': 24, // $0.24 per image + input: 200, // $2.00 per 1M input tokens (text/image) + output: 1200, // $12.00 per 1M output tokens (text and thinking) + output_image: 12000, // $120.00 per 1M output image tokens + '1K:1x1': 13.4, }, }, { @@ -134,6 +116,7 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ version: '1.0', costs_currency: 'usd-cents', index_cost_key: '1K:1x1', + index_input_cost_key: 'input', aliases: [ 'gemini-3.1-flash-image-preview', 'gemini-3.1-flash-image', @@ -159,64 +142,10 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ { w: 21, h: 9 }, ], costs: { - input: 25, // $0.25 per 1M input tokens, however google counts them - output: 150, // $1.50 per 1M output tokens, however google counts them - '0.5K:1x1': 4.5, // $0.045 per image - '0.5K:1x4': 4.5, // $0.045 per image - '0.5K:1x8': 4.5, // $0.045 per image - '0.5K:2x3': 4.5, // $0.045 per image - '0.5K:3x2': 4.5, // $0.045 per image - '0.5K:3x4': 4.5, // $0.045 per image - '0.5K:4x1': 4.5, // $0.045 per image - '0.5K:4x3': 4.5, // $0.045 per image - '0.5K:4x5': 4.5, // $0.045 per image - '0.5K:5x4': 4.5, // $0.045 per image - '0.5K:8x1': 4.5, // $0.045 per image - '0.5K:9x16': 4.5, // $0.045 per image - '0.5K:16x9': 4.5, // $0.045 per image - '0.5K:21x9': 4.5, // $0.045 per image - '1K:1x1': 6.7, // $0.067 per image - '1K:1x4': 6.7, // $0.067 per image - '1K:1x8': 6.7, // $0.067 per image - '1K:2x3': 6.7, // $0.067 per image - '1K:3x2': 6.7, // $0.067 per image - '1K:3x4': 6.7, // $0.067 per image - '1K:4x1': 6.7, // $0.067 per image - '1K:4x3': 6.7, // $0.067 per image - '1K:4x5': 6.7, // $0.067 per image - '1K:5x4': 6.7, // $0.067 per image - '1K:8x1': 6.7, // $0.067 per image - '1K:9x16': 6.7, // $0.067 per image - '1K:16x9': 6.7, // $0.067 per image - '1K:21x9': 6.7, // $0.067 per image - '2K:1x1': 10.1, // $0.101 per image - '2K:1x4': 10.1, // $0.101 per image - '2K:1x8': 10.1, // $0.101 per image - '2K:2x3': 10.1, // $0.101 per image - '2K:3x2': 10.1, // $0.101 per image - '2K:3x4': 10.1, // $0.101 per image - '2K:4x1': 10.1, // $0.101 per image - '2K:4x3': 10.1, // $0.101 per image - '2K:4x5': 10.1, // $0.101 per image - '2K:5x4': 10.1, // $0.101 per image - '2K:8x1': 10.1, // $0.101 per image - '2K:9x16': 10.1, // $0.101 per image - '2K:16x9': 10.1, // $0.101 per image - '2K:21x9': 10.1, // $0.101 per image - '4K:1x1': 15.1, // $0.151 per image - '4K:1x4': 15.1, // $0.151 per image - '4K:1x8': 15.1, // $0.151 per image - '4K:2x3': 15.1, // $0.151 per image - '4K:3x2': 15.1, // $0.151 per image - '4K:3x4': 15.1, // $0.151 per image - '4K:4x1': 15.1, // $0.151 per image - '4K:4x3': 15.1, // $0.151 per image - '4K:4x5': 15.1, // $0.151 per image - '4K:5x4': 15.1, // $0.151 per image - '4K:8x1': 15.1, // $0.151 per image - '4K:9x16': 15.1, // $0.151 per image - '4K:16x9': 15.1, // $0.151 per image - '4K:21x9': 15.1, // $0.151 per image + input: 25, // $0.25 per 1M input tokens (text/image) + output: 150, // $1.50 per 1M output tokens (text and thinking) + output_image: 6000, // $60.00 per 1M output image tokens + '1K:1x1': 6.7, }, }, ]; diff --git a/src/backend/src/services/ai/image/providers/types.ts b/src/backend/src/services/ai/image/providers/types.ts index 2ed87ab87..eac489101 100644 --- a/src/backend/src/services/ai/image/providers/types.ts +++ b/src/backend/src/services/ai/image/providers/types.ts @@ -21,6 +21,9 @@ export interface IGenerateParams { provider?: string, test_mode?: boolean quality?: string, + input_image?: string, + input_image_mime_type?: string, + input_images?: string[], }; export interface IImageProvider {