fix gemini image provider implementation (#2569)

* implement proper calls to gemini image
* remove hardcoded image prices
* fix model selection
* add flash 3.1 image, fix pricing
* also handle uris with data prefix
* more efficient uri parsing
* fix token calculation
* re-add some costs for models page
2026-05-04 08:30:39 +00:00 · 2026-03-09 09:25:11 +03:00
parent 62df3df6a9
commit 06b66719fe
5 changed files with 193 additions and 167 deletions
@@ -125,6 +125,7 @@ class AIInterfaceService extends BaseService {
                        prompt: { type: 'string' },
                        quality: { type: 'string' },
                        model: { type: 'string' },
+                        provider: { type: 'string', optional: true },
                        ratio: { type: 'json' },
                        width: { type: 'number', optional: true },
                        height: { type: 'number', optional: true },
@@ -135,6 +136,7 @@ class AIInterfaceService extends BaseService {
                        n: { type: 'number', optional: true },
                        input_image: { type: 'string', optional: true },
                        input_image_mime_type: { type: 'string', optional: true },
+                        input_images: { type: 'json', optional: true },
                        image_url: { type: 'string', optional: true },
                        image_base64: { type: 'string', optional: true },
                        mask_image_url: { type: 'string', optional: true },
@@ -68,13 +68,13 @@ export class AIImageGenerationService extends BaseService {

    /** Driver interfaces */
    static IMPLEMENTS = {
-        ['driver-capabilities']: {
+        'driver-capabilities': {
            supports_test_mode (iface: string, method_name: string) {
                return iface === 'puter-image-generation' &&
                    method_name === 'generate';
            },
        },
-        ['puter-image-generation']: {
+        'puter-image-generation': {

            async generate (...parameters: Parameters<AIImageGenerationService['generate']>) {
                return (this as unknown as AIImageGenerationService).generate(...parameters);
@@ -88,11 +88,18 @@ export class AIImageGenerationService extends BaseService {
            return undefined;
        }

-        if ( ! provider ) {
-            return models[0];
+        if ( provider ) {
+            const model = models.find(m => m.provider === provider);
+            return model ?? models[0];
        }
-        const model = models.find(m => m.provider === provider);
-        return model ?? models[0];
+
+        // If no provider is specified, prefer a model whose puterId exactly matches the requested modelId.
+        const exactPuterIdMatch = models.find(m => m.puterId === modelId);
+        if ( exactPuterIdMatch ) {
+            return exactPuterIdMatch;
+        }
+
+        return models[0];
    }

    private async registerProviders () {
@@ -152,9 +159,11 @@ export class AIImageGenerationService extends BaseService {
            const provider = this.#providers[providerName];

            // alias all driver requests to go here to support legacy routing
-            this.driverService.register_service_alias(AIImageGenerationService.SERVICE_NAME,
-                            providerName,
-                            { iface: 'puter-image-generation' });
+            this.driverService.register_service_alias(
+                AIImageGenerationService.SERVICE_NAME,
+                providerName,
+                { iface: 'puter-image-generation' },
+            );

            // build model id map
            for ( const model of await provider.models() ) {
@@ -22,17 +22,20 @@ import APIError from '../../../../../api/APIError.js';
 import { ErrorService } from '../../../../../modules/core/ErrorService.js';
 import { Context } from '../../../../../util/context.js';
 import { MeteringService } from '../../../../MeteringService/MeteringService.js';
-import { GEMINI_DEFAULT_RATIO, GEMINI_IMAGE_GENERATION_MODELS } from './models.js';
+import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS } from './models.js';
 import { IGenerateParams, IImageModel, IImageProvider } from '../types.js';

-type GeminiGenerateParams = IGenerateParams & {
-    input_image?: string;
-    input_image_mime_type?: string;
+const MIME_SIGNATURES: Record<string, string> = {
+    '/9j/': 'image/jpeg',
+    'iVBOR': 'image/png',
+    'UklGR': 'image/webp',
 };

 interface GeminiUsageMetadata {
    promptTokenCount: number;
    candidatesTokenCount: number;
+    candidatesTextTokenCount: number;
+    candidatesImageTokenCount: number;
    thoughtsTokenCount: number;
 }

@@ -59,9 +62,8 @@ export class GeminiImageGenerationProvider implements IImageProvider {
    }

    async generate (params: IGenerateParams): Promise<string> {
-        const { prompt, test_mode } = params;
-        let { model, ratio, quality } = params;
-        const { input_image, input_image_mime_type } = params as GeminiGenerateParams;
+        const { prompt, test_mode, input_image, input_image_mime_type, model, quality } = params;
+        let { ratio, input_images } = params;

        const selectedModel = this.models().find(m => m.id === model) || this.models().find(m => m.id === this.getDefaultModel())!;

@@ -76,28 +78,19 @@ export class GeminiImageGenerationProvider implements IImageProvider {
        const allowedRatios = selectedModel.allowedRatios ?? [GEMINI_DEFAULT_RATIO];
        ratio = ratio && this.#isValidRatio(ratio, allowedRatios) ? ratio : allowedRatios[0];

-        if ( input_image && !input_image_mime_type ) {
-            throw new Error('`input_image_mime_type` is required when `input_image` is provided');
+        // Backwards compat: fall back to the singular input_image only when input_images is missing or empty
+        if ( input_image && (!input_images || input_images.length === 0) ) {
+            input_images = [input_image];
        }

-        if ( input_image_mime_type && !input_image ) {
-            throw new Error('`input_image` is required when `input_image_mime_type` is provided');
-        }
-
-        if ( input_image_mime_type && !this.#isValidImageMimeType(input_image_mime_type) ) {
-            throw new Error('`input_image_mime_type` must be a valid image MIME type (image/png, image/jpeg, image/webp)');
-        }
-
-        const priceKey = `${quality ? `${quality}:` : ''}${ratio.w}x${ratio.h}`;
-        const priceInCents = selectedModel.costs[priceKey];
-        if ( priceInCents === undefined ) {
-            const availableSizes = Object.keys(selectedModel.costs)
-                .filter(key => key !== 'input' && key !== 'output');
-            throw APIError.create('field_invalid', undefined, {
-                key: 'size/quality combination',
-                expected: `one of: ${ availableSizes.join(', ')}`,
-                got: priceKey,
-            });
+        // Validate that every input image has a detectable MIME type, or that `input_image_mime_type` is set as a fallback
+        if ( input_images?.length ) {
+            for ( const img of input_images ) {
+                const mime = this.#detectMimeType(img) ?? input_image_mime_type;
+                if ( ! mime ) {
+                    throw new Error('Could not detect MIME type for an input image. Provide a known image format (JPEG, PNG, WebP) or set `input_image_mime_type`.');
+                }
+            }
        }

        const actor = Context.get('actor');
@@ -110,9 +103,22 @@ export class GeminiImageGenerationProvider implements IImageProvider {
            });
        }

-        const estimatedPromptTokenCount = this.#estimatePromptTokenCount(prompt);
+        // --- Pre-flight cost estimation ---
+        const inputImageCount = input_images?.length ?? 0;
+        const estimatedImageInputTokens = inputImageCount * 560; // https://ai.google.dev/gemini-api/docs/pricing#gemini-3-pro-image-preview
+        const estimatedPromptTokenCount = this.#estimatePromptTokenCount(prompt) + estimatedImageInputTokens;
        const estimatedInputCostInCents = this.#calculateTokenCostInCents(estimatedPromptTokenCount, selectedModel.costs.input);
-        const estimatedOutputCostInCents = priceInCents;
+
+        // Estimate output image tokens
+        const imageTokenKey = quality ? `${selectedModel.id}:${quality}` : selectedModel.id;
+        const estimatedOutputImageTokens = GEMINI_ESTIMATED_IMAGE_TOKENS[imageTokenKey] ?? GEMINI_ESTIMATED_IMAGE_TOKENS[selectedModel.id];
+        if ( estimatedOutputImageTokens === undefined ) {
+            throw new Error(`No estimated image token count configured for '${imageTokenKey}'.`);
+        }
+        const estimatedOutputImageCostInCents = this.#calculateTokenCostInCents(estimatedOutputImageTokens, selectedModel.costs.output_image);
+        const estimatedOutputTextCostInCents = this.#calculateTokenCostInCents(50, selectedModel.costs.output); // small text overhead estimate
+        const estimatedOutputCostInCents = estimatedOutputImageCostInCents + estimatedOutputTextCostInCents;
+
        const estimatedTotalCostInMicroCents = this.#toMicroCents(estimatedInputCostInCents + estimatedOutputCostInCents);
        const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, estimatedTotalCostInMicroCents);

@@ -120,21 +126,38 @@ export class GeminiImageGenerationProvider implements IImageProvider {
            throw APIError.create('insufficient_funds');
        }

-        const contents = this.#buildContents(prompt, ratio, input_image, input_image_mime_type);
+        // --- API call ---
+        const contents = this.#buildContents(prompt, input_images, input_image_mime_type);
+        const aspectRatio = `${ratio.w}:${ratio.h}`;
+
+        const imageConfig: Record<string, string> = { aspectRatio };
+        if ( quality && selectedModel.allowedQualityLevels?.includes(quality) ) {
+            imageConfig.imageSize = quality;
+        }
+
        const response = await this.#client.models.generateContent({
            model: selectedModel.id,
            contents,
+            config: {
+                responseModalities: ['TEXT', 'IMAGE'],
+                imageConfig,
+            },
        });

+        // --- Actual cost calculation from response usage ---
        const usage = this.#extractUsageMetadata(response);
        const inputTokenCount = usage.promptTokenCount || estimatedPromptTokenCount;
-        const outputTokenCount = usage.candidatesTokenCount + usage.thoughtsTokenCount;
+
+        const outputTextTokenCount = usage.candidatesTextTokenCount + usage.thoughtsTokenCount;
+        const outputImageTokenCount = usage.candidatesImageTokenCount || estimatedOutputImageTokens;

        const inputCostInCents = this.#calculateTokenCostInCents(inputTokenCount, selectedModel.costs.input);
-        const outputTextCostInCents = this.#calculateTokenCostInCents(outputTokenCount, selectedModel.costs.output);
-        const outputCostInCents = priceInCents + outputTextCostInCents;
+        const outputTextCostInCents = this.#calculateTokenCostInCents(outputTextTokenCount, selectedModel.costs.output);
+        const outputImageCostInCents = this.#calculateTokenCostInCents(outputImageTokenCount, selectedModel.costs.output_image);
+        const outputCostInCents = outputTextCostInCents + outputImageCostInCents;

-        const usagePrefix = `gemini:${selectedModel.id}:${priceKey}`;
+        const totalOutputTokenCount = outputTextTokenCount + outputImageTokenCount;
+        const usagePrefix = `gemini:${selectedModel.id}`;
        this.#meteringService.batchIncrementUsages(actor, [
            {
                usageType: `${usagePrefix}:input`,
@@ -142,9 +165,14 @@ export class GeminiImageGenerationProvider implements IImageProvider {
                costOverride: this.#toMicroCents(inputCostInCents),
            },
            {
-                usageType: `${usagePrefix}:output`,
-                usageAmount: Math.max(outputTokenCount, 1),
-                costOverride: this.#toMicroCents(outputCostInCents),
+                usageType: `${usagePrefix}:output:text`,
+                usageAmount: Math.max(outputTextTokenCount, 1),
+                costOverride: this.#toMicroCents(outputTextCostInCents),
+            },
+            {
+                usageType: `${usagePrefix}:output:image`,
+                usageAmount: Math.max(outputImageTokenCount, 1),
+                costOverride: this.#toMicroCents(outputImageCostInCents),
            },
        ]);

@@ -155,7 +183,9 @@ export class GeminiImageGenerationProvider implements IImageProvider {
            inputCostInCents,
            outputCostInCents,
            inputTokenCount,
-            outputTokenCount,
+            outputTokenCount: totalOutputTokenCount,
+            outputTextTokenCount,
+            outputImageTokenCount,
        });

        const url = this.#extractImageUrl(response);
@@ -167,20 +197,24 @@ export class GeminiImageGenerationProvider implements IImageProvider {
        return url;
    }

-    #buildContents (prompt: string, ratio: { w: number; h: number }, input_image?: string, input_image_mime_type?: string) {
-        if ( input_image && input_image_mime_type ) {
-            return [
-                { text: `Generate a picture of dimensions ${parseInt(`${ratio.w}`)}x${parseInt(`${ratio.h}`)} with the prompt: ${prompt}` },
-                {
+    #buildContents (prompt: string, input_images?: string[], input_image_mime_type?: string) {
+        const parts: Record<string, unknown>[] = [{ text: prompt }];
+
+        if ( input_images?.length ) {
+            for ( const img of input_images ) {
+                const parsed = this.#parseDataUri(img);
+                const mimeType = parsed?.mimeType ?? this.#detectMimeType(img) ?? input_image_mime_type ?? 'image/png';
+                const rawBase64 = parsed?.base64 ?? img;
+                parts.push({
                    inlineData: {
-                        mimeType: input_image_mime_type,
-                        data: input_image,
+                        mimeType,
+                        data: rawBase64,
                    },
-                },
-            ];
+                });
+            }
        }

-        return `Generate a picture of dimensions ${parseInt(`${ratio.w}`)}x${parseInt(`${ratio.h}`)} with the prompt: ${prompt}`;
+        return parts;
    }

    #setResponseCostMetadata ({
@@ -191,6 +225,8 @@ export class GeminiImageGenerationProvider implements IImageProvider {
        outputCostInCents,
        inputTokenCount,
        outputTokenCount,
+        outputTextTokenCount,
+        outputImageTokenCount,
    }: {
        model: string;
        quality?: string;
@@ -199,6 +235,8 @@ export class GeminiImageGenerationProvider implements IImageProvider {
        outputCostInCents: number;
        inputTokenCount: number;
        outputTokenCount: number;
+        outputTextTokenCount: number;
+        outputImageTokenCount: number;
    }) {
        const clientDriverCall = Context.get('client_driver_call') as { response_metadata?: Record<string, unknown> } | undefined;
        const responseMetadata = clientDriverCall?.response_metadata;
@@ -218,6 +256,8 @@ export class GeminiImageGenerationProvider implements IImageProvider {
            ratio: `${ratio.w}x${ratio.h}`,
            input_tokens: inputTokenCount,
            output_tokens: outputTokenCount,
+            output_text_tokens: outputTextTokenCount,
+            output_image_tokens: outputImageTokenCount,
            input_microcents: this.#toMicroCents(inputCostInCents),
            output_microcents: this.#toMicroCents(outputCostInCents),
            total_microcents: this.#toMicroCents(totalCostInCents),
@@ -226,9 +266,27 @@ export class GeminiImageGenerationProvider implements IImageProvider {

    #extractUsageMetadata (response: GenerateContentResponse): GeminiUsageMetadata {
        const usage = (response as GenerateContentResponse & { usageMetadata?: Record<string, unknown> }).usageMetadata;
+
+        let candidatesImageTokenCount = 0;
+
+        const details = usage?.candidatesTokensDetails;
+        if ( Array.isArray(details) ) {
+            for ( const entry of details ) {
+                if ( entry?.modality === 'IMAGE' ) {
+                    candidatesImageTokenCount += this.#toSafeCount(entry.tokenCount);
+                }
+            }
+        }
+
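+        // The API's per-modality details only report the IMAGE modality, so text tokens are derived as candidatesTokenCount minus image tokens.
+        // NOTE(review): when candidatesTokensDetails is absent, the image count stays 0 and all candidate tokens are treated as text.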
+        const candidatesTokenCount = this.#toSafeCount(usage?.candidatesTokenCount);
+        const candidatesTextTokenCount = Math.max(0, candidatesTokenCount - candidatesImageTokenCount);
+
        return {
            promptTokenCount: this.#toSafeCount(usage?.promptTokenCount),
-            candidatesTokenCount: this.#toSafeCount(usage?.candidatesTokenCount),
+            candidatesTokenCount,
+            candidatesTextTokenCount,
+            candidatesImageTokenCount,
            thoughtsTokenCount: this.#toSafeCount(usage?.thoughtsTokenCount),
        };
    }
@@ -266,19 +324,44 @@ export class GeminiImageGenerationProvider implements IImageProvider {

        for ( const part of parts ) {
            if ( part?.inlineData?.data ) {
-                return `data:image/png;base64,${ part.inlineData.data}`;
+                const mimeType = part.inlineData.mimeType ?? 'image/png';
+                return `data:${mimeType};base64,${ part.inlineData.data}`;
            }
        }
        return undefined;
    }

+    #detectMimeType (data: string): string | undefined {
+        // Handle data URIs like "data:image/jpeg;base64,..."
+        const parsed = this.#parseDataUri(data);
+        if ( parsed ) {
+            return parsed.mimeType;
+        }
+
+        for ( const [signature, mimeType] of Object.entries(MIME_SIGNATURES) ) {
+            if ( data.startsWith(signature) ) {
+                return mimeType;
+            }
+        }
+        return undefined;
+    }
+
+    #parseDataUri (data: string): { mimeType: string; base64: string } | undefined {
+        if ( ! data.startsWith('data:image/') ) return undefined;
+
+        const commaIdx = data.indexOf(',');
+        if ( commaIdx === -1 ) return undefined;
+
+        const header = data.substring(5, commaIdx); // after "data:" up to ","
+        if ( ! header.endsWith(';base64') ) return undefined;
+
+        const mimeType = header.substring(0, header.length - 7); // strip ";base64"
+        if ( mimeType.length === 0 ) return undefined;
+
+        return { mimeType, base64: data.substring(commaIdx + 1) };
+    }
+
    #isValidRatio (ratio: { w: number; h: number }, allowedRatios: { w: number; h: number }[]) {
        return allowedRatios.some(r => r.w === ratio.w && r.h === ratio.h);
    }
-
-    #isValidImageMimeType (mimeType?: string) {
-        if ( ! mimeType ) return false;
-        const supportedTypes = ['image/png', 'image/jpeg', 'image/jpg', 'image/webp'];
-        return supportedTypes.includes(mimeType.toLowerCase());
-    }
 }
@@ -21,6 +21,22 @@ import { IImageModel } from '../types';

 export const GEMINI_DEFAULT_RATIO = { w: 1024, h: 1024 };

+// Estimated output token counts per generated image, keyed by `<model id>` or `<model id>:<quality>`.
+// Used for pre-flight cost checks; values are based on Google's published pricing equivalences.
+// https://ai.google.dev/gemini-api/docs/image-generation#aspect_ratios_and_image_size
+export const GEMINI_ESTIMATED_IMAGE_TOKENS: Record<string, number> = {
+    'gemini-2.5-flash-image': 1290,
+
+    'gemini-3-pro-image-preview:1K': 1120,
+    'gemini-3-pro-image-preview:2K': 1120,
+    'gemini-3-pro-image-preview:4K': 2000,
+
+    'gemini-3.1-flash-image-preview:0.5K': 747,
+    'gemini-3.1-flash-image-preview:1K': 1120,
+    'gemini-3.1-flash-image-preview:2K': 1680,
+    'gemini-3.1-flash-image-preview:4K': 2520,
+};
+
 export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
    {
        puterId: 'google:google/gemini-2.5-flash-image',
@@ -37,20 +53,13 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
        version: '1.0',
        costs_currency: 'usd-cents',
        index_cost_key: '1x1',
-        index_input_cost_key: 'prompt_tokens',
+        index_input_cost_key: 'input',
        allowedQualityLevels: [''],
        costs: {
-            input: 30, // $0.30 per 1M prompt tokens, however google counts them
-            '1x1': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '2x3': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '3x2': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '3x4': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '4x3': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '4x5': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '5x4': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '9x16': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '16x9': 3.9, // $0.039 per image, just used for extiamte input allowed usage
-            '21x9': 3.9, // $0.039 per image, just used for extiamte input allowed usage
+            input: 30, // $0.30 per 1M input tokens (text/image)
+            output: 250, // $2.50 per 1M output tokens (text and thinking)
+            output_image: 3000, // $30.00 per 1M output image tokens
+            '1x1': 3.9,
        },
        allowedRatios: [
            { w: 1, h: 1 },
@@ -72,6 +81,7 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
        version: '1.0',
        costs_currency: 'usd-cents',
        index_cost_key: '1K:1x1',
+        index_input_cost_key: 'input',
        aliases: [
            'gemini-3-pro-image-preview',
            'gemini-3-pro-image',
@@ -93,38 +103,10 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
            { w: 21, h: 9 },
        ],
        costs: {
-            input: 200, // $2.00 per 1M input tokens, however google counts them
-            output: 1200, // $12.00 per 1M output tokens, however google counts them
-            '1K:1x1': 13.4, // $0.134 per image
-            '1K:2x3': 13.4, // $0.134 per image
-            '1K:3x2': 13.4, // $0.134 per image
-            '1K:3x4': 13.4, // $0.134 per image
-            '1K:4x3': 13.4, // $0.134 per image
-            '1K:4x5': 13.4, // $0.134 per image
-            '1K:5x4': 13.4, // $0.134 per image
-            '1K:9x16': 13.4, // $0.134 per image
-            '1K:16x9': 13.4, // $0.134 per image
-            '1K:21x9': 13.4, // $0.134 per image
-            '2K:1x1': 13.4, // $0.134 per image
-            '2K:2x3': 13.4, // $0.134 per image
-            '2K:3x2': 13.4, // $0.134 per image
-            '2K:3x4': 13.4, // $0.134 per image
-            '2K:4x3': 13.4, // $0.134 per image
-            '2K:4x5': 13.4, // $0.134 per image
-            '2K:5x4': 13.4, // $0.134 per image
-            '2K:9x16': 13.4, // $0.134 per image
-            '2K:16x9': 13.4, // $0.134 per image
-            '2K:21x9': 13.4, // $0.134 per image
-            '4K:1x1': 24, // $0.24 per image
-            '4K:2x3': 24, // $0.24 per image
-            '4K:3x2': 24, // $0.24 per image
-            '4K:3x4': 24, // $0.24 per image
-            '4K:4x3': 24, // $0.24 per image
-            '4K:4x5': 24, // $0.24 per image
-            '4K:5x4': 24, // $0.24 per image
-            '4K:9x16': 24, // $0.24 per image
-            '4K:16x9': 24, // $0.24 per image
-            '4K:21x9': 24, // $0.24 per image
+            input: 200, // $2.00 per 1M input tokens (text/image)
+            output: 1200, // $12.00 per 1M output tokens (text and thinking)
+            output_image: 12000, // $120.00 per 1M output image tokens
+            '1K:1x1': 13.4,
        },
    },
    {
@@ -134,6 +116,7 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
        version: '1.0',
        costs_currency: 'usd-cents',
        index_cost_key: '1K:1x1',
+        index_input_cost_key: 'input',
        aliases: [
            'gemini-3.1-flash-image-preview',
            'gemini-3.1-flash-image',
@@ -159,64 +142,10 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
            { w: 21, h: 9 },
        ],
        costs: {
-            input: 25, // $0.25 per 1M input tokens, however google counts them
-            output: 150, // $1.50 per 1M output tokens, however google counts them
-            '0.5K:1x1': 4.5, // $0.045 per image
-            '0.5K:1x4': 4.5, // $0.045 per image
-            '0.5K:1x8': 4.5, // $0.045 per image
-            '0.5K:2x3': 4.5, // $0.045 per image
-            '0.5K:3x2': 4.5, // $0.045 per image
-            '0.5K:3x4': 4.5, // $0.045 per image
-            '0.5K:4x1': 4.5, // $0.045 per image
-            '0.5K:4x3': 4.5, // $0.045 per image
-            '0.5K:4x5': 4.5, // $0.045 per image
-            '0.5K:5x4': 4.5, // $0.045 per image
-            '0.5K:8x1': 4.5, // $0.045 per image
-            '0.5K:9x16': 4.5, // $0.045 per image
-            '0.5K:16x9': 4.5, // $0.045 per image
-            '0.5K:21x9': 4.5, // $0.045 per image
-            '1K:1x1': 6.7, // $0.067 per image
-            '1K:1x4': 6.7, // $0.067 per image
-            '1K:1x8': 6.7, // $0.067 per image
-            '1K:2x3': 6.7, // $0.067 per image
-            '1K:3x2': 6.7, // $0.067 per image
-            '1K:3x4': 6.7, // $0.067 per image
-            '1K:4x1': 6.7, // $0.067 per image
-            '1K:4x3': 6.7, // $0.067 per image
-            '1K:4x5': 6.7, // $0.067 per image
-            '1K:5x4': 6.7, // $0.067 per image
-            '1K:8x1': 6.7, // $0.067 per image
-            '1K:9x16': 6.7, // $0.067 per image
-            '1K:16x9': 6.7, // $0.067 per image
-            '1K:21x9': 6.7, // $0.067 per image
-            '2K:1x1': 10.1, // $0.101 per image
-            '2K:1x4': 10.1, // $0.101 per image
-            '2K:1x8': 10.1, // $0.101 per image
-            '2K:2x3': 10.1, // $0.101 per image
-            '2K:3x2': 10.1, // $0.101 per image
-            '2K:3x4': 10.1, // $0.101 per image
-            '2K:4x1': 10.1, // $0.101 per image
-            '2K:4x3': 10.1, // $0.101 per image
-            '2K:4x5': 10.1, // $0.101 per image
-            '2K:5x4': 10.1, // $0.101 per image
-            '2K:8x1': 10.1, // $0.101 per image
-            '2K:9x16': 10.1, // $0.101 per image
-            '2K:16x9': 10.1, // $0.101 per image
-            '2K:21x9': 10.1, // $0.101 per image
-            '4K:1x1': 15.1, // $0.151 per image
-            '4K:1x4': 15.1, // $0.151 per image
-            '4K:1x8': 15.1, // $0.151 per image
-            '4K:2x3': 15.1, // $0.151 per image
-            '4K:3x2': 15.1, // $0.151 per image
-            '4K:3x4': 15.1, // $0.151 per image
-            '4K:4x1': 15.1, // $0.151 per image
-            '4K:4x3': 15.1, // $0.151 per image
-            '4K:4x5': 15.1, // $0.151 per image
-            '4K:5x4': 15.1, // $0.151 per image
-            '4K:8x1': 15.1, // $0.151 per image
-            '4K:9x16': 15.1, // $0.151 per image
-            '4K:16x9': 15.1, // $0.151 per image
-            '4K:21x9': 15.1, // $0.151 per image
+            input: 25, // $0.25 per 1M input tokens (text/image)
+            output: 150, // $1.50 per 1M output tokens (text and thinking)
+            output_image: 6000, // $60.00 per 1M output image tokens
+            '1K:1x1': 6.7,
        },
    },
 ];
@@ -21,6 +21,9 @@ export interface IGenerateParams {
    provider?: string,
    test_mode?: boolean
    quality?: string,
+    input_image?: string,
+    input_image_mime_type?: string,
+    input_images?: string[],
 };
 export interface IImageProvider {