add gpt image 2 (#2829)

* add gpt image 2 * index cost key * docs + default low
2026-05-03 08:00:32 +00:00 · 2026-04-22 21:00:18 +03:00
parent b886dde3d6
commit f14f1bf49e
3 changed files with 128 additions and 10 deletions
@@ -85,9 +85,15 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
            throw new Error('`prompt` must be a string');
        }
-        const validRations = selectedModel?.allowedRatios;
+        const validRatios = selectedModel?.allowedRatios;
-        if ( validRations && (!ratio || !validRations.some(r => r.w === ratio.w && r.h === ratio.h)) ) {
+        if ( validRatios ) {
-            ratio = validRations[0]; // Default to the first allowed ratio
+            if ( !ratio || !validRatios.some(r => r.w === ratio.w && r.h === ratio.h) ) {
                ratio = validRatios[0]; // Default to the first allowed ratio
            }
        } else {
            // Open-ended size models (gpt-image-2): conform to OpenAI's size
            // rules (16px multiples, 3840 cap, 3:1 ratio, pixel budget).
            ratio = this.#normalizeGptImage2Ratio(ratio);
        }
        if ( ! ratio ) {
@@ -101,7 +107,10 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
        const size = `${ratio.w}x${ratio.h}`;
        const price_key = this.#buildPriceKey(selectedModel.id, quality!, size);
-        const outputPriceInCents = selectedModel?.costs[price_key];
+        let outputPriceInCents: number | undefined = selectedModel?.costs[price_key];
        if ( outputPriceInCents === undefined ) {
            outputPriceInCents = this.#estimateOutputCostFromTokens(selectedModel, ratio, quality);
        }
        if ( outputPriceInCents === undefined ) {
            const availableSizes = Object.keys(selectedModel?.costs)
                .filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key));
@@ -412,8 +421,96 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
    }
    #isGptImageModel (model: string) {
-        // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5 and future variants.
+        // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5, gpt-image-2 and future variants.
-        return model.startsWith('gpt-image-1');
+        return model.startsWith('gpt-image-');
    }
    /**
     * Coerces an arbitrary requested size into one that satisfies the
     * gpt-image-2 size rules: each edge in [16, 3840] and a multiple of 16,
     * long:short ratio ≤ 3:1, pixel count in [655360, 8294400]. Silently
     * clamps/snaps rather than throwing so arbitrary user input is accepted.
     * https://developers.openai.com/api/docs/guides/image-generation
     *
     * @param ratio Requested size. A missing value, or edges that are
     *   non-finite or not strictly positive, yield 1024x1024.
     * @returns A `{ w, h }` pair that satisfies every rule listed above.
     */
    #normalizeGptImage2Ratio (ratio?: { w: number; h: number }) {
        const MIN_EDGE = 16;
        const MAX_EDGE = 3840;
        const STEP = 16;
        const MAX_RATIO = 3;
        const MIN_PIXELS = 655_360;
        const MAX_PIXELS = 8_294_400;

        let w = Number(ratio?.w);
        let h = Number(ratio?.h);
        if ( !Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0 ) {
            return { w: 1024, h: 1024 };
        }

        // 1. Clamp long:short ratio to MAX_RATIO by shrinking the longer edge.
        if ( w / h > MAX_RATIO ) w = h * MAX_RATIO;
        else if ( h / w > MAX_RATIO ) h = w * MAX_RATIO;

        // 2. Cap each edge at MAX_EDGE, preserving aspect ratio.
        if ( w > MAX_EDGE ) {
            const s = MAX_EDGE / w; w = MAX_EDGE; h *= s;
        }
        if ( h > MAX_EDGE ) {
            const s = MAX_EDGE / h; h = MAX_EDGE; w *= s;
        }

        // 3. Scale uniformly into the pixel budget.
        const prescaledPixels = w * h;
        if ( prescaledPixels < MIN_PIXELS ) {
            const s = Math.sqrt(MIN_PIXELS / prescaledPixels);
            w *= s; h *= s;
        } else if ( prescaledPixels > MAX_PIXELS ) {
            const s = Math.sqrt(MAX_PIXELS / prescaledPixels);
            w *= s; h *= s;
        }

        // 4. Snap to STEP. Bias rounding direction so snap doesn't push pixels
        //    back out of the budget that step 3 just scaled into.
        const dir = prescaledPixels < MIN_PIXELS ? 1
            : prescaledPixels > MAX_PIXELS ? -1
                : 0;
        const clampEdge = (v: number) => Math.max(MIN_EDGE, Math.min(MAX_EDGE, v));
        const snapDown = (v: number) => clampEdge(Math.floor(v / STEP) * STEP);
        const snapUp = (v: number) => clampEdge(Math.ceil(v / STEP) * STEP);
        const snapNearest = (v: number) => clampEdge(Math.round(v / STEP) * STEP);
        const snap = dir > 0 ? snapUp : dir < 0 ? snapDown : snapNearest;

        // True when a snapped candidate satisfies the pixel budget and the
        // ratio limit (edge range and STEP alignment hold by construction).
        const fits = (cw: number, ch: number) => {
            const pixels = cw * ch;
            return pixels >= MIN_PIXELS
                && pixels <= MAX_PIXELS
                && Math.max(cw, ch) <= MAX_RATIO * Math.min(cw, ch);
        };

        const preferred = { w: snap(w), h: snap(h) };
        if ( fits(preferred.w, preferred.h) ) return preferred;

        // 5. Round-to-nearest can land just outside the budget or just above
        //    MAX_RATIO when the input sits near a limit (e.g. 813x807 snaps to
        //    816x800 = 652800 px). Re-pick among the floor/ceil neighbours of
        //    each edge, taking the valid candidate closest to the scaled size.
        let best: { w: number; h: number } | undefined;
        let bestDistance = Infinity;
        for ( const cw of [snapDown(w), snapUp(w)] ) {
            for ( const ch of [snapDown(h), snapUp(h)] ) {
                if ( ! fits(cw, ch) ) continue;
                const distance = Math.abs(cw - w) + Math.abs(ch - h);
                if ( distance < bestDistance ) {
                    bestDistance = distance;
                    best = { w: cw, h: ch };
                }
            }
        }

        // Degenerate numeric input (e.g. underflow to 0 pixels) can leave no
        // valid neighbour; fall back to the same default used for bad input.
        return best ?? { w: 1024, h: 1024 };
    }
    /**
     * Estimates how many image output tokens a gpt-image-2 render consumes.
     * Formula extracted from the calculator at
     * https://developers.openai.com/api/docs/guides/image-generation#cost-and-latency
     *
     * @param width Output width.
     * @param height Output height.
     * @param quality Quality level; anything other than `low`, `medium` or
     *   `high` (including omitted) uses the `medium` factor.
     * @returns Estimated token count, rounded up.
     */
    #estimateGptImage2OutputTokens (width: number, height: number, quality?: string): number {
        const QUALITY_FACTORS: Record<string, number> = { low: 16, medium: 48, high: 96 };
        const qualityKey = quality ?? '';
        const longLatent = QUALITY_FACTORS[qualityKey] ?? QUALITY_FACTORS.medium;

        // The longer edge maps to the quality factor; the shorter edge is
        // scaled proportionally and rounded.
        const longest = Math.max(width, height);
        const shortest = Math.min(width, height);
        const shortLatent = Math.round(longLatent * shortest / longest);

        let latentWidth = shortLatent;
        let latentHeight = longLatent;
        if ( width >= height ) {
            latentWidth = longLatent;
            latentHeight = shortLatent;
        }

        const latentArea = latentWidth * latentHeight;
        const pixelCount = width * height;
        return Math.ceil(latentArea * (2_000_000 + pixelCount) / 4_000_000);
    }
    /**
     * Derives an output price from the estimated token count, for models
     * whose id starts with `gpt-image-2`.
     *
     * @param selectedModel Model whose id and `image_output` rate are consulted.
     * @param ratio Output size passed to the token estimator.
     * @param quality Optional quality level passed to the token estimator.
     * @returns The result of `#costForTokens`, or `undefined` when the model
     *   id does not match or no `image_output` rate is available.
     */
    #estimateOutputCostFromTokens (
        selectedModel: IImageModel,
        ratio: { w: number; h: number },
        quality?: string,
    ): number | undefined {
        const usesTokenPricing = selectedModel.id.startsWith('gpt-image-2');
        if ( usesTokenPricing ) {
            const outputRate = this.#getCostRate(selectedModel, 'image_output');
            if ( outputRate !== undefined ) {
                const estimatedTokens = this.#estimateGptImage2OutputTokens(ratio.w, ratio.h, quality);
                return this.#costForTokens(estimatedTokens, outputRate);
            }
        }
        return undefined;
    }
    #buildPriceKey (model: string, quality: string, size: string) {
@@ -1,6 +1,27 @@
 import { IImageModel } from '../types';
 export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [
    {
        puterId: 'openai:openai/gpt-image-2',
        id: 'gpt-image-2',
        aliases: ['openai/gpt-image-2', 'gpt-image-2-2026-04-21'],
        name: 'GPT Image 2',
        version: '2.0',
        costs_currency: 'usd-cents',
        index_cost_key: 'low:1024x1024',
        costs: {
            // Text tokens (per 1M tokens)
            text_input: 500, // $5.00
            text_cached_input: 125, // $1.25
            text_output: 1000, // $10.00
            // Image tokens (per 1M tokens)
            image_input: 800, // $8.00
            image_cached_input: 200, // $2.00
            image_output: 3000, // $30.00
            'low:1024x1024': 0.588,
        },
        allowedQualityLevels: ['low', 'medium', 'high', 'auto'],
    },
    {
        puterId: 'openai:openai/gpt-image-1.5',
        id: 'gpt-image-1.5',
@@ -37,13 +37,13 @@ Additional settings for the generation request. Available options depend on the
 #### OpenAI Options
-Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
+Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-2`, `gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
 | Option | Type | Description |
 |--------|------|-------------|
-| `model` | `String` | Image model to use. Available: `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
+| `model` | `String` | Image model to use. Available: `'gpt-image-2'`, `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
-| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`). For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
+| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`); `gpt-image-2` also accepts `'auto'`. For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
-| `ratio` | `Object` | Aspect ratio with `w` and `h` properties |
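+| `ratio` | `Object` | Aspect ratio with `w` and `h` properties, given as the output width and height in pixels. `gpt-image-2` accepts arbitrary sizes, which are silently adjusted to a size OpenAI allows (each edge a multiple of 16 and at most 3840, long:short ratio at most 3:1); other GPT models and DALL-E are restricted to fixed sizes |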
 For more details, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/images/create).