add gpt image 2 (#2829)
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
Notify HeyPuter / notify (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test-backend (24.x) (push) Has been cancelled
test / API tests (node env, api-test) (24.x) (push) Has been cancelled
test / puterjs (node env, vitest) (24.x) (push) Has been cancelled

* add gpt image 2

* index cost key

* docs + default low
This commit is contained in:
Shruc
2026-04-22 21:00:18 +03:00
committed by GitHub
parent b886dde3d6
commit f14f1bf49e
3 changed files with 128 additions and 10 deletions
@@ -85,9 +85,15 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
throw new Error('`prompt` must be a string'); throw new Error('`prompt` must be a string');
} }
const validRations = selectedModel?.allowedRatios; const validRatios = selectedModel?.allowedRatios;
if ( validRations && (!ratio || !validRations.some(r => r.w === ratio.w && r.h === ratio.h)) ) { if ( validRatios ) {
ratio = validRations[0]; // Default to the first allowed ratio if ( !ratio || !validRatios.some(r => r.w === ratio.w && r.h === ratio.h) ) {
ratio = validRatios[0]; // Default to the first allowed ratio
}
} else {
// Open-ended size models (gpt-image-2): conform to OpenAI's size
// rules (16px multiples, 3840 cap, 3:1 ratio, pixel budget).
ratio = this.#normalizeGptImage2Ratio(ratio);
} }
if ( ! ratio ) { if ( ! ratio ) {
@@ -101,7 +107,10 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
const size = `${ratio.w}x${ratio.h}`; const size = `${ratio.w}x${ratio.h}`;
const price_key = this.#buildPriceKey(selectedModel.id, quality!, size); const price_key = this.#buildPriceKey(selectedModel.id, quality!, size);
const outputPriceInCents = selectedModel?.costs[price_key]; let outputPriceInCents: number | undefined = selectedModel?.costs[price_key];
if ( outputPriceInCents === undefined ) {
outputPriceInCents = this.#estimateOutputCostFromTokens(selectedModel, ratio, quality);
}
if ( outputPriceInCents === undefined ) { if ( outputPriceInCents === undefined ) {
const availableSizes = Object.keys(selectedModel?.costs) const availableSizes = Object.keys(selectedModel?.costs)
.filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key)); .filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key));
@@ -412,8 +421,96 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
} }
#isGptImageModel (model: string) { #isGptImageModel (model: string) {
// Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5 and future variants. // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5, gpt-image-2 and future variants.
return model.startsWith('gpt-image-1'); return model.startsWith('gpt-image-');
}
// gpt-image-2 size rules: each edge in [16, 3840] and a multiple of 16,
// long:short ratio ≤ 3:1, pixel count in [655360, 8294400]. Silently
// clamps/snaps rather than throwing so arbitrary user input is accepted.
// https://developers.openai.com/api/docs/guides/image-generation
//
// `ratio` is the requested { w, h }. Missing, non-numeric, non-finite or
// non-positive values fall back to 1024x1024. Returns a { w, h } pair that
// satisfies all of the rules above at the same time: the final repair passes
// (steps 5 and 6) re-check ratio and pixel budget after snapping, because
// rounding to the 16px grid can otherwise push an in-range request out of range
// (e.g. 807x813 used to come back as 800x816 = 652800 px, under the minimum).
#normalizeGptImage2Ratio (ratio?: { w: number; h: number }) {
    const MIN_EDGE = 16;
    const MAX_EDGE = 3840;
    const STEP = 16;
    const MAX_RATIO = 3;
    const MIN_PIXELS = 655_360;
    const MAX_PIXELS = 8_294_400;

    let w = Number(ratio?.w);
    let h = Number(ratio?.h);
    if ( !Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0 ) {
        return { w: 1024, h: 1024 };
    }

    // 1. Clamp long:short ratio to MAX_RATIO by shrinking the longer edge.
    if ( w / h > MAX_RATIO ) w = h * MAX_RATIO;
    else if ( h / w > MAX_RATIO ) h = w * MAX_RATIO;

    // 2. Cap each edge at MAX_EDGE, preserving aspect ratio.
    if ( w > MAX_EDGE ) {
        const s = MAX_EDGE / w; w = MAX_EDGE; h *= s;
    }
    if ( h > MAX_EDGE ) {
        const s = MAX_EDGE / h; h = MAX_EDGE; w *= s;
    }

    // 3. Scale uniformly into the pixel budget.
    const prescaledPixels = w * h;
    if ( prescaledPixels < MIN_PIXELS ) {
        const s = Math.sqrt(MIN_PIXELS / prescaledPixels);
        w *= s; h *= s;
    } else if ( prescaledPixels > MAX_PIXELS ) {
        const s = Math.sqrt(MAX_PIXELS / prescaledPixels);
        w *= s; h *= s;
    }

    // 4. Snap to STEP. When step 3 scaled the size onto a budget boundary,
    //    bias the rounding direction away from that boundary; otherwise round
    //    to the nearest grid point.
    const dir = prescaledPixels < MIN_PIXELS ? 1
        : prescaledPixels > MAX_PIXELS ? -1
            : 0;
    const snap = (v: number) => {
        const snapped = dir > 0 ? Math.ceil(v / STEP) * STEP
            : dir < 0 ? Math.floor(v / STEP) * STEP
                : Math.round(v / STEP) * STEP;
        return Math.max(MIN_EDGE, Math.min(MAX_EDGE, snapped));
    };
    w = snap(w); h = snap(h);

    // 5. If snap rounding pushed the ratio above MAX_RATIO, trim the longer
    //    edge one STEP at a time until it fits. Both edges are multiples of
    //    STEP here, so the trimmed edge never drops below MIN_EDGE.
    while ( Math.max(w, h) / Math.min(w, h) > MAX_RATIO && Math.max(w, h) > MIN_EDGE ) {
        if ( w >= h ) w -= STEP;
        else h -= STEP;
    }

    // 6. Re-check the pixel budget: nearest-rounding in step 4 and the trim
    //    in step 5 can both leave the budget. Growing the shorter edge /
    //    shrinking the longer edge only ever moves the ratio towards 1:1, so
    //    these repairs cannot re-break the ratio rule.
    while ( w * h < MIN_PIXELS && Math.min(w, h) < MAX_EDGE ) {
        if ( w <= h ) w += STEP;
        else h += STEP;
    }
    while ( w * h > MAX_PIXELS && Math.max(w, h) > MIN_EDGE ) {
        if ( w >= h ) w -= STEP;
        else h -= STEP;
    }

    return { w, h };
}
// Estimates the number of image output tokens for a gpt-image-2 render of
// `width` x `height` pixels at the given `quality`.
// Formula extracted from the calculator at
// https://developers.openai.com/api/docs/guides/image-generation#cost-and-latency
//
// The quality picks a per-quality factor (low 16, medium 48, high 96; any other
// value, including undefined, uses the medium factor). That factor is the
// latent size of the longer edge; the shorter edge's latent size is the factor
// scaled by short/long and rounded. The product of the two is then weighted by
// (2,000,000 + pixel count) / 4,000,000 and rounded up.
#estimateGptImage2OutputTokens (width: number, height: number, quality?: string): number {
    const factorByQuality: Record<string, number> = { low: 16, medium: 48, high: 96 };
    const longLatent = factorByQuality[quality ?? ''] ?? factorByQuality.medium;

    // Order the pixel edges as [short, long]; equal edges are treated as landscape.
    const [shortPx, longPx] = width >= height ? [height, width] : [width, height];
    const shortLatent = Math.round(longLatent * shortPx / longPx);

    const pixelCount = width * height;
    return Math.ceil(longLatent * shortLatent * (2_000_000 + pixelCount) / 4_000_000);
}
// Token-based price fallback, used by the caller when the model's `costs`
// table has no entry for the requested quality/size price key.
//
// Returns undefined when the model id does not start with 'gpt-image-2' or
// when no 'image_output' rate is available for the model; otherwise returns
// the cost computed from the estimated output token count. The caller stores
// the result as a price in cents.
#estimateOutputCostFromTokens (
selectedModel: IImageModel,
ratio: { w: number; h: number },
quality?: string,
): number | undefined {
// Only gpt-image-2 models are priced from a token estimate.
if ( ! selectedModel.id.startsWith('gpt-image-2') ) return undefined;
// NOTE(review): presumably reads the per-1M-token `image_output` rate from
// the model's `costs` table — confirm against #getCostRate.
const rate = this.#getCostRate(selectedModel, 'image_output');
if ( rate === undefined ) return undefined;
const tokens = this.#estimateGptImage2OutputTokens(ratio.w, ratio.h, quality);
// NOTE(review): assumes #costForTokens converts (tokens, rate) into the same
// unit as the fixed per-size prices — confirm.
return this.#costForTokens(tokens, rate);
} }
#buildPriceKey (model: string, quality: string, size: string) { #buildPriceKey (model: string, quality: string, size: string) {
@@ -1,6 +1,27 @@
import { IImageModel } from '../types'; import { IImageModel } from '../types';
export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [ export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [
{
puterId: 'openai:openai/gpt-image-2',
id: 'gpt-image-2',
aliases: ['openai/gpt-image-2', 'gpt-image-2-2026-04-21'],
name: 'GPT Image 2',
version: '2.0',
costs_currency: 'usd-cents',
index_cost_key: 'low:1024x1024',
costs: {
// Text tokens (per 1M tokens)
text_input: 500, // $5.00
text_cached_input: 125, // $1.25
text_output: 1000, // $10.00
// Image tokens (per 1M tokens)
image_input: 800, // $8.00
image_cached_input: 200, // $2.00
image_output: 3000, // $30.00
'low:1024x1024': 0.588,
},
allowedQualityLevels: ['low', 'medium', 'high', 'auto'],
},
{ {
puterId: 'openai:openai/gpt-image-1.5', puterId: 'openai:openai/gpt-image-1.5',
id: 'gpt-image-1.5', id: 'gpt-image-1.5',
+4 -4
View File
@@ -37,13 +37,13 @@ Additional settings for the generation request. Available options depend on the
#### OpenAI Options #### OpenAI Options
Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`): Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-2`, `gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
| Option | Type | Description | | Option | Type | Description |
|--------|------|-------------| |--------|------|-------------|
| `model` | `String` | Image model to use. Available: `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` | | `model` | `String` | Image model to use. Available: `'gpt-image-2'`, `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`). For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) | | `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`); `gpt-image-2` also accepts `'auto'`. For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
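| `ratio` | `Object` | Aspect ratio with `w` and `h` properties | | `ratio` | `Object` | Aspect ratio with `w` and `h` properties. `gpt-image-2` accepts arbitrary sizes, which are silently adjusted to satisfy its size rules (each edge a multiple of 16 and at most 3840, long:short ratio at most 3:1); other GPT models and DALL-E are restricted to fixed sizes |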
For more details, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/images/create). For more details, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/images/create).