mirror of
https://github.com/HeyPuter/puter.git
synced 2026-05-03 08:00:32 +00:00
add gpt image 2 (#2829)
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
Notify HeyPuter / notify (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test-backend (24.x) (push) Has been cancelled
test / API tests (node env, api-test) (24.x) (push) Has been cancelled
test / puterjs (node env, vitest) (24.x) (push) Has been cancelled
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
Notify HeyPuter / notify (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test-backend (24.x) (push) Has been cancelled
test / API tests (node env, api-test) (24.x) (push) Has been cancelled
test / puterjs (node env, vitest) (24.x) (push) Has been cancelled
* add gpt image 2 * index cost key * docs + default low
This commit is contained in:
+103
-6
@@ -85,9 +85,15 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
|
||||
throw new Error('`prompt` must be a string');
|
||||
}
|
||||
|
||||
const validRations = selectedModel?.allowedRatios;
|
||||
if ( validRations && (!ratio || !validRations.some(r => r.w === ratio.w && r.h === ratio.h)) ) {
|
||||
ratio = validRations[0]; // Default to the first allowed ratio
|
||||
const validRatios = selectedModel?.allowedRatios;
|
||||
if ( validRatios ) {
|
||||
if ( !ratio || !validRatios.some(r => r.w === ratio.w && r.h === ratio.h) ) {
|
||||
ratio = validRatios[0]; // Default to the first allowed ratio
|
||||
}
|
||||
} else {
|
||||
// Open-ended size models (gpt-image-2): conform to OpenAI's size
|
||||
// rules (16px multiples, 3840 cap, 3:1 ratio, pixel budget).
|
||||
ratio = this.#normalizeGptImage2Ratio(ratio);
|
||||
}
|
||||
|
||||
if ( ! ratio ) {
|
||||
@@ -101,7 +107,10 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
|
||||
|
||||
const size = `${ratio.w}x${ratio.h}`;
|
||||
const price_key = this.#buildPriceKey(selectedModel.id, quality!, size);
|
||||
const outputPriceInCents = selectedModel?.costs[price_key];
|
||||
let outputPriceInCents: number | undefined = selectedModel?.costs[price_key];
|
||||
if ( outputPriceInCents === undefined ) {
|
||||
outputPriceInCents = this.#estimateOutputCostFromTokens(selectedModel, ratio, quality);
|
||||
}
|
||||
if ( outputPriceInCents === undefined ) {
|
||||
const availableSizes = Object.keys(selectedModel?.costs)
|
||||
.filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key));
|
||||
@@ -412,8 +421,96 @@ export class OpenAiImageGenerationProvider implements IImageProvider {
|
||||
}
|
||||
|
||||
#isGptImageModel (model: string) {
    // Matches every model in the gpt-image family by prefix: gpt-image-1,
    // gpt-image-1-mini, gpt-image-1.5, gpt-image-2 and future variants.
    // The narrower 'gpt-image-1' prefix check would exclude gpt-image-2.
    return model.startsWith('gpt-image-');
}
|
||||
|
||||
// gpt-image-2 size rules: each edge in [16, 3840] and a multiple of 16,
// long:short ratio ≤ 3:1, pixel count in [655360, 8294400]. Silently
// clamps/snaps rather than throwing so arbitrary user input is accepted.
// https://developers.openai.com/api/docs/guides/image-generation
//
// `ratio` is the requested size; a missing, non-numeric, non-finite or
// non-positive `w`/`h` falls back to 1024x1024. Returns a `{ w, h }` pair
// that satisfies all of the rules above.
#normalizeGptImage2Ratio (ratio?: { w: number; h: number }) {
    const MIN_EDGE = 16;
    const MAX_EDGE = 3840;
    const STEP = 16;
    const MAX_RATIO = 3;
    const MIN_PIXELS = 655_360;
    const MAX_PIXELS = 8_294_400;

    let w = Number(ratio?.w);
    let h = Number(ratio?.h);
    if ( !Number.isFinite(w) || !Number.isFinite(h) || w <= 0 || h <= 0 ) {
        return { w: 1024, h: 1024 };
    }

    // 1. Clamp long:short ratio to MAX_RATIO by shrinking the longer edge.
    if ( w / h > MAX_RATIO ) w = h * MAX_RATIO;
    else if ( h / w > MAX_RATIO ) h = w * MAX_RATIO;

    // 2. Cap each edge at MAX_EDGE, preserving aspect ratio.
    if ( w > MAX_EDGE ) {
        const s = MAX_EDGE / w;
        w = MAX_EDGE;
        h *= s;
    }
    if ( h > MAX_EDGE ) {
        const s = MAX_EDGE / h;
        h = MAX_EDGE;
        w *= s;
    }

    // 3. Scale uniformly into the pixel budget.
    const prescaledPixels = w * h;
    if ( prescaledPixels < MIN_PIXELS ) {
        const s = Math.sqrt(MIN_PIXELS / prescaledPixels);
        w *= s;
        h *= s;
    } else if ( prescaledPixels > MAX_PIXELS ) {
        const s = Math.sqrt(MAX_PIXELS / prescaledPixels);
        w *= s;
        h *= s;
    }

    // 4. Snap to STEP. Bias rounding direction so a size that was scaled
    //    onto a budget boundary is not immediately pushed back across it.
    const dir = prescaledPixels < MIN_PIXELS ? 1
        : prescaledPixels > MAX_PIXELS ? -1
            : 0;
    const snap = (v: number) => {
        const snapped = dir > 0 ? Math.ceil(v / STEP) * STEP
            : dir < 0 ? Math.floor(v / STEP) * STEP
                : Math.round(v / STEP) * STEP;
        return Math.max(MIN_EDGE, Math.min(MAX_EDGE, snapped));
    };
    w = snap(w);
    h = snap(h);

    // 5. Repair on the STEP grid. Snapping can still violate a rule:
    //    plain rounding may drop an in-budget size below MIN_PIXELS
    //    (e.g. 1413x471 -> 1408x464), may nudge the ratio above MAX_RATIO,
    //    and the MAX_EDGE clamp may leave the size above MAX_PIXELS.
    //    Growing the shorter edge raises the pixel count and lowers the
    //    ratio; shrinking the longer edge lowers both. Every adjustment
    //    moves towards the valid region, so the loop terminates; the
    //    iteration cap is only a safety net.
    for ( let guard = 0; guard < 1024; guard++ ) {
        const widthIsLong = w >= h;
        const longEdge = widthIsLong ? w : h;
        const shortEdge = widthIsLong ? h : w;
        const pixels = w * h;

        let delta: number;
        let adjustLong: boolean;
        if ( pixels < MIN_PIXELS ) {
            delta = STEP;
            adjustLong = false;
        } else if ( pixels > MAX_PIXELS ) {
            delta = -STEP;
            adjustLong = true;
        } else if ( longEdge / shortEdge > MAX_RATIO ) {
            // Prefer trimming the longer edge, unless that would leave
            // the pixel budget; then grow the shorter edge instead.
            adjustLong = (longEdge - STEP) * shortEdge >= MIN_PIXELS;
            delta = adjustLong ? -STEP : STEP;
        } else {
            break;
        }

        if ( adjustLong === widthIsLong ) {
            w = Math.max(MIN_EDGE, Math.min(MAX_EDGE, w + delta));
        } else {
            h = Math.max(MIN_EDGE, Math.min(MAX_EDGE, h + delta));
        }
    }
    return { w, h };
}
|
||||
|
||||
// Estimates how many image output tokens gpt-image-2 emits for an image of
// `width` x `height` pixels at the given `quality`. Formula extracted from
// the calculator at
// https://developers.openai.com/api/docs/guides/image-generation#cost-and-latency
#estimateGptImage2OutputTokens (width: number, height: number, quality?: string): number {
    // Latent size assigned to the longer edge for each quality level.
    const LONG_EDGE_LATENT: Record<string, number> = { low: 16, medium: 48, high: 96 };
    // A missing or unlisted quality (e.g. 'auto') is estimated as medium.
    const longLatent = LONG_EDGE_LATENT[quality ?? ''] ?? LONG_EDGE_LATENT.medium;

    // The shorter edge gets a proportional latent size, rounded to an integer.
    const shortLatent = Math.round(longLatent * Math.min(width, height) / Math.max(width, height));

    const latentArea = longLatent * shortLatent;
    const pixelCount = width * height;
    return Math.ceil(latentArea * (2_000_000 + pixelCount) / 4_000_000);
}
|
||||
|
||||
// Token-based fallback price for sizes that have no explicit entry in the
// model's cost table. Only applies to models whose id starts with
// 'gpt-image-2'; returns undefined for any other model, or when the model
// has no 'image_output' rate.
#estimateOutputCostFromTokens (
    selectedModel: IImageModel,
    ratio: { w: number; h: number },
    quality?: string,
): number | undefined {
    const isGptImage2 = selectedModel.id.startsWith('gpt-image-2');
    if ( ! isGptImage2 ) return undefined;

    const outputRate = this.#getCostRate(selectedModel, 'image_output');
    if ( outputRate === undefined ) return undefined;

    const { w, h } = ratio;
    const estimatedTokens = this.#estimateGptImage2OutputTokens(w, h, quality);
    return this.#costForTokens(estimatedTokens, outputRate);
}
|
||||
|
||||
#buildPriceKey (model: string, quality: string, size: string) {
|
||||
|
||||
@@ -1,6 +1,27 @@
|
||||
import { IImageModel } from '../types';
|
||||
|
||||
export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [
|
||||
{
|
||||
puterId: 'openai:openai/gpt-image-2',
|
||||
id: 'gpt-image-2',
|
||||
aliases: ['openai/gpt-image-2', 'gpt-image-2-2026-04-21'],
|
||||
name: 'GPT Image 2',
|
||||
version: '2.0',
|
||||
costs_currency: 'usd-cents',
|
||||
index_cost_key: 'low:1024x1024',
|
||||
costs: {
|
||||
// Text tokens (per 1M tokens)
|
||||
text_input: 500, // $5.00
|
||||
text_cached_input: 125, // $1.25
|
||||
text_output: 1000, // $10.00
|
||||
// Image tokens (per 1M tokens)
|
||||
image_input: 800, // $8.00
|
||||
image_cached_input: 200, // $2.00
|
||||
image_output: 3000, // $30.00
|
||||
'low:1024x1024': 0.588,
|
||||
},
|
||||
allowedQualityLevels: ['low', 'medium', 'high', 'auto'],
|
||||
},
|
||||
{
|
||||
puterId: 'openai:openai/gpt-image-1.5',
|
||||
id: 'gpt-image-1.5',
|
||||
|
||||
@@ -37,13 +37,13 @@ Additional settings for the generation request. Available options depend on the
|
||||
|
||||
#### OpenAI Options
|
||||
|
||||
Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
|
||||
Available when `provider: 'openai-image-generation'` or inferred from model (`gpt-image-2`, `gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`, `dall-e-3`):
|
||||
|
||||
| Option | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `model` | `String` | Image model to use. Available: `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
|
||||
| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`). For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
|
||||
| `ratio` | `Object` | Aspect ratio with `w` and `h` properties |
|
||||
| `model` | `String` | Image model to use. Available: `'gpt-image-2'`, `'gpt-image-1.5'`, `'gpt-image-1'`, `'gpt-image-1-mini'`, `'dall-e-3'` |
|
||||
| `quality` | `String` | Image quality. For GPT models: `'high'`, `'medium'`, `'low'` (default: `'low'`); `gpt-image-2` also accepts `'auto'`. For DALL-E 3: `'hd'`, `'standard'` (default: `'standard'`) |
|
||||
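| `ratio` | `Object` | Aspect ratio with `w` and `h` properties. `gpt-image-2` accepts arbitrary sizes, which are silently adjusted to meet OpenAI's limits (each edge a multiple of 16 px and at most 3840 px, long:short ratio at most 3:1, total pixel count between 655,360 and 8,294,400); other GPT models and DALL-E are restricted to fixed sizes |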
|
||||
|
||||
For more details, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/images/create).
|
||||
|
||||
|
||||
Reference in New Issue
Block a user