diff --git a/package-lock.json b/package-lock.json index a9ae006d8..6f045c2cc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -406,6 +406,7 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.980.0.tgz", "integrity": "sha512-1rGhAx4cHZy3pMB3R3r84qMT5WEvQ6ajr2UksnD48fjQxwaUcpI6NsPvU5j/5BI5LqGiUO6ThOrMwSMm95twQA==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -459,6 +460,7 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.980.0.tgz", "integrity": "sha512-AjKBNEc+rjOZQE1HwcD9aCELqg1GmUj1rtICKuY8cgwB73xJ4U/kNyqKKpN2k9emGqlfDY2D8itIp/vDc6OKpw==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@aws-sdk/types": "^3.973.1", "@smithy/types": "^4.12.0", @@ -1303,7 +1305,6 @@ "version": "7.28.5", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -1620,7 +1621,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" }, @@ -1659,7 +1659,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -3432,7 +3431,6 @@ "node_modules/@jimp/custom": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/core": "^0.22.12" } @@ -3463,7 +3461,6 @@ "node_modules/@jimp/plugin-blit": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3474,7 +3471,6 @@ "node_modules/@jimp/plugin-blur": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3495,7 +3491,6 @@ "node_modules/@jimp/plugin-color": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/utils": "^0.22.12", "tinycolor2": "^1.6.0" @@ -3533,7 +3528,6 @@ "node_modules/@jimp/plugin-crop": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/utils": 
"^0.22.12" }, @@ -3637,7 +3631,6 @@ "node_modules/@jimp/plugin-resize": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3648,7 +3641,6 @@ "node_modules/@jimp/plugin-rotate": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3662,7 +3654,6 @@ "node_modules/@jimp/plugin-scale": { "version": "0.22.12", "license": "MIT", - "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3903,7 +3894,6 @@ "node_modules/@opentelemetry/api": { "version": "1.9.0", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -5736,7 +5726,6 @@ "version": "8.48.0", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.48.0", "@typescript-eslint/types": "8.48.0", @@ -6368,7 +6357,6 @@ "node_modules/acorn": { "version": "8.15.0", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -7026,7 +7014,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -7225,7 +7212,6 @@ "node_modules/chai": { "version": "4.5.0", "license": "MIT", - "peer": true, "dependencies": { "assertion-error": "^1.1.0", "check-error": "^1.0.3", @@ -8519,7 +8505,6 @@ "node_modules/eslint": { "version": "9.39.1", "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -10478,7 +10463,6 @@ "resolved": "https://registry.npmjs.org/ioredis/-/ioredis-5.9.2.tgz", "integrity": "sha512-tAAg/72/VxOUW7RQSX1pIxJVucYKcjFjfvj60L57jrZpYCHC3XN0WCQ3sNYL4Gmvv+7GPvTAjc+KSdeNuE8oWQ==", "license": "MIT", - "peer": true, "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", @@ -14341,7 +14325,6 @@ "version": "8.17.1", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -15783,7 
+15766,6 @@ "version": "5.9.3", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -15956,7 +15938,6 @@ "version": "7.2.6", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -16048,7 +16029,6 @@ "version": "4.0.14", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@vitest/expect": "4.0.14", "@vitest/mocker": "4.0.14", @@ -16162,7 +16142,6 @@ "version": "5.103.0", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -16210,7 +16189,6 @@ "version": "5.1.4", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@discoveryjs/json-ext": "^0.5.0", "@webpack-cli/configtest": "^2.1.1", @@ -16409,7 +16387,6 @@ "node_modules/winston": { "version": "3.18.3", "license": "MIT", - "peer": true, "dependencies": { "@colors/colors": "^1.6.0", "@dabh/diagnostics": "^2.0.8", @@ -16527,7 +16504,6 @@ "node_modules/ws": { "version": "8.18.3", "license": "MIT", - "peer": true, "engines": { "node": ">=10.0.0" }, @@ -16717,7 +16693,6 @@ "node_modules/zod": { "version": "3.25.76", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -16838,7 +16813,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.4.1.tgz", "integrity": "sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -16848,7 +16822,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.49.1.tgz", "integrity": "sha512-kaNl/T7WzyMUQHQlVq7q0oV4Kev6+0xFwqzofryC66jgGMacd0QH5TwfpbUwSTby+SdAdprAe5UKMvBw4tKS5Q==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/api": "^1.0.0" }, @@ -19624,7 +19597,7 @@ }, "src/puter-js": { "name": "@heyputer/puter.js", - "version": "2.2.5", + 
"version": "2.2.10", "license": "Apache-2.0", "dependencies": { "@heyputer/kv.js": "^0.2.1", diff --git a/src/backend/src/services/ai/image/AIImageGenerationService.ts b/src/backend/src/services/ai/image/AIImageGenerationService.ts index b4e9c9ae2..41c7a7165 100644 --- a/src/backend/src/services/ai/image/AIImageGenerationService.ts +++ b/src/backend/src/services/ai/image/AIImageGenerationService.ts @@ -22,7 +22,6 @@ import { ErrorService } from '../../../modules/core/ErrorService.js'; import { Context } from '../../../util/context.js'; import BaseService from '../../BaseService.js'; import { BaseDatabaseAccessService } from '../../database/BaseDatabaseAccessService.js'; -import { DB_WRITE } from '../../database/consts.js'; import { DriverService } from '../../drivers/DriverService.js'; import { TypedValue } from '../../drivers/meta/Runtime.js'; import { EventService } from '../../EventService.js'; @@ -30,8 +29,8 @@ import { MeteringService } from '../../MeteringService/MeteringService.js'; import { GeminiImageGenerationProvider } from './providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.js'; import { OpenAiImageGenerationProvider } from './providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.js'; import { TogetherImageGenerationProvider } from './providers/TogetherImageGenerationProvider/TogetherImageGenerationProvider.js'; -import { XAIImageGenerationProvider } from './providers/XAIImageGenerationProvider/XAIImageGenerationProvider.js'; import { IGenerateParams, IImageModel, IImageProvider } from './providers/types.js'; +import { XAIImageGenerationProvider } from './providers/XAIImageGenerationProvider/XAIImageGenerationProvider.js'; export class AIImageGenerationService extends BaseService { @@ -44,7 +43,7 @@ export class AIImageGenerationService extends BaseService { } get db (): BaseDatabaseAccessService { - return this.services.get('database').get(DB_WRITE, 'ai-service'); + return this.services.get('database').get(); } get 
errorService (): ErrorService { diff --git a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts index d4004fe74..eca25e210 100644 --- a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts +++ b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts @@ -30,6 +30,12 @@ type GeminiGenerateParams = IGenerateParams & { input_image_mime_type?: string; }; +interface GeminiUsageMetadata { + promptTokenCount: number; + candidatesTokenCount: number; + thoughtsTokenCount: number; +} + export class GeminiImageGenerationProvider implements IImageProvider { #meteringService: MeteringService; #client: GoogleGenAI; @@ -85,7 +91,8 @@ export class GeminiImageGenerationProvider implements IImageProvider { const priceKey = `${quality ? `${quality}:` : ''}${ratio.w}x${ratio.h}`; const priceInCents = selectedModel.costs[priceKey]; if ( priceInCents === undefined ) { - const availableSizes = Object.keys(selectedModel.costs); + const availableSizes = Object.keys(selectedModel.costs) + .filter(key => key !== 'input' && key !== 'output'); throw APIError.create('field_invalid', undefined, { key: 'size/quality combination', expected: `one of: ${ availableSizes.join(', ')}`, @@ -103,8 +110,11 @@ export class GeminiImageGenerationProvider implements IImageProvider { }); } - const costInMicroCents = priceInCents * 1_000_000; - const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents); + const estimatedPromptTokenCount = this.#estimatePromptTokenCount(prompt); + const estimatedInputCostInCents = this.#calculateTokenCostInCents(estimatedPromptTokenCount, selectedModel.costs.input); + const estimatedOutputCostInCents = priceInCents; + const estimatedTotalCostInMicroCents = 
this.#toMicroCents(estimatedInputCostInCents + estimatedOutputCostInCents); + const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, estimatedTotalCostInMicroCents); if ( ! usageAllowed ) { throw APIError.create('insufficient_funds'); @@ -116,8 +126,37 @@ export class GeminiImageGenerationProvider implements IImageProvider { contents, }); - const usageType = `gemini:${selectedModel.id}:${priceKey}`; - this.#meteringService.incrementUsage(actor, usageType, 1, costInMicroCents); + const usage = this.#extractUsageMetadata(response); + const inputTokenCount = usage.promptTokenCount || estimatedPromptTokenCount; + const outputTokenCount = usage.candidatesTokenCount + usage.thoughtsTokenCount; + + const inputCostInCents = this.#calculateTokenCostInCents(inputTokenCount, selectedModel.costs.input); + const outputTextCostInCents = this.#calculateTokenCostInCents(outputTokenCount, selectedModel.costs.output); + const outputCostInCents = priceInCents + outputTextCostInCents; + + const usagePrefix = `gemini:${selectedModel.id}:${priceKey}`; + this.#meteringService.batchIncrementUsages(actor, [ + { + usageType: `${usagePrefix}:input`, + usageAmount: Math.max(inputTokenCount, 1), + costOverride: this.#toMicroCents(inputCostInCents), + }, + { + usageType: `${usagePrefix}:output`, + usageAmount: Math.max(outputTokenCount, 1), + costOverride: this.#toMicroCents(outputCostInCents), + }, + ]); + + this.#setResponseCostMetadata({ + model: selectedModel.id, + quality, + ratio, + inputCostInCents, + outputCostInCents, + inputTokenCount, + outputTokenCount, + }); const url = this.#extractImageUrl(response); @@ -144,6 +183,81 @@ export class GeminiImageGenerationProvider implements IImageProvider { return `Generate a picture of dimensions ${parseInt(`${ratio.w}`)}x${parseInt(`${ratio.h}`)} with the prompt: ${prompt}`; } + #setResponseCostMetadata ({ + model, + quality, + ratio, + inputCostInCents, + outputCostInCents, + inputTokenCount, + outputTokenCount, + }: { + 
model: string; + quality?: string; + ratio: { w: number; h: number }; + inputCostInCents: number; + outputCostInCents: number; + inputTokenCount: number; + outputTokenCount: number; + }) { + const clientDriverCall = Context.get('client_driver_call') as { response_metadata?: Record } | undefined; + const responseMetadata = clientDriverCall?.response_metadata; + if ( ! responseMetadata ) return; + + const totalCostInCents = inputCostInCents + outputCostInCents; + responseMetadata.cost = { + currency: 'usd-cents', + input: inputCostInCents, + output: outputCostInCents, + total: totalCostInCents, + }; + responseMetadata.cost_components = { + provider: 'gemini-image-generation', + model, + quality, + ratio: `${ratio.w}x${ratio.h}`, + input_tokens: inputTokenCount, + output_tokens: outputTokenCount, + input_microcents: this.#toMicroCents(inputCostInCents), + output_microcents: this.#toMicroCents(outputCostInCents), + total_microcents: this.#toMicroCents(totalCostInCents), + }; + } + + #extractUsageMetadata (response: GenerateContentResponse): GeminiUsageMetadata { + const usage = (response as GenerateContentResponse & { usageMetadata?: Record }).usageMetadata; + return { + promptTokenCount: this.#toSafeCount(usage?.promptTokenCount), + candidatesTokenCount: this.#toSafeCount(usage?.candidatesTokenCount), + thoughtsTokenCount: this.#toSafeCount(usage?.thoughtsTokenCount), + }; + } + + #estimatePromptTokenCount (prompt: string): number { + const text = prompt.trim(); + if ( text.length === 0 ) return 0; + + // Same approximation used by chat billing flow. + return Math.max(1, Math.floor(((text.length / 4) + (text.split(/\s+/).length * (4 / 3))) / 2)); + } + + #calculateTokenCostInCents (tokenCount: number, centsPerMillion?: number): number { + if ( !Number.isFinite(tokenCount) || tokenCount <= 0 ) return 0; + if ( !Number.isFinite(centsPerMillion) || (centsPerMillion ?? 
0) <= 0 ) return 0; + + return (tokenCount / 1_000_000) * (centsPerMillion as number); + } + + #toMicroCents (cents: number): number { + if ( !Number.isFinite(cents) || cents <= 0 ) return 1; + return Math.ceil(cents * 1_000_000); + } + + #toSafeCount (value: unknown): number { + if ( typeof value !== 'number' || !Number.isFinite(value) || value < 0 ) return 0; + return Math.floor(value); + } + #extractImageUrl (response: GenerateContentResponse): string | undefined { const parts = response?.candidates?.[0]?.content?.parts; if ( ! Array.isArray(parts) ) { diff --git a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts index f6beb98f4..72bf27037 100644 --- a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts +++ b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/models.ts @@ -26,8 +26,10 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ puterId: 'google:google/gemini-2.5-flash-image', id: 'gemini-2.5-flash-image', aliases: [ - 'gemini-2.5-flash-image-preview', 'gemini-2.5-flash-image', - 'google/gemini-2.5-flash-image-preview', 'google/gemini-2.5-flash-image', + 'gemini-2.5-flash-image-preview', + 'gemini-2.5-flash-image', + 'google/gemini-2.5-flash-image-preview', + 'google/gemini-2.5-flash-image', 'google:google/gemini-2.5-flash-image-preview', ], @@ -35,18 +37,20 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ version: '1.0', costs_currency: 'usd-cents', index_cost_key: '1x1', + index_input_cost_key: 'prompt_tokens', allowedQualityLevels: [''], costs: { - '1x1': 3.9, // $0.039 per image - '2x3': 3.9, // $0.039 per image - '3x2': 3.9, // $0.039 per image - '3x4': 3.9, // $0.039 per image - '4x3': 3.9, // $0.039 per image - '4x5': 3.9, // $0.039 per image - '5x4': 3.9, // $0.039 per image - '9x16': 3.9, // $0.039 per image - '16x9': 3.9, // $0.039 per image 
- '21x9': 3.9, // $0.039 per image + input: 30, // $0.30 per 1M prompt tokens, however google counts them + '1x1': 3.9, // $0.039 per image, just used to estimate allowed usage + '2x3': 3.9, // $0.039 per image, just used to estimate allowed usage + '3x2': 3.9, // $0.039 per image, just used to estimate allowed usage + '3x4': 3.9, // $0.039 per image, just used to estimate allowed usage + '4x3': 3.9, // $0.039 per image, just used to estimate allowed usage + '4x5': 3.9, // $0.039 per image, just used to estimate allowed usage + '5x4': 3.9, // $0.039 per image, just used to estimate allowed usage + '9x16': 3.9, // $0.039 per image, just used to estimate allowed usage + '16x9': 3.9, // $0.039 per image, just used to estimate allowed usage + '21x9': 3.9, // $0.039 per image, just used to estimate allowed usage }, allowedRatios: [ { w: 1, h: 1 }, @@ -62,16 +66,17 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ ], }, { - puterId: 'google:google/gemini-3-pro-image', - id: 'gemini-3-pro-image', + puterId: 'google:google/gemini-3-pro-image-preview', + id: 'gemini-3-pro-image-preview', name: 'Gemini 3 Pro Image', version: '1.0', costs_currency: 'usd-cents', index_cost_key: '1K:1x1', aliases: [ - 'gemini-3-pro-image-preview', 'gemini-3-pro-image', - 'google/gemini-3-pro-image-preview', 'google/gemini-3-pro-image', - 'google:google/gemini-3-pro-image-preview', + 'gemini-3-pro-image', + 'google/gemini-3-pro-image-preview', + 'google/gemini-3-pro-image', + 'google:google/gemini-3-pro-image', ], allowedQualityLevels: ['1K', '2K', '4K'], allowedRatios: [ @@ -87,36 +92,38 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ { w: 21, h: 9 }, ], costs: { - '1K:1x1': 13.51, // $0.1351 per image - '1K:2x3': 13.51, // $0.1351 per image - '1K:3x2': 13.51, // $0.1351 per image - '1K:3x4': 13.51, // $0.1351 per image - '1K:4x3': 13.51, // $0.1351 per image - '1K:4x5': 13.51, // $0.1351 per 
image - '1K:5x4': 13.51, // $0.1351 per image - '1K:9x16': 13.51, // $0.1351 per image - '1K:16x9': 13.51, // $0.1351 per image - '1K:21x9': 13.51, // $0.1351 per image - '2K:1x1': 13.51, // $0.1351 per image - '2K:2x3': 13.51, // $0.1351 per image - '2K:3x2': 13.51, // $0.1351 per image - '2K:3x4': 13.51, // $0.1351 per image - '2K:4x3': 13.51, // $0.1351 per image - '2K:4x5': 13.51, // $0.1351 per image - '2K:5x4': 13.51, // $0.1351 per image - '2K:9x16': 13.51, // $0.1351 per image - '2K:16x9': 13.51, // $0.1351 per image - '2K:21x9': 13.51, // $0.1351 per image - '4K:1x1': 24.1, // $0.24 per image - '4K:2x3': 24.1, // $0.24 per image - '4K:3x2': 24.1, // $0.24 per image - '4K:3x4': 24.1, // $0.24 per image - '4K:4x3': 24.1, // $0.24 per image - '4K:4x5': 24.1, // $0.24 per image - '4K:5x4': 24.1, // $0.24 per image - '4K:9x16': 24.1, // $0.24 per image - '4K:16x9': 24.1, // $0.24 per image - '4K:21x9': 24.1, // $0.24 per image + input: 200, // $2.00 per 1M input tokens, however google counts them + output: 1200, // $12.00 per 1M output tokens, however google counts them + '1K:1x1': 13.4, // $0.134 per image + '1K:2x3': 13.4, // $0.134 per image + '1K:3x2': 13.4, // $0.134 per image + '1K:3x4': 13.4, // $0.134 per image + '1K:4x3': 13.4, // $0.134 per image + '1K:4x5': 13.4, // $0.134 per image + '1K:5x4': 13.4, // $0.134 per image + '1K:9x16': 13.4, // $0.134 per image + '1K:16x9': 13.4, // $0.134 per image + '1K:21x9': 13.4, // $0.134 per image + '2K:1x1': 13.4, // $0.134 per image + '2K:2x3': 13.4, // $0.134 per image + '2K:3x2': 13.4, // $0.134 per image + '2K:3x4': 13.4, // $0.134 per image + '2K:4x3': 13.4, // $0.134 per image + '2K:4x5': 13.4, // $0.134 per image + '2K:5x4': 13.4, // $0.134 per image + '2K:9x16': 13.4, // $0.134 per image + '2K:16x9': 13.4, // $0.134 per image + '2K:21x9': 13.4, // $0.134 per image + '4K:1x1': 24, // $0.24 per image + '4K:2x3': 24, // $0.24 per image + '4K:3x2': 24, // $0.24 per image + '4K:3x4': 24, // $0.24 per image + 
'4K:4x3': 24, // $0.24 per image + '4K:4x5': 24, // $0.24 per image + '4K:5x4': 24, // $0.24 per image + '4K:9x16': 24, // $0.24 per image + '4K:16x9': 24, // $0.24 per image + '4K:21x9': 24, // $0.24 per image }, }, ]; diff --git a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts index d4fa2aaa3..53fe18418 100644 --- a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts +++ b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/OpenAiImageGenerationProvider.ts @@ -18,13 +18,24 @@ */ import openai, { OpenAI } from 'openai'; -import { ImageGenerateParamsNonStreaming } from 'openai/resources/images.js'; +import { ImageGenerateParamsNonStreaming, ImagesResponse } from 'openai/resources/images.js'; import APIError from '../../../../../api/APIError.js'; import { ErrorService } from '../../../../../modules/core/ErrorService.js'; import { Context } from '../../../../../util/context.js'; import { MeteringService } from '../../../../MeteringService/MeteringService.js'; -import { IGenerateParams, IImageProvider } from '../types.js'; +import { IGenerateParams, IImageModel, IImageProvider } from '../types.js'; import { OPEN_AI_IMAGE_GENERATION_MODELS } from './models.js'; + +interface OpenAIImageUsage { + inputTokens: number; + outputTokens: number; + inputTextTokens: number; + inputImageTokens: number; + cachedInputTokens: number; + cachedInputTextTokens: number; + cachedInputImageTokens: number; +} + /** * Service class for generating images using OpenAI's DALL-E API. 
* Extends BaseService to provide image generation capabilities through @@ -37,6 +48,15 @@ export class OpenAiImageGenerationProvider implements IImageProvider { #openai: OpenAI; #errors: ErrorService; + static #NON_SIZE_COST_KEYS = [ + 'text_input', + 'text_cached_input', + 'text_output', + 'image_input', + 'image_cached_input', + 'image_output', + ]; + constructor (config: { apiKey: string }, meteringService: MeteringService, errorService: ErrorService) { this.#meteringService = meteringService; this.#openai = new openai.OpenAI({ @@ -81,8 +101,10 @@ export class OpenAiImageGenerationProvider implements IImageProvider { const size = `${ratio.w}x${ratio.h}`; const price_key = this.#buildPriceKey(selectedModel.id, quality!, size); - if ( ! selectedModel?.costs[price_key] ) { - const availableSizes = Object.keys(selectedModel?.costs); + const outputPriceInCents = selectedModel?.costs[price_key]; + if ( outputPriceInCents === undefined ) { + const availableSizes = Object.keys(selectedModel?.costs) + .filter(key => !OpenAiImageGenerationProvider.#NON_SIZE_COST_KEYS.includes(key)); throw APIError.create('field_invalid', undefined, { key: 'size/quality combination', expected: `one of: ${ availableSizes.join(', ')}`, @@ -100,8 +122,18 @@ export class OpenAiImageGenerationProvider implements IImageProvider { }); } - const costInMicroCents = selectedModel.costs[price_key] * 1_000_000; - const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents); + const estimatedPromptTokenCount = this.#estimatePromptTokenCount(prompt); + const estimatedInputCostInCents = this.#calculateInputCostInCents(selectedModel, { + inputTokens: estimatedPromptTokenCount, + inputTextTokens: estimatedPromptTokenCount, + inputImageTokens: 0, + cachedInputTokens: 0, + cachedInputTextTokens: 0, + cachedInputImageTokens: 0, + } as OpenAIImageUsage); + const estimatedOutputCostInCents = outputPriceInCents; + const estimatedTotalCostInMicroCents = 
this.#toMicroCents(estimatedInputCostInCents + estimatedOutputCostInCents); + const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, estimatedTotalCostInMicroCents); if ( ! usageAllowed ) { throw APIError.create('insufficient_funds'); @@ -117,18 +149,55 @@ export class OpenAiImageGenerationProvider implements IImageProvider { const result = await this.#openai.images.generate(apiParams); - // For image generation, usage is typically image count and resolution - const usageType = `openai:${selectedModel.id}:${price_key}`; - this.#meteringService.incrementUsage(actor, usageType, 1, costInMicroCents); + const usage = this.#extractUsage(result); + const hasInputTokenUsage = + usage.inputTokens > 0 || + usage.inputTextTokens > 0 || + usage.inputImageTokens > 0; + const hasOutputTokenUsage = usage.outputTokens > 0; - const spending_meta = { - model, - size: `${ratio.w}x${ratio.h}`, + const billableUsage = hasInputTokenUsage ? usage : { + ...usage, + inputTokens: estimatedPromptTokenCount, + inputTextTokens: estimatedPromptTokenCount, }; - if ( quality ) { - spending_meta.size = `${quality}:${ spending_meta.size}`; + const inputCostInCents = hasInputTokenUsage + ? 
this.#calculateInputCostInCents(selectedModel, billableUsage) + : estimatedInputCostInCents; + const outputCostInCents = this.#calculateOutputCostInCents(selectedModel, usage, outputPriceInCents); + + const usageType = `openai:${selectedModel.id}:${price_key}`; + const usageEntries: Array<{ usageType: string; usageAmount: number; costOverride: number }> = []; + if ( inputCostInCents > 0 ) { + usageEntries.push({ + usageType: `${usageType}:input`, + usageAmount: Math.max(billableUsage.inputTokens || estimatedPromptTokenCount, 1), + costOverride: this.#toMicroCents(inputCostInCents), + }); } + if ( outputCostInCents > 0 ) { + usageEntries.push({ + usageType: `${usageType}:output`, + usageAmount: Math.max(usage.outputTokens, 1), + costOverride: this.#toMicroCents(outputCostInCents), + }); + } + if ( usageEntries.length ) { + this.#meteringService.batchIncrementUsages(actor, usageEntries); + } + + this.#setResponseCostMetadata({ + model: selectedModel.id, + quality, + ratio, + inputCostInCents, + outputCostInCents, + usage: billableUsage, + inputUsageSource: hasInputTokenUsage ? 'token-usage' : 'prompt-estimate', + outputUsageSource: hasOutputTokenUsage ? 'token-usage' : 'per-image-fallback', + outputPriceInCents, + }); const url = result.data?.[0]?.url || (result.data?.[0]?.b64_json ? `data:image/png;base64,${ result.data[0].b64_json}` : null); @@ -139,6 +208,209 @@ export class OpenAiImageGenerationProvider implements IImageProvider { return url; } + #extractUsage (result: ImagesResponse): OpenAIImageUsage { + const usage = (result.usage ?? {}) as ImagesResponse.Usage & Record; + const inputTokens = this.#toSafeCount(usage.input_tokens); + const outputTokens = this.#toSafeCount(usage.output_tokens); + + const inputDetails = (usage.input_tokens_details ?? 
{}) as unknown as Record; + const inputTextTokens = this.#toSafeCount(inputDetails.text_tokens); + const inputImageTokens = this.#toSafeCount(inputDetails.image_tokens); + + const cachedInputTokens = Math.max( + this.#toSafeCount((usage as Record).cached_input_tokens), + this.#toSafeCount(inputDetails.cached_tokens), + ); + + const cachedDetails = ((inputDetails.cached_tokens_details || inputDetails.cache_tokens_details) ?? {}) as Record; + const cachedInputTextTokens = this.#toSafeCount(cachedDetails.text_tokens); + const cachedInputImageTokens = this.#toSafeCount(cachedDetails.image_tokens); + + return { + inputTokens, + outputTokens, + inputTextTokens, + inputImageTokens, + cachedInputTokens, + cachedInputTextTokens, + cachedInputImageTokens, + }; + } + + #calculateInputCostInCents (selectedModel: IImageModel, usage: OpenAIImageUsage): number { + if ( ! this.#isGptImageModel(selectedModel.id) ) { + return 0; + } + + const textInputRate = this.#getCostRate(selectedModel, 'text_input'); + const textCachedInputRate = this.#getCostRate(selectedModel, 'text_cached_input') ?? textInputRate; + const imageInputRate = this.#getCostRate(selectedModel, 'image_input'); + const imageCachedInputRate = this.#getCostRate(selectedModel, 'image_cached_input') ?? imageInputRate; + + if ( textInputRate === undefined && imageInputRate === undefined ) { + return 0; + } + + const totalInputTokens = Math.max(usage.inputTokens, usage.inputTextTokens + usage.inputImageTokens); + let textTokens = usage.inputTextTokens; + let imageTokens = usage.inputImageTokens; + + // Current image generate calls are usually text-only prompts. 
+ if ( textTokens + imageTokens === 0 && totalInputTokens > 0 ) { + textTokens = totalInputTokens; + } + + const knownInputTokens = textTokens + imageTokens; + let cachedInputTokens = Math.min(usage.cachedInputTokens, knownInputTokens || totalInputTokens); + + let cachedTextTokens = Math.min(usage.cachedInputTextTokens, textTokens); + let cachedImageTokens = Math.min(usage.cachedInputImageTokens, imageTokens); + + let cachedRemaining = Math.max(0, cachedInputTokens - (cachedTextTokens + cachedImageTokens)); + if ( cachedRemaining > 0 ) { + const availableText = Math.max(textTokens - cachedTextTokens, 0); + const availableImage = Math.max(imageTokens - cachedImageTokens, 0); + const availableTotal = availableText + availableImage; + + if ( availableTotal > 0 ) { + const proportionalText = Math.min(availableText, Math.round((availableText / availableTotal) * cachedRemaining)); + cachedTextTokens += proportionalText; + cachedRemaining -= proportionalText; + + const proportionalImage = Math.min(availableImage, cachedRemaining); + cachedImageTokens += proportionalImage; + cachedRemaining -= proportionalImage; + } + + if ( cachedRemaining > 0 && textTokens > cachedTextTokens ) { + const extraText = Math.min(textTokens - cachedTextTokens, cachedRemaining); + cachedTextTokens += extraText; + cachedRemaining -= extraText; + } + + if ( cachedRemaining > 0 && imageTokens > cachedImageTokens ) { + const extraImage = Math.min(imageTokens - cachedImageTokens, cachedRemaining); + cachedImageTokens += extraImage; + cachedRemaining -= extraImage; + } + } + + const uncachedTextTokens = Math.max(textTokens - cachedTextTokens, 0); + const uncachedImageTokens = Math.max(imageTokens - cachedImageTokens, 0); + + return this.#costForTokens(uncachedTextTokens, textInputRate) + + this.#costForTokens(cachedTextTokens, textCachedInputRate) + + this.#costForTokens(uncachedImageTokens, imageInputRate) + + this.#costForTokens(cachedImageTokens, imageCachedInputRate); + } + + 
/**
 * Computes the output-side cost, in cents, of a generation.
 *
 * `fallbackPriceInCents` is returned unchanged when `#isGptImageModel`
 * rejects the model id, when no output tokens were reported, or when the
 * model defines neither an `image_output` nor a `text_output` rate.
 * Otherwise the output tokens are priced at `image_output`, or at
 * `text_output` when `image_output` is missing.
 *
 * @param selectedModel - model whose `costs` table supplies the rates
 * @param usage - token counts for the request
 * @param fallbackPriceInCents - price to use when token pricing does not apply
 * @returns the output cost in cents
 */
#calculateOutputCostInCents (selectedModel: IImageModel, usage: OpenAIImageUsage, fallbackPriceInCents: number): number {
    if ( ! this.#isGptImageModel(selectedModel.id) ) {
        return fallbackPriceInCents;
    }

    if ( usage.outputTokens <= 0 ) {
        return fallbackPriceInCents;
    }

    // Prefer the image output rate; fall back to the text output rate.
    const outputRate = this.#getCostRate(selectedModel, 'image_output')
        ?? this.#getCostRate(selectedModel, 'text_output');
    if ( outputRate === undefined ) {
        return fallbackPriceInCents;
    }

    return this.#costForTokens(usage.outputTokens, outputRate);
}

/**
 * Records the cost breakdown of the current call on its response metadata.
 *
 * Reads the `client_driver_call` value from `Context` and, when it carries a
 * `response_metadata` object, overwrites its `cost` and `cost_components`
 * entries. Does nothing when the context value or its metadata is missing.
 */
#setResponseCostMetadata ({
    model,
    quality,
    ratio,
    inputCostInCents,
    outputCostInCents,
    usage,
    inputUsageSource,
    outputUsageSource,
    outputPriceInCents,
}: {
    model: string;
    quality?: string;
    ratio: { w: number; h: number };
    inputCostInCents: number;
    outputCostInCents: number;
    usage: OpenAIImageUsage;
    inputUsageSource: 'token-usage' | 'prompt-estimate';
    outputUsageSource: 'token-usage' | 'per-image-fallback';
    outputPriceInCents: number;
}) {
    // `Record` needs both type arguments to type-check; values are opaque here.
    const clientDriverCall = Context.get('client_driver_call') as { response_metadata?: Record<string, unknown> } | undefined;
    const responseMetadata = clientDriverCall?.response_metadata;
    if ( ! responseMetadata ) return;

    const totalCostInCents = inputCostInCents + outputCostInCents;
    responseMetadata.cost = {
        currency: 'usd-cents',
        input: inputCostInCents,
        output: outputCostInCents,
        total: totalCostInCents,
    };
    responseMetadata.cost_components = {
        provider: 'openai-image-generation',
        model,
        quality,
        ratio: `${ratio.w}x${ratio.h}`,
        input_usage_source: inputUsageSource,
        output_usage_source: outputUsageSource,
        output_image_price_cents: outputPriceInCents,
        input_tokens: usage.inputTokens,
        output_tokens: usage.outputTokens,
        input_text_tokens: usage.inputTextTokens,
        input_image_tokens: usage.inputImageTokens,
        cached_input_tokens: usage.cachedInputTokens,
        cached_input_text_tokens: usage.cachedInputTextTokens,
        cached_input_image_tokens: usage.cachedInputImageTokens,
        input_microcents: this.#toMicroCents(inputCostInCents),
        output_microcents: this.#toMicroCents(outputCostInCents),
        total_microcents: this.#toMicroCents(totalCostInCents),
    };
}

/**
 * Estimates the token count of a prompt without a tokenizer.
 *
 * Averages a character-based estimate (length / 4) with a word-based one
 * (whitespace-separated words * 4/3) and floors the result.
 *
 * @param prompt - raw prompt text
 * @returns 0 for a blank prompt, otherwise an integer of at least 1
 */
#estimatePromptTokenCount (prompt: string): number {
    const text = prompt.trim();
    if ( text.length === 0 ) return 0;

    // Same approximation used by chat and Gemini image billing flows.
    return Math.max(1, Math.floor(((text.length / 4) + (text.split(/\s+/).length * (4 / 3))) / 2));
}

/**
 * Looks up a rate in the model's `costs` table.
 *
 * @param selectedModel - model whose `costs` table is read
 * @param key - cost key, e.g. 'text_input'
 * @returns the value when it is a finite number, otherwise `undefined`
 */
#getCostRate (selectedModel: IImageModel, key: string): number | undefined {
    const value = selectedModel.costs[key];
    if ( ! Number.isFinite(value) ) {
        return undefined;
    }
    return value;
}

/**
 * Prices a token count at a per-million-token rate.
 *
 * @param tokenCount - number of tokens to price
 * @param centsPerMillion - rate in cents per 1,000,000 tokens
 * @returns 0 when either argument is missing, non-finite or not positive,
 *          otherwise the (fractional) cost in cents
 */
#costForTokens (tokenCount: number, centsPerMillion?: number): number {
    if ( !Number.isFinite(tokenCount) || tokenCount <= 0 ) return 0;
    // The explicit undefined check narrows the type, so no cast is needed below.
    if ( centsPerMillion === undefined || !Number.isFinite(centsPerMillion) || centsPerMillion <= 0 ) return 0;
    return (tokenCount / 1_000_000) * centsPerMillion;
}

/**
 * Converts cents to whole microcents (cents * 1,000,000), rounding up.
 *
 * NOTE(review): non-finite and non-positive inputs yield 1, not 0, so a zero
 * cost is reported as 1 microcent in `cost_components`. This looks like a
 * deliberate minimum; confirm before changing.
 *
 * @param cents - amount in cents
 * @returns an integer number of microcents, at least 1
 */
#toMicroCents (cents: number): number {
    if ( !Number.isFinite(cents) || cents <= 0 ) return 1;
    return Math.ceil(cents * 1_000_000);
}

/**
 * Coerces an untyped value into a non-negative integer count.
 *
 * @param value - any value
 * @returns 0 for non-numbers, NaN, infinities and negatives; otherwise the
 *          value rounded down
 */
#toSafeCount (value: unknown): number {
    if ( typeof value !== 'number' || !Number.isFinite(value) || value < 0 ) return 0;
    return Math.floor(value);
}

#isGptImageModel (model: string) { // Covers gpt-image-1, gpt-image-1-mini, gpt-image-1.5 and future variants. return model.startsWith('gpt-image-1'); diff --git a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts index 1941d08c6..3b86dd05a 100644 --- a/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts +++ b/src/backend/src/services/ai/image/providers/OpenAiImageGenerationProvider/models.ts @@ -10,15 +10,24 @@ export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [ costs_currency: 'usd-cents', index_cost_key: 'low:1024x1024', costs: { + // Text tokens (per 1M tokens) + text_input: 500, // $5.00 + text_cached_input: 125, // $1.25 + text_output: 1000, // $10.00 + // Image tokens (per 1M tokens) + image_input: 800, // $8.00 + image_cached_input: 200, // $2.00 + image_output: 3200, // $32.00 + // Image generation (per image) 'low:1024x1024': 0.9, 'low:1024x1536': 1.3, 'low:1536x1024': 1.3, 'medium:1024x1024': 3.4, - 'medium:1024x1536': 5.1, + 'medium:1024x1536': 5, 'medium:1536x1024': 5, 'high:1024x1024': 13.3, 'high:1024x1536': 20, - 'high:1536x1024': 19.9, + 'high:1536x1024': 20, }, allowedQualityLevels: ['low', 'medium', 'high'], allowedRatios: [{ w: 1024, h: 1024 }, { w: 1024, h: 1536 }, { w: 1536, h: 1024 }], @@ -32,6 +41,14 @@ export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [ costs_currency: 'usd-cents', index_cost_key: 'low:1024x1024', costs: { + // Text tokens (per 1M tokens) + text_input: 200, // 
$2.00 + text_cached_input: 20, // $0.20 + // Image tokens (per 1M tokens) + image_input: 250, // $2.50 + image_cached_input: 25, // $0.25 + image_output: 800, // $8.00 + // Image generation (per image) 'low:1024x1024': 0.5, 'low:1024x1536': 0.6, 'low:1536x1024': 0.6, @@ -54,6 +71,14 @@ export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [ costs_currency: 'usd-cents', index_cost_key: 'low:1024x1024', costs: { + // Text tokens (per 1M tokens) + text_input: 500, // $5.00 + text_cached_input: 125, // $1.25 + // Image tokens (per 1M tokens) + image_input: 1000, // $10.00 + image_cached_input: 250, // $2.50 + image_output: 4000, // $40.00 + // Image generation (per image) 'low:1024x1024': 1.1, 'low:1024x1536': 1.6, 'low:1536x1024': 1.6, @@ -99,6 +124,6 @@ export const OPEN_AI_IMAGE_GENERATION_MODELS: IImageModel[] = [ '512x512': 1.8, '1024x1024': 2, }, - allowedRatios: [ { w: 256, h: 256 }, { w: 512, h: 512 }, { w: 1024, h: 1024 }], + allowedRatios: [{ w: 256, h: 256 }, { w: 512, h: 512 }, { w: 1024, h: 1024 }], }, ]; diff --git a/src/backend/src/services/ai/image/providers/types.ts b/src/backend/src/services/ai/image/providers/types.ts index 1d8b99dc5..2ed87ab87 100644 --- a/src/backend/src/services/ai/image/providers/types.ts +++ b/src/backend/src/services/ai/image/providers/types.ts @@ -8,6 +8,7 @@ export interface IImageModel { version?: string; costs_currency: string; index_cost_key?: string; + index_input_cost_key?: string; costs: Record; allowedQualityLevels?: string[]; allowedRatios?: { w: number, h: number }[]; diff --git a/src/backend/src/util/securehttp.js b/src/backend/src/util/securehttp.js index 49f90b9b5..15584c05d 100644 --- a/src/backend/src/util/securehttp.js +++ b/src/backend/src/util/securehttp.js @@ -176,8 +176,6 @@ async function secureAxiosRequest (axios, url, options = {}) { // Validate URL doesn't contain IP addresses validateUrlNoIP(url); - console.log(`[securehttp] Making secure request to ${url}`); - // Create secure agents const { 
httpAgent, httpsAgent } = createSecureAgents(); diff --git a/src/puter-js/package-lock.json b/src/puter-js/package-lock.json index 73f360b7a..7ca78286c 100644 --- a/src/puter-js/package-lock.json +++ b/src/puter-js/package-lock.json @@ -1,12 +1,12 @@ { "name": "puter", - "version": "2.2.5", + "version": "2.2.10", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "puter", - "version": "2.2.5", + "version": "2.2.10", "license": "Apache-2.0", "dependencies": { "@heyputer/kv.js": "^0.1.92", diff --git a/src/puter-js/package.json b/src/puter-js/package.json index 20fad8df6..496c61e2f 100644 --- a/src/puter-js/package.json +++ b/src/puter-js/package.json @@ -1,6 +1,6 @@ { "name": "@heyputer/puter.js", - "version": "2.2.5", + "version": "2.2.10", "description": "Puter.js - A JavaScript library for interacting with Puter services.", "homepage": "https://developer.puter.com", "main": "src/index.js",