From 8924b2ceabbe04879af91ca43e32a5ddcfecc403 Mon Sep 17 00:00:00 2001 From: jelveh Date: Sun, 26 Oct 2025 22:58:39 -0700 Subject: [PATCH] Update OpenAI package to version 6.7.0 and add video generation capabilities - Updated `openai` dependency in `package-lock.json` and `package.json`. - Introduced new permissions for video generation in `hardcoded-permissions.js`. - Added video generation service in `AIInterfaceService.js` and registered it in `PuterAIModule.js`. - Implemented coercion for video streams in `CoercionService.js`. - Updated driver service to include video generation. - Added `txt2video` method and options in `AI.js` and corresponding TypeScript definitions in `index.d.ts`. --- package-lock.json | 34 +-- src/backend/package.json | 2 +- src/backend/src/data/hardcoded-permissions.js | 2 + .../src/modules/puterai/AIInterfaceService.js | 38 +++ .../puterai/OpenAIVideoGenerationService.js | 288 ++++++++++++++++++ .../src/modules/puterai/PuterAIModule.js | 3 + .../MeteringService/costMaps/index.ts | 4 +- .../costMaps/openaiVideoCostMap.ts | 8 + .../src/services/drivers/CoercionService.js | 31 ++ .../src/services/drivers/DriverService.js | 1 + src/puter-js/index.d.ts | 13 +- src/puter-js/src/modules/AI.js | 72 +++++ 12 files changed, 464 insertions(+), 32 deletions(-) create mode 100644 src/backend/src/modules/puterai/OpenAIVideoGenerationService.js create mode 100644 src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts diff --git a/package-lock.json b/package-lock.json index d258d3f6e..9479d2d4e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14725,25 +14725,16 @@ } }, "node_modules/openai": { - "version": "4.104.0", - "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz", - "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==", + "version": "6.7.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-6.7.0.tgz", + "integrity": 
"sha512-mgSQXa3O/UXTbA8qFzoa7aydbXBJR5dbLQXCRapAOtoNT+v69sLdKMZzgiakpqhclRnhPggPAXoniVGn2kMY2A==", "license": "Apache-2.0", - "dependencies": { - "@types/node": "^18.11.18", - "@types/node-fetch": "^2.6.4", - "abort-controller": "^3.0.0", - "agentkeepalive": "^4.2.1", - "form-data-encoder": "1.7.2", - "formdata-node": "^4.3.2", - "node-fetch": "^2.6.7" - }, "bin": { "openai": "bin/cli" }, "peerDependencies": { "ws": "^8.18.0", - "zod": "^3.23.8" + "zod": "^3.25 || ^4.0" }, "peerDependenciesMeta": { "ws": { @@ -14754,21 +14745,6 @@ } } }, - "node_modules/openai/node_modules/@types/node": { - "version": "18.19.130", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", - "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", - "license": "MIT", - "dependencies": { - "undici-types": "~5.26.4" - } - }, - "node_modules/openai/node_modules/undici-types": { - "version": "5.26.5", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "license": "MIT" - }, "node_modules/opener": { "version": "1.5.2", "resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz", @@ -19178,7 +19154,7 @@ "murmurhash": "^2.0.1", "nodemailer": "^6.9.3", "on-finished": "^2.4.1", - "openai": "^4.73.1", + "openai": "^6.7.0", "otpauth": "9.2.4", "prompt-sync": "^4.2.0", "proxyquire": "^2.1.3", diff --git a/src/backend/package.json b/src/backend/package.json index 89c6c3bfc..f9670853a 100644 --- a/src/backend/package.json +++ b/src/backend/package.json @@ -63,7 +63,7 @@ "murmurhash": "^2.0.1", "nodemailer": "^6.9.3", "on-finished": "^2.4.1", - "openai": "^4.73.1", + "openai": "^6.7.0", "otpauth": "9.2.4", "prompt-sync": "^4.2.0", "proxyquire": "^2.1.3", diff --git a/src/backend/src/data/hardcoded-permissions.js b/src/backend/src/data/hardcoded-permissions.js index 
d75685c31..171800911 100644 --- a/src/backend/src/data/hardcoded-permissions.js +++ b/src/backend/src/data/hardcoded-permissions.js @@ -23,6 +23,7 @@ const default_implicit_user_app_permissions = { 'driver:puter-ocr:recognize': {}, 'driver:puter-chat-completion': {}, 'driver:puter-image-generation': {}, + 'driver:puter-video-generation': {}, 'driver:puter-tts': {}, 'driver:puter-apps': {}, 'driver:puter-subdomains': {}, @@ -58,6 +59,7 @@ const implicit_user_app_permissions = [ 'driver:puter-kvstore:flush': {}, 'driver:puter-chat-completion:complete': {}, 'driver:puter-image-generation:generate': {}, + 'driver:puter-video-generation:generate': {}, 'driver:puter-analytics:create_trace': {}, 'driver:puter-analytics:record': {}, }, diff --git a/src/backend/src/modules/puterai/AIInterfaceService.js b/src/backend/src/modules/puterai/AIInterfaceService.js index 521c53458..b7c73b291 100644 --- a/src/backend/src/modules/puterai/AIInterfaceService.js +++ b/src/backend/src/modules/puterai/AIInterfaceService.js @@ -125,6 +125,44 @@ class AIInterfaceService extends BaseService { } }); + col_interfaces.set('puter-video-generation', { + description: 'AI Video Generation.', + methods: { + generate: { + description: 'Generate a video from a prompt.', + parameters: { + prompt: { type: 'string' }, + model: { type: 'string', optional: true }, + seconds: { type: 'number', optional: true }, + duration: { type: 'number', optional: true }, + size: { type: 'string', optional: true }, + resolution: { type: 'string', optional: true }, + input_reference: { type: 'file', optional: true }, + }, + result_choices: [ + { + names: ['url'], + type: { + $: 'string:url:web', + content_type: 'video', + } + }, + { + names: ['video'], + type: { + $: 'stream', + content_type: 'video', + } + }, + ], + result: { + description: 'Video asset descriptor or URL for the generated video.', + type: 'json' + } + } + } + }); + col_interfaces.set('puter-tts', { description: 'Text-to-speech.', methods: { diff --git 
a/src/backend/src/modules/puterai/OpenAIVideoGenerationService.js b/src/backend/src/modules/puterai/OpenAIVideoGenerationService.js new file mode 100644 index 000000000..7322603d5 --- /dev/null +++ b/src/backend/src/modules/puterai/OpenAIVideoGenerationService.js @@ -0,0 +1,288 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ */ + +// METADATA // {"ai-commented":{"service":"claude"}} +const APIError = require('../../api/APIError'); +const BaseService = require('../../services/BaseService'); +const { TypedValue } = require('../../services/drivers/meta/Runtime'); +const { Context } = require('../../util/context'); +const { Readable } = require('stream'); + +const DEFAULT_TEST_VIDEO_URL = 'https://puter-sample-data.puter.site/video_example.mp4'; +const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes +const POLL_INTERVAL_MS = 5_000; +const DEFAULT_DURATION_SECONDS = 4; +const DEFAULT_SIZE = '720x1280'; +const ALLOWED_SIZES = new Set(['720x1280', '1280x720', '1024x1792', '1792x1024']); +const ALLOWED_SECONDS = new Set(['4', '8', '12']); + +class OpenAIVideoGenerationService extends BaseService { + /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */ + get meteringService(){ + return this.services.get('meteringService').meteringService; + } + + static MODULES = { + openai: require('openai'), + }; + + _construct() { + this.models_ = { + 'sora-2': { + defaultUsageKey: 'openai:sora-2:default', + }, + 'sora-2-pro': { + defaultUsageKey: 'openai:sora-2-pro:default', + }, + }; + } + + async _init() { + let apiKey = + this.config?.services?.openai?.apiKey ?? + this.global_config?.services?.openai?.apiKey; + + if ( !apiKey ) { + apiKey = + this.config?.openai?.secret_key ?? + this.global_config.openai?.secret_key; + + console.warn('The `openai.secret_key` configuration format is deprecated. 
' + + 'Please use `services.openai.apiKey` instead.'); + } + + this.openai = new this.modules.openai.OpenAI({ + apiKey, + }); + } + + static IMPLEMENTS = { + ['driver-capabilities']: { + supports_test_mode(iface, method_name) { + return iface === 'puter-video-generation' && + method_name === 'generate'; + }, + }, + ['puter-video-generation']: { + async generate(params) { + return await this.generateVideo(params); + }, + }, + }; + + async generateVideo(params) { + const { + prompt, + model: requestedModel, + duration, + seconds, + size, + resolution, + input_reference: inputReference, + test_mode: testMode, + } = params ?? {}; + + if ( typeof prompt !== 'string' || !prompt.trim() ) { + throw APIError.create('field_invalid', null, { + key: 'prompt', + expected: 'a non-empty string', + got: prompt, + }); + } + + const model = requestedModel ?? 'sora-2'; + const modelConfig = this.models_[model]; + if ( !modelConfig ) { + throw APIError.create('field_invalid', null, { + key: 'model', + expected: 'one of: ' + Object.keys(this.models_).join(', '), + got: model, + }); + } + + if ( testMode ) { + return new TypedValue({ + $: 'string:url:web', + content_type: 'video', + }, DEFAULT_TEST_VIDEO_URL); + } + + const normalizedSize = this.#normalizeSize(size ?? resolution) ?? DEFAULT_SIZE; + const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? '4'; + + const usageKey = this.#determineUsageKey(model, normalizedSize); + if ( !usageKey ) { + throw new Error(`Unsupported pricing tier for model ${model}`); + } + + const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? 
DEFAULT_DURATION_SECONDS; + const actor = Context.get('actor'); + const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUnits); + if ( !usageAllowed ) { + throw APIError.create('insufficient_funds'); + } + + const createParams = { + model, + prompt, + seconds: normalizedSeconds, + size: normalizedSize, + }; + + if ( inputReference ) { + createParams.input_reference = inputReference; + } + + const createResponse = await this.openai.videos.create(createParams); + const finalJob = await this.#pollUntilComplete(createResponse); + + if ( finalJob.status === 'failed' ) { + const errorMessage = finalJob.error?.message ?? 'Video generation failed'; + throw new Error(errorMessage); + } + + const finalResolution = this.#normalizeSize(finalJob.size) ?? normalizedSize; + const finalUsageKey = this.#determineUsageKey(model, finalResolution); + if ( !finalUsageKey ) { + throw new Error(`Unsupported pricing tier for model ${model}`); + } + + const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits; + + const downloadResponse = await this.openai.videos.downloadContent(finalJob.id); + const contentType = downloadResponse.headers.get('content-type') ?? 
'video/mp4'; + + let stream = downloadResponse.body; + if ( stream && typeof stream.getReader === 'function' ) { + stream = Readable.fromWeb(stream); + } + + if ( !stream ) { + const arrayBuffer = await downloadResponse.arrayBuffer(); + stream = Readable.from(Buffer.from(arrayBuffer)); + } + + this.meteringService.incrementUsage(actor, finalUsageKey, actualSeconds); + + return new TypedValue({ + $: 'stream', + content_type: contentType, + }, stream); + } + + async #pollUntilComplete(initialJob) { + let job = initialJob; + const start = Date.now(); + + while ( job.status === 'queued' || job.status === 'in_progress' ) { + if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) { + throw new Error('Timed out waiting for Sora video generation to complete'); + } + + await this.#delay(POLL_INTERVAL_MS); + job = await this.openai.videos.retrieve(job.id); + } + + return job; + } + + async #delay(ms) { + return await new Promise(resolve => setTimeout(resolve, ms)); + } + + #normalizeSize(candidate) { + if ( !candidate ) return undefined; + const normalized = this.#normalizeResolution(candidate); + if ( normalized && ALLOWED_SIZES.has(normalized) ) { + return normalized; + } + return undefined; + } + + #normalizeSeconds(value) { + if ( value === null || value === undefined ) { + return undefined; + } + + if ( typeof value === 'number' && Number.isFinite(value) ) { + const rounded = String(Math.round(value)); + return ALLOWED_SECONDS.has(rounded) ? rounded : undefined; + } + + if ( typeof value === 'string' ) { + const trimmed = value.trim(); + if ( ALLOWED_SECONDS.has(trimmed) ) { + return trimmed; + } + const numeric = Number.parseInt(trimmed, 10); + if ( Number.isFinite(numeric) ) { + const normalized = String(numeric); + return ALLOWED_SECONDS.has(normalized) ? 
normalized : undefined; + } + } + + return undefined; + } + + #determineUsageKey(model, normalizedSize) { + const config = this.models_[model]; + if ( !config ) return null; + + if ( model === 'sora-2-pro' && ( normalizedSize === '1792x1024' || normalizedSize === '1024x1792' ) ) { + return 'openai:sora-2-pro:xl'; + } + + return config.defaultUsageKey; + } + + #normalizeResolution(value) { + if ( !value ) return undefined; + if ( typeof value === 'string' ) { + const match = value.match(/(\d+)\s*x\s*(\d+)/i); + if ( match ) { + const width = Number.parseInt(match[1], 10); + const height = Number.parseInt(match[2], 10); + if ( Number.isFinite(width) && Number.isFinite(height) ) { + // Keep the requested orientation: ALLOWED_SIZES lists portrait and + // landscape sizes separately, so the dimensions must not be reordered. + return `${width}x${height}`; + } + } + } + return undefined; + } + + #parseSeconds(value) { + if ( value === null || value === undefined ) return undefined; + if ( typeof value === 'number' && Number.isFinite(value) ) { + return value; + } + if ( typeof value === 'string' ) { + const numeric = Number.parseInt(value, 10); + if ( Number.isFinite(numeric) ) { + return numeric; + } + } + return undefined; + } +} + +module.exports = { + OpenAIVideoGenerationService, +}; diff --git a/src/backend/src/modules/puterai/PuterAIModule.js b/src/backend/src/modules/puterai/PuterAIModule.js index a98c332b6..af5a96ff6 100644 --- a/src/backend/src/modules/puterai/PuterAIModule.js +++ b/src/backend/src/modules/puterai/PuterAIModule.js @@ -61,6 +61,9 @@ class PuterAIModule extends AdvancedBase { const { OpenAIImageGenerationService } = require('./OpenAIImageGenerationService'); services.registerService('openai-image-generation', OpenAIImageGenerationService); + + const { OpenAIVideoGenerationService } = require('./OpenAIVideoGenerationService'); + services.registerService('openai-video-generation', OpenAIVideoGenerationService); } if ( config?.services?.claude ) { diff --git a/src/backend/src/services/MeteringService/costMaps/index.ts 
b/src/backend/src/services/MeteringService/costMaps/index.ts index c7ec1653d..9a4faca56 100644 --- a/src/backend/src/services/MeteringService/costMaps/index.ts +++ b/src/backend/src/services/MeteringService/costMaps/index.ts @@ -10,6 +10,7 @@ import { MISTRAL_COST_MAP } from './mistralCostMap'; import { OPENAI_COST_MAP } from './openAiCostMap'; import { OPENAI_IMAGE_COST_MAP } from './openaiImageCostMap'; import { OPENROUTER_COST_MAP } from './openrouterCostMap'; +import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap'; import { TOGETHER_COST_MAP } from './togetherCostMap'; import { XAI_COST_MAP } from './xaiCostMap'; @@ -24,8 +25,9 @@ export const COST_MAPS = { ...MISTRAL_COST_MAP, ...OPENAI_COST_MAP, ...OPENAI_IMAGE_COST_MAP, + ...OPENAI_VIDEO_COST_MAP, ...OPENROUTER_COST_MAP, ...TOGETHER_COST_MAP, ...XAI_COST_MAP, ...FILE_SYSTEM_COST_MAP, -}; \ No newline at end of file +}; diff --git a/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts new file mode 100644 index 000000000..d4fda85ab --- /dev/null +++ b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts @@ -0,0 +1,8 @@ +import { toMicroCents } from '../utils'; + +// Prices are per generated video-second. 
+export const OPENAI_VIDEO_COST_MAP = { + 'openai:sora-2:default': toMicroCents(0.10), + 'openai:sora-2-pro:default': toMicroCents(0.30), + 'openai:sora-2-pro:xl': toMicroCents(0.50), +}; diff --git a/src/backend/src/services/drivers/CoercionService.js b/src/backend/src/services/drivers/CoercionService.js index a3a3a13b7..ff07f4f4b 100644 --- a/src/backend/src/services/drivers/CoercionService.js +++ b/src/backend/src/services/drivers/CoercionService.js @@ -91,6 +91,37 @@ class CoercionService extends BaseService { } }); + this.coercions_.push({ + produces: { + $: 'stream', + content_type: 'video' + }, + consumes: { + $: 'string:url:web', + content_type: 'video' + }, + coerce: async typed_value => { + const response = await(async () => { + try { + return await CoercionService.MODULES.axios.get(typed_value.value, { + responseType: 'stream', + }); + } catch (e) { // must throw: otherwise `response` is undefined below + throw APIError.create('field_invalid', null, { + key: 'url', + expected: 'web URL', + got: 'error during request: ' + e.message, + }); + } + })(); + + return new TypedValue({ + $: 'stream', + content_type: response.headers['content-type'] ?? 
'video/mp4', + }, response.data); + } + }); + // Add coercion for data URLs to streams this.coercions_.push({ produces: { diff --git a/src/backend/src/services/drivers/DriverService.js b/src/backend/src/services/drivers/DriverService.js index 013058739..1edc9b922 100644 --- a/src/backend/src/services/drivers/DriverService.js +++ b/src/backend/src/services/drivers/DriverService.js @@ -291,6 +291,7 @@ class DriverService extends BaseService { ['puter-tts']: 'aws-polly', ['puter-chat-completion']: 'openai-completion', ['puter-image-generation']: 'openai-image-generation', + ['puter-video-generation']: 'openai-video-generation', 'puter-exec': 'judge0', 'convert-files': 'convert-api', 'puter-send-mail': 'user-send-mail', diff --git a/src/puter-js/index.d.ts b/src/puter-js/index.d.ts index 32fd9d456..a4742469d 100644 --- a/src/puter-js/index.d.ts +++ b/src/puter-js/index.d.ts @@ -51,6 +51,9 @@ interface AI { txt2img(prompt: string, testMode?: boolean): Promise; txt2img(prompt: string, options?: Txt2ImgOptions): Promise; + txt2video(prompt: string, testMode?: boolean): Promise; + txt2video(prompt: string, options?: Txt2VideoOptions): Promise; + txt2speech(text: string): Promise; txt2speech(text: string, options?: Txt2SpeechOptions): Promise; txt2speech(text: string, language?: string): Promise; @@ -114,6 +117,15 @@ interface Txt2ImgOptions { input_image_mime_type?: string; } +interface Txt2VideoOptions { + prompt?: string; + model?: 'sora-2' | 'sora-2-pro'; + duration?: 4 | 8 | 12; + seconds?: 4 | 8 | 12; + size?: '720x1280' | '1280x720' | '1024x1792' | '1792x1024'; + resolution?: '720x1280' | '1280x720' | '1024x1792' | '1792x1024'; +} + interface Txt2SpeechOptions { language?: string; voice?: string; @@ -512,4 +524,3 @@ export { WorkerExecOptions, WorkerInfo, Workers, WriteOptions }; - diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js index e1051e340..2afe40108 100644 --- a/src/puter-js/src/modules/AI.js +++ b/src/puter-js/src/modules/AI.js @@ 
-675,6 +675,78 @@ class AI{ } }).call(this, options); } + + txt2video = async (...args) => { + let options = {}; + let testMode = false; + + if(!args){ + throw({message: 'Arguments are required', code: 'arguments_required'}); + } + + if (typeof args[0] === 'string') { + options = { prompt: args[0] }; + } + + if (typeof args[1] === 'boolean' && args[1] === true) { + testMode = true; + } + + if (typeof args[0] === 'string' && args[1] && typeof args[1] === "object") { + // Copy so the caller's options object is never mutated. + options = { ...args[1], prompt: args[0] }; + } + + if (args[0] && typeof args[0] === 'object') { + options = { ...args[0] }; + } + + if (!options.prompt) { + throw({message: 'Prompt parameter is required', code: 'prompt_required'}); + } + + if (!options.model) { + options.model = 'sora-2'; + } + + if (options.duration !== undefined && options.seconds === undefined) { + options.seconds = options.duration; + } + + return await utils.make_driver_method(['prompt'], 'puter-video-generation', 'openai-video-generation', 'generate', { + responseType: 'blob', + test_mode: testMode ?? false, + transform: async result => { + let sourceUrl = null; + let mimeType = null; + if (result instanceof Blob) { + sourceUrl = await utils.blob_to_url(result); + mimeType = result.type || 'video/mp4'; + } else if (typeof result === 'string') { + sourceUrl = result; + } else if (result && typeof result === 'object') { + sourceUrl = result.asset_url || result.url || result.href || null; + mimeType = result.mime_type || result.content_type || null; + } + + if (!sourceUrl) { + return result; + } + + const video = document.createElement('video'); + video.src = sourceUrl; + video.controls = true; + video.preload = 'metadata'; + if (mimeType) { + video.setAttribute('data-mime-type', mimeType); + } + video.setAttribute('data-source', sourceUrl); + video.toString = () => video.src; + video.valueOf = () => video.src; + return video; + } + }).call(this, options); + } } export default AI;