diff --git a/package-lock.json b/package-lock.json
index d258d3f6e..9479d2d4e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -14725,25 +14725,16 @@
}
},
"node_modules/openai": {
- "version": "4.104.0",
- "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz",
- "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==",
+ "version": "6.7.0",
+ "resolved": "https://registry.npmjs.org/openai/-/openai-6.7.0.tgz",
+ "integrity": "sha512-mgSQXa3O/UXTbA8qFzoa7aydbXBJR5dbLQXCRapAOtoNT+v69sLdKMZzgiakpqhclRnhPggPAXoniVGn2kMY2A==",
"license": "Apache-2.0",
- "dependencies": {
- "@types/node": "^18.11.18",
- "@types/node-fetch": "^2.6.4",
- "abort-controller": "^3.0.0",
- "agentkeepalive": "^4.2.1",
- "form-data-encoder": "1.7.2",
- "formdata-node": "^4.3.2",
- "node-fetch": "^2.6.7"
- },
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
- "zod": "^3.23.8"
+ "zod": "^3.25 || ^4.0"
},
"peerDependenciesMeta": {
"ws": {
@@ -14754,21 +14745,6 @@
}
}
},
- "node_modules/openai/node_modules/@types/node": {
- "version": "18.19.130",
- "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
- "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
- "license": "MIT",
- "dependencies": {
- "undici-types": "~5.26.4"
- }
- },
- "node_modules/openai/node_modules/undici-types": {
- "version": "5.26.5",
- "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
- "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
- "license": "MIT"
- },
"node_modules/opener": {
"version": "1.5.2",
"resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
@@ -19178,7 +19154,7 @@
"murmurhash": "^2.0.1",
"nodemailer": "^6.9.3",
"on-finished": "^2.4.1",
- "openai": "^4.73.1",
+ "openai": "^6.7.0",
"otpauth": "9.2.4",
"prompt-sync": "^4.2.0",
"proxyquire": "^2.1.3",
diff --git a/src/backend/package.json b/src/backend/package.json
index 89c6c3bfc..f9670853a 100644
--- a/src/backend/package.json
+++ b/src/backend/package.json
@@ -63,7 +63,7 @@
"murmurhash": "^2.0.1",
"nodemailer": "^6.9.3",
"on-finished": "^2.4.1",
- "openai": "^4.73.1",
+ "openai": "^6.7.0",
"otpauth": "9.2.4",
"prompt-sync": "^4.2.0",
"proxyquire": "^2.1.3",
diff --git a/src/backend/src/data/hardcoded-permissions.js b/src/backend/src/data/hardcoded-permissions.js
index d75685c31..171800911 100644
--- a/src/backend/src/data/hardcoded-permissions.js
+++ b/src/backend/src/data/hardcoded-permissions.js
@@ -23,6 +23,7 @@ const default_implicit_user_app_permissions = {
'driver:puter-ocr:recognize': {},
'driver:puter-chat-completion': {},
'driver:puter-image-generation': {},
+ 'driver:puter-video-generation': {},
'driver:puter-tts': {},
'driver:puter-apps': {},
'driver:puter-subdomains': {},
@@ -58,6 +59,7 @@ const implicit_user_app_permissions = [
'driver:puter-kvstore:flush': {},
'driver:puter-chat-completion:complete': {},
'driver:puter-image-generation:generate': {},
+ 'driver:puter-video-generation:generate': {},
'driver:puter-analytics:create_trace': {},
'driver:puter-analytics:record': {},
},
diff --git a/src/backend/src/modules/puterai/AIInterfaceService.js b/src/backend/src/modules/puterai/AIInterfaceService.js
index 521c53458..b7c73b291 100644
--- a/src/backend/src/modules/puterai/AIInterfaceService.js
+++ b/src/backend/src/modules/puterai/AIInterfaceService.js
@@ -125,6 +125,44 @@ class AIInterfaceService extends BaseService {
}
});
+ col_interfaces.set('puter-video-generation', { // registers the driver interface for AI video generation
+ description: 'AI Video Generation.',
+ methods: {
+ generate: { // the interface's only method
+ description: 'Generate a video from a prompt.',
+ parameters: {
+ prompt: { type: 'string' }, // the only required parameter
+ model: { type: 'string', optional: true },
+ seconds: { type: 'number', optional: true }, // clip length
+ duration: { type: 'number', optional: true }, // alternative name for clip length; OpenAIVideoGenerationService reads `seconds ?? duration`
+ size: { type: 'string', optional: true }, // frame size
+ resolution: { type: 'string', optional: true }, // alternative name for frame size; OpenAIVideoGenerationService reads `size ?? resolution`
+ input_reference: { type: 'file', optional: true }, // optional reference file passed along with the prompt
+ },
+ result_choices: [ // the result may be either a web URL or a stream, both tagged as video
+ {
+ names: ['url'],
+ type: {
+ $: 'string:url:web',
+ content_type: 'video',
+ }
+ },
+ {
+ names: ['video'],
+ type: {
+ $: 'stream',
+ content_type: 'video',
+ }
+ },
+ ],
+ result: { // NOTE(review): declared alongside result_choices; which one takes precedence is not visible here — confirm
+ description: 'Video asset descriptor or URL for the generated video.',
+ type: 'json'
+ }
+ }
+ }
+ });
+
col_interfaces.set('puter-tts', {
description: 'Text-to-speech.',
methods: {
diff --git a/src/backend/src/modules/puterai/OpenAIVideoGenerationService.js b/src/backend/src/modules/puterai/OpenAIVideoGenerationService.js
new file mode 100644
index 000000000..7322603d5
--- /dev/null
+++ b/src/backend/src/modules/puterai/OpenAIVideoGenerationService.js
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+// METADATA // {"ai-commented":{"service":"claude"}}
+const APIError = require('../../api/APIError');
+const BaseService = require('../../services/BaseService');
+const { TypedValue } = require('../../services/drivers/meta/Runtime');
+const { Context } = require('../../util/context');
+const { Readable } = require('stream');
+
+const DEFAULT_TEST_VIDEO_URL = 'https://puter-sample-data.puter.site/video_example.mp4'; // returned as-is when test_mode is set
+const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes; upper bound on polling a job for completion
+const POLL_INTERVAL_MS = 5_000; // delay between two job status polls
+const DEFAULT_DURATION_SECONDS = 4; // metering fallback when the normalized duration cannot be parsed
+const DEFAULT_SIZE = '720x1280'; // used when the caller supplies no usable size/resolution
+const ALLOWED_SIZES = new Set(['720x1280', '1280x720', '1024x1792', '1792x1024']); // sizes accepted by #normalizeSize
+const ALLOWED_SECONDS = new Set(['4', '8', '12']); // durations accepted by #normalizeSeconds, stored as strings
+
+class OpenAIVideoGenerationService extends BaseService {
+ /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
+ get meteringService(){ // NOTE(review): presumably the registered 'meteringService' entry is a wrapper exposing the real instance under the same name — confirm
+ return this.services.get('meteringService').meteringService;
+ }
+
+ static MODULES = { // third-party modules used by this service; _init reads the SDK back via `this.modules.openai`
+ openai: require('openai'),
+ };
+
+ _construct() { // sets up the table of supported video models
+ this.models_ = { // model id -> config; generateVideo rejects any model that is not a key here
+ 'sora-2': {
+ defaultUsageKey: 'openai:sora-2:default', // metering key returned by #determineUsageKey for this model
+ },
+ 'sora-2-pro': {
+ defaultUsageKey: 'openai:sora-2-pro:default', // default tier; #determineUsageKey may select the 'xl' key instead
+ },
+ };
+ }
+
+ async _init() {
+ let apiKey =
+ this.config?.services?.openai?.apiKey ??
+ this.global_config?.services?.openai?.apiKey;
+
+ if ( !apiKey ) {
+ apiKey =
+ this.config?.openai?.secret_key ??
+ this.global_config.openai?.secret_key;
+
+ console.warn('The `openai.secret_key` configuration format is deprecated. ' +
+ 'Please use `services.openai.apiKey` instead.');
+ }
+
+ this.openai = new this.modules.openai.OpenAI({
+ apiKey,
+ });
+ }
+
+ static IMPLEMENTS = { // interface name -> method implementations provided by this service
+ ['driver-capabilities']: {
+ supports_test_mode(iface, method_name) { // true only for `generate` on 'puter-video-generation'
+ return iface === 'puter-video-generation' &&
+ method_name === 'generate';
+ },
+ },
+ ['puter-video-generation']: {
+ async generate(params) { // thin delegate; the work happens in generateVideo
+ return await this.generateVideo(params);
+ },
+ },
+ };
+
+ async generateVideo(params) { // validates the request, checks credits, runs an OpenAI video job to completion, meters it, and returns the video as a stream
+ const {
+ prompt,
+ model: requestedModel,
+ duration,
+ seconds,
+ size,
+ resolution,
+ input_reference: inputReference,
+ test_mode: testMode,
+ } = params ?? {};
+ // The prompt must be a non-blank string.
+ if ( typeof prompt !== 'string' || !prompt.trim() ) {
+ throw APIError.create('field_invalid', null, {
+ key: 'prompt',
+ expected: 'a non-empty string',
+ got: prompt,
+ });
+ }
+ // Defaults to 'sora-2'; only models registered in models_ are accepted.
+ const model = requestedModel ?? 'sora-2';
+ const modelConfig = this.models_[model];
+ if ( !modelConfig ) {
+ throw APIError.create('field_invalid', null, {
+ key: 'model',
+ expected: 'one of: ' + Object.keys(this.models_).join(', '),
+ got: model,
+ });
+ }
+ // Test mode short-circuits here: a fixed sample URL, no credit check, no OpenAI call.
+ if ( testMode ) {
+ return new TypedValue({
+ $: 'string:url:web',
+ content_type: 'video',
+ }, DEFAULT_TEST_VIDEO_URL);
+ }
+ // `size` wins over `resolution` and `seconds` over `duration`; values that do not normalize fall back to defaults rather than raising.
+ const normalizedSize = this.#normalizeSize(size ?? resolution) ?? DEFAULT_SIZE;
+ const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? '4';
+
+ const usageKey = this.#determineUsageKey(model, normalizedSize);
+ if ( !usageKey ) {
+ throw new Error(`Unsupported pricing tier for model ${model}`);
+ }
+ // Pre-flight credit check against the requested number of seconds.
+ const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? DEFAULT_DURATION_SECONDS;
+ const actor = Context.get('actor');
+ const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUnits);
+ if ( !usageAllowed ) {
+ throw APIError.create('insufficient_funds');
+ }
+
+ const createParams = {
+ model,
+ prompt,
+ seconds: normalizedSeconds,
+ size: normalizedSize,
+ };
+
+ if ( inputReference ) {
+ createParams.input_reference = inputReference;
+ }
+ // Start the job, then poll until it leaves the queued/in_progress states.
+ const createResponse = await this.openai.videos.create(createParams);
+ const finalJob = await this.#pollUntilComplete(createResponse);
+ // NOTE(review): only 'failed' is rejected; any other non-pending status proceeds to download — confirm intended.
+ if ( finalJob.status === 'failed' ) {
+ const errorMessage = finalJob.error?.message ?? 'Video generation failed';
+ throw new Error(errorMessage);
+ }
+ // Bill on what the finished job reports, falling back to the requested values.
+ const finalResolution = this.#normalizeSize(finalJob.size) ?? normalizedSize;
+ const finalUsageKey = this.#determineUsageKey(model, finalResolution);
+ if ( !finalUsageKey ) {
+ throw new Error(`Unsupported pricing tier for model ${model}`);
+ }
+
+ const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits;
+
+ const downloadResponse = await this.openai.videos.downloadContent(finalJob.id);
+ const contentType = downloadResponse.headers.get('content-type') ?? 'video/mp4';
+ // A body exposing getReader() is treated as a web stream and converted to a Node stream.
+ let stream = downloadResponse.body;
+ if ( stream && typeof stream.getReader === 'function' ) {
+ stream = Readable.fromWeb(stream);
+ }
+ // No body at all: buffer the whole payload and stream it from memory.
+ if ( !stream ) {
+ const arrayBuffer = await downloadResponse.arrayBuffer();
+ stream = Readable.from(Buffer.from(arrayBuffer));
+ }
+ // Usage is recorded once the download response is in hand, before the caller consumes the stream; the call is not awaited.
+ this.meteringService.incrementUsage(actor, finalUsageKey, actualSeconds);
+
+ return new TypedValue({
+ $: 'stream',
+ content_type: contentType,
+ }, stream);
+ }
+
+ async #pollUntilComplete(initialJob) {
+ let job = initialJob;
+ const start = Date.now();
+
+ while ( job.status === 'queued' || job.status === 'in_progress' ) {
+ if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) {
+ throw new Error('Timed out waiting for Sora video generation to complete');
+ }
+
+ await this.#delay(POLL_INTERVAL_MS);
+ job = await this.openai.videos.retrieve(job.id);
+ }
+
+ return job;
+ }
+
+ async #delay(ms) {
+ return await new Promise(resolve => setTimeout(resolve, ms));
+ }
+
+ #normalizeSize(candidate) {
+ if ( !candidate ) return undefined;
+ const normalized = this.#normalizeResolution(candidate);
+ if ( normalized && ALLOWED_SIZES.has(normalized) ) {
+ return normalized;
+ }
+ return undefined;
+ }
+
+ #normalizeSeconds(value) {
+ if ( value === null || value === undefined ) {
+ return undefined;
+ }
+
+ if ( typeof value === 'number' && Number.isFinite(value) ) {
+ const rounded = String(Math.round(value));
+ return ALLOWED_SECONDS.has(rounded) ? rounded : undefined;
+ }
+
+ if ( typeof value === 'string' ) {
+ const trimmed = value.trim();
+ if ( ALLOWED_SECONDS.has(trimmed) ) {
+ return trimmed;
+ }
+ const numeric = Number.parseInt(trimmed, 10);
+ if ( Number.isFinite(numeric) ) {
+ const normalized = String(numeric);
+ return ALLOWED_SECONDS.has(normalized) ? normalized : undefined;
+ }
+ }
+
+ return undefined;
+ }
+
+ #determineUsageKey(model, normalizedSize) {
+ const config = this.models_[model];
+ if ( !config ) return null;
+
+ if ( model === 'sora-2-pro' && normalizedSize === '1792x1024' ) {
+ return 'openai:sora-2-pro:xl';
+ }
+
+ return config.defaultUsageKey;
+ }
+
+ #normalizeResolution(value) {
+ if ( !value ) return undefined;
+ if ( typeof value === 'string' ) {
+ const match = value.match(/(\\d+)\\s*x\\s*(\\d+)/i);
+ if ( match ) {
+ const width = Number.parseInt(match[1], 10);
+ const height = Number.parseInt(match[2], 10);
+ if ( Number.isFinite(width) && Number.isFinite(height) ) {
+ const larger = Math.max(width, height);
+ const smaller = Math.min(width, height);
+ return `${larger}x${smaller}`;
+ }
+ }
+ }
+ return undefined;
+ }
+
+ #parseSeconds(value) {
+ if ( value === null || value === undefined ) return undefined;
+ if ( typeof value === 'number' && Number.isFinite(value) ) {
+ return value;
+ }
+ if ( typeof value === 'string' ) {
+ const numeric = Number.parseInt(value, 10);
+ if ( Number.isFinite(numeric) ) {
+ return numeric;
+ }
+ }
+ return undefined;
+ }
+}
+
+module.exports = {
+ OpenAIVideoGenerationService,
+};
diff --git a/src/backend/src/modules/puterai/PuterAIModule.js b/src/backend/src/modules/puterai/PuterAIModule.js
index a98c332b6..af5a96ff6 100644
--- a/src/backend/src/modules/puterai/PuterAIModule.js
+++ b/src/backend/src/modules/puterai/PuterAIModule.js
@@ -61,6 +61,9 @@ class PuterAIModule extends AdvancedBase {
const { OpenAIImageGenerationService } = require('./OpenAIImageGenerationService');
services.registerService('openai-image-generation', OpenAIImageGenerationService);
+
+ const { OpenAIVideoGenerationService } = require('./OpenAIVideoGenerationService');
+ services.registerService('openai-video-generation', OpenAIVideoGenerationService);
}
if ( config?.services?.claude ) {
diff --git a/src/backend/src/services/MeteringService/costMaps/index.ts b/src/backend/src/services/MeteringService/costMaps/index.ts
index c7ec1653d..9a4faca56 100644
--- a/src/backend/src/services/MeteringService/costMaps/index.ts
+++ b/src/backend/src/services/MeteringService/costMaps/index.ts
@@ -10,6 +10,7 @@ import { MISTRAL_COST_MAP } from './mistralCostMap';
import { OPENAI_COST_MAP } from './openAiCostMap';
import { OPENAI_IMAGE_COST_MAP } from './openaiImageCostMap';
import { OPENROUTER_COST_MAP } from './openrouterCostMap';
+import { OPENAI_VIDEO_COST_MAP } from './openaiVideoCostMap';
import { TOGETHER_COST_MAP } from './togetherCostMap';
import { XAI_COST_MAP } from './xaiCostMap';
@@ -24,8 +25,9 @@ export const COST_MAPS = {
...MISTRAL_COST_MAP,
...OPENAI_COST_MAP,
...OPENAI_IMAGE_COST_MAP,
+ ...OPENAI_VIDEO_COST_MAP,
...OPENROUTER_COST_MAP,
...TOGETHER_COST_MAP,
...XAI_COST_MAP,
...FILE_SYSTEM_COST_MAP,
-};
\ No newline at end of file
+};
diff --git a/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts
new file mode 100644
index 000000000..d4fda85ab
--- /dev/null
+++ b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts
@@ -0,0 +1,8 @@
+import { toMicroCents } from '../utils';
+
+// Prices are per generated video-second. NOTE(review): assumes toMicroCents takes a dollar amount — confirm against ../utils.
+export const OPENAI_VIDEO_COST_MAP = { // usage key -> unit price; keys match the usage keys produced by OpenAIVideoGenerationService
+ 'openai:sora-2:default': toMicroCents(0.10), // sora-2, any size
+ 'openai:sora-2-pro:default': toMicroCents(0.30), // sora-2-pro, default tier
+ 'openai:sora-2-pro:xl': toMicroCents(0.50), // sora-2-pro, large-frame tier
+};
diff --git a/src/backend/src/services/drivers/CoercionService.js b/src/backend/src/services/drivers/CoercionService.js
index a3a3a13b7..ff07f4f4b 100644
--- a/src/backend/src/services/drivers/CoercionService.js
+++ b/src/backend/src/services/drivers/CoercionService.js
@@ -91,6 +91,37 @@ class CoercionService extends BaseService {
}
});
+ this.coercions_.push({
+ produces: {
+ $: 'stream',
+ content_type: 'video'
+ },
+ consumes: {
+ $: 'string:url:web',
+ content_type: 'video'
+ },
+ coerce: async typed_value => {
+ const response = await(async () => {
+ try {
+ return await CoercionService.MODULES.axios.get(typed_value.value, {
+ responseType: 'stream',
+ });
+ } catch (e) {
+ APIError.create('field_invalid', null, {
+ key: 'url',
+ expected: 'web URL',
+ got: 'error during request: ' + e.message,
+ });
+ }
+ })();
+
+ return new TypedValue({
+ $: 'stream',
+ content_type: response.headers['content-type'] ?? 'video/mp4',
+ }, response.data);
+ }
+ });
+
// Add coercion for data URLs to streams
this.coercions_.push({
produces: {
diff --git a/src/backend/src/services/drivers/DriverService.js b/src/backend/src/services/drivers/DriverService.js
index 013058739..1edc9b922 100644
--- a/src/backend/src/services/drivers/DriverService.js
+++ b/src/backend/src/services/drivers/DriverService.js
@@ -291,6 +291,7 @@ class DriverService extends BaseService {
['puter-tts']: 'aws-polly',
['puter-chat-completion']: 'openai-completion',
['puter-image-generation']: 'openai-image-generation',
+ ['puter-video-generation']: 'openai-video-generation',
'puter-exec': 'judge0',
'convert-files': 'convert-api',
'puter-send-mail': 'user-send-mail',
diff --git a/src/puter-js/index.d.ts b/src/puter-js/index.d.ts
index 32fd9d456..a4742469d 100644
--- a/src/puter-js/index.d.ts
+++ b/src/puter-js/index.d.ts
@@ -51,6 +51,9 @@ interface AI {
txt2img(prompt: string, testMode?: boolean): Promise;
txt2img(prompt: string, options?: Txt2ImgOptions): Promise;
+ txt2video(prompt: string, testMode?: boolean): Promise;
+ txt2video(prompt: string, options?: Txt2VideoOptions): Promise;
+
txt2speech(text: string): Promise;
txt2speech(text: string, options?: Txt2SpeechOptions): Promise;
txt2speech(text: string, language?: string): Promise;
@@ -114,6 +117,15 @@ interface Txt2ImgOptions {
input_image_mime_type?: string;
}
+interface Txt2VideoOptions { // options object accepted by AI.txt2video
+ prompt?: string; // optional here because the prompt may be passed as the first positional argument instead
+ model?: 'sora-2' | 'sora-2-pro'; // txt2video falls back to 'sora-2' when omitted
+ duration?: 4 | 8 | 12; // alias for `seconds`; txt2video copies it over when `seconds` is unset
+ seconds?: 4 | 8 | 12; // clip length
+ size?: '720x1280' | '1280x720' | '1024x1792' | '1792x1024'; // frame size
+ resolution?: '720x1280' | '1280x720' | '1024x1792' | '1792x1024'; // same value set as `size`
+}
+
interface Txt2SpeechOptions {
language?: string;
voice?: string;
@@ -512,4 +524,3 @@ export {
WorkerExecOptions,
WorkerInfo, Workers, WriteOptions
};
-
diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js
index e1051e340..2afe40108 100644
--- a/src/puter-js/src/modules/AI.js
+++ b/src/puter-js/src/modules/AI.js
@@ -675,6 +675,78 @@ class AI{
}
}).call(this, options);
}
+
+ txt2video = async (...args) => {
+ let options = {};
+ let testMode = false;
+
+ if(!args){
+ throw({message: 'Arguments are required', code: 'arguments_required'});
+ }
+
+ if (typeof args[0] === 'string') {
+ options = { prompt: args[0] };
+ }
+
+ if (typeof args[1] === 'boolean' && args[1] === true) {
+ testMode = true;
+ }
+
+ if (typeof args[0] === 'string' && typeof args[1] === "object") {
+ options = args[1];
+ options.prompt = args[0];
+ }
+
+ if (typeof args[0] === 'object') {
+ options = args[0];
+ }
+
+ if (!options.prompt) {
+ throw({message: 'Prompt parameter is required', code: 'prompt_required'});
+ }
+
+ if (!options.model) {
+ options.model = 'sora-2';
+ }
+
+ if (options.duration !== undefined && options.seconds === undefined) {
+ options.seconds = options.duration;
+ }
+
+ return await utils.make_driver_method(['prompt'], 'puter-video-generation', 'openai-video-generation', 'generate', {
+ responseType: 'blob',
+ test_mode: testMode ?? false,
+ transform: async result => {
+ let sourceUrl = null;
+ let mimeType = null;
+ if (result instanceof Blob) {
+ sourceUrl = await utils.blob_to_url(result);
+ mimeType = result.type || 'video/mp4';
+ } else if (typeof result === 'string') {
+ sourceUrl = result;
+ } else if (result && typeof result === 'object') {
+ sourceUrl = result.asset_url || result.url || result.href || null;
+ mimeType = result.mime_type || result.content_type || null;
+ }
+
+ if (!sourceUrl) {
+ return result;
+ }
+
+ const video = document.createElement('video');
+ video.src = sourceUrl;
+ video.controls = true;
+ video.preload = 'metadata';
+ if (mimeType) {
+ video.setAttribute('data-mime-type', mimeType);
+ }
+ video.setAttribute('data-source', sourceUrl);
+ video.toString = () => video.src;
+ video.valueOf = () => video.src;
+ return video;
+ }
+ }).call(this, options);
+ }
}
export default AI;