From bed84cad7645ad9aa80b84d733189cd2c4cd73c0 Mon Sep 17 00:00:00 2001
From: Shruc <42489293+P3il4@users.noreply.github.com>
Date: Tue, 24 Mar 2026 12:42:52 +0300
Subject: [PATCH] refactor ai video gen (#2711)
* refactor ai video generation into one service
* use old duration and dimension keys
* use costoverride billing like the others
---
.../src/modules/ai/PuterAIChatModule.js | 14 +-
src/backend/src/services/ChatAPIService.js | 32 +-
.../costMaps/openaiVideoCostMap.ts | 1 +
src/backend/src/services/ai/video/.gitignore | 2 +
.../ai/video/AIVideoGenerationService.ts | 288 +++++++++++++++
.../OpenAIVideoGenerationService.js | 342 ------------------
.../OpenAIVideoGenerationProvider.ts | 246 +++++++++++++
.../OpenAIVideoGenerationProvider/models.ts | 59 +++
.../TogetherVideoGenerationProvider.ts} | 161 ++++-----
.../models.ts} | 140 +++++--
.../src/services/ai/video/providers/types.ts | 72 ++++
.../src/services/drivers/DriverService.js | 2 +-
src/puter-js/src/modules/AI.js | 42 +--
src/puter-js/types/modules/ai.d.ts | 2 +
14 files changed, 863 insertions(+), 540 deletions(-)
create mode 100644 src/backend/src/services/ai/video/.gitignore
create mode 100644 src/backend/src/services/ai/video/AIVideoGenerationService.ts
delete mode 100644 src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js
create mode 100644 src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.ts
create mode 100644 src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/models.ts
rename src/backend/src/services/ai/video/{TogetherVideoGenerationService/TogetherVideoGenerationService.js => providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.ts} (61%)
rename src/backend/src/services/ai/video/{TogetherVideoGenerationService/models.js => providers/TogetherVideoGenerationProvider/models.ts} (67%)
create mode 100644 src/backend/src/services/ai/video/providers/types.ts
diff --git a/src/backend/src/modules/ai/PuterAIChatModule.js b/src/backend/src/modules/ai/PuterAIChatModule.js
index 99f93ea43..0ff818c82 100644
--- a/src/backend/src/modules/ai/PuterAIChatModule.js
+++ b/src/backend/src/modules/ai/PuterAIChatModule.js
@@ -28,9 +28,7 @@ import { OpenAISpeechToTextService } from '../../services/ai/stt/OpenAISpeechToT
import { AWSPollyService } from '../../services/ai/tts/AWSPollyService.js';
import { ElevenLabsTTSService } from '../../services/ai/tts/ElevenLabsTTSService.js';
import { OpenAITTSService } from '../../services/ai/tts/OpenAITTSService.js';
-import { TogetherVideoGenerationService } from '../../services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js';
-import { OpenAIVideoGenerationService } from '../../services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js';
-// import { AIVideoGenerationService } from '../../services/ai/video/AIVideoGenerationService.js';
+import { AIVideoGenerationService } from '../../services/ai/video/AIVideoGenerationService.js';
/**
* PuterAIModule class extends AdvancedBase to manage and register various AI services.
@@ -59,7 +57,7 @@ export class PuterAIModule extends AdvancedBase {
services.registerService('ai-image', AIImageGenerationService);
// video generation ai service
- // services.registerService('ai-video', AIVideoGenerationService);
+ services.registerService('ai-video', AIVideoGenerationService);
// TODO DS: centralize other service types too
// TODO: services should govern their own availability instead of the module deciding what to register
@@ -83,14 +81,6 @@ export class PuterAIModule extends AdvancedBase {
services.registerService('openai-tts', OpenAITTSService);
services.registerService('openai-speech2txt', OpenAISpeechToTextService);
-
- // TODO DS: move to video service
- services.registerService('openai-video-generation', OpenAIVideoGenerationService);
- }
-
- if ( config?.services?.['together-ai'] ) {
- // TODO DS: move to video service
- services.registerService('together-video-generation', TogetherVideoGenerationService);
}
}
}
diff --git a/src/backend/src/services/ChatAPIService.js b/src/backend/src/services/ChatAPIService.js
index 8e1079ead..e92968d6b 100644
--- a/src/backend/src/services/ChatAPIService.js
+++ b/src/backend/src/services/ChatAPIService.js
@@ -160,20 +160,8 @@ class ChatAPIService extends BaseService {
try {
const svc_su = this.services.get('su');
const models = await svc_su.sudo(async () => {
- const items = [];
- if ( this.services.has('openai-video-generation') ) {
- const svc_video = this.services.get('openai-video-generation');
- if ( typeof svc_video.models === 'function' ) {
- items.push(...await svc_video.models());
- }
- }
- if ( this.services.has('together-video-generation') ) {
- const svc_video = this.services.get('together-video-generation');
- if ( typeof svc_video.models === 'function' ) {
- items.push(...await svc_video.models());
- }
- }
- return items;
+ const svc_video = this.services.get('ai-video');
+ return svc_video.models();
});
res.json({ models });
} catch ( error ) {
@@ -190,20 +178,8 @@ class ChatAPIService extends BaseService {
try {
const svc_su = this.services.get('su');
const models = await svc_su.sudo(async () => {
- const items = [];
- if ( this.services.has('openai-video-generation') ) {
- const svc_video = this.services.get('openai-video-generation');
- if ( typeof svc_video.models === 'function' ) {
- items.push(...(await svc_video.models()).map(model => model.puterId || model.id));
- }
- }
- if ( this.services.has('together-video-generation') ) {
- const svc_video = this.services.get('together-video-generation');
- if ( typeof svc_video.models === 'function' ) {
- items.push(...(await svc_video.models()).map(model => model.id));
- }
- }
- return items;
+ const svc_video = this.services.get('ai-video');
+ return svc_video.list();
});
res.json({ models });
} catch ( error ) {
diff --git a/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts
index f7ed45e6a..4d84e2617 100644
--- a/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts
+++ b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts
@@ -5,4 +5,5 @@ export const OPENAI_VIDEO_COST_MAP = {
'openai:sora-2:default': toMicroCents(0.10),
'openai:sora-2-pro:default': toMicroCents(0.30),
'openai:sora-2-pro:xl': toMicroCents(0.50),
+ 'openai:sora-2-pro:xxl': toMicroCents(0.70),
};
diff --git a/src/backend/src/services/ai/video/.gitignore b/src/backend/src/services/ai/video/.gitignore
new file mode 100644
index 000000000..aa4a6da26
--- /dev/null
+++ b/src/backend/src/services/ai/video/.gitignore
@@ -0,0 +1,2 @@
+*.js
+*.js.map
\ No newline at end of file
diff --git a/src/backend/src/services/ai/video/AIVideoGenerationService.ts b/src/backend/src/services/ai/video/AIVideoGenerationService.ts
new file mode 100644
index 000000000..8c9a8283b
--- /dev/null
+++ b/src/backend/src/services/ai/video/AIVideoGenerationService.ts
@@ -0,0 +1,288 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import { APIError } from '../../../api/APIError.js';
+import { Context } from '../../../util/context.js';
+import BaseService from '../../BaseService.js';
+import { DriverService } from '../../drivers/DriverService.js';
+import { EventService } from '../../EventService.js';
+import { MeteringService } from '../../MeteringService/MeteringService.js';
+import { OpenAIVideoGenerationProvider } from './providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.js';
+import { TogetherVideoGenerationProvider } from './providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.js';
+import { IGenerateVideoParams, IVideoModel, IVideoProvider } from './providers/types.js';
+
+export class AIVideoGenerationService extends BaseService {
+
+    static SERVICE_NAME = 'ai-video'; // name used as the alias target when provider names are aliased at boot
+
+    static DEFAULT_PROVIDER = 'openai-video-generation'; // preferred provider when a request names neither model nor provider
+
+    get meteringService (): MeteringService { // looked up from the service container on every access
+        return this.services.get('meteringService').meteringService;
+    }
+
+    get eventService (): EventService {
+        return this.services.get('event');
+    }
+
+    get driverService (): DriverService {
+        return this.services.get('driver');
+    }
+
+    getProvider (name: string): IVideoProvider | undefined { // undefined when nothing is registered under `name`
+        return this.#providers[name];
+    }
+
+    #providers: Record<string, IVideoProvider> = {}; // provider name -> provider instance
+    #modelIdMap: Record<string, IVideoModel[]> = {}; // lowercased id / puterId / alias -> candidate models
+
+    static IMPLEMENTS = { // interface name -> methods this service provides for it
+        'driver-capabilities': {
+            supports_test_mode (iface: string, method_name: string) { // true only for the video interface's `generate`
+                return iface === 'puter-video-generation' &&
+                    method_name === 'generate';
+            },
+        },
+        'puter-video-generation': {
+            async generate (...parameters: Parameters<AIVideoGenerationService['generate']>) {
+                return (this as unknown as AIVideoGenerationService).generate(...parameters);
+            },
+        },
+    };
+
+    /**
+     * Resolve a model key (primary id, puterId or alias) to one registered model.
+     *
+     * `provider` is a preference rather than a filter: when that provider does not
+     * offer the model, the first candidate in the list is returned instead.
+     *
+     * @returns the selected model, or undefined when nothing is registered under `modelId`
+     */
+    getModel ({ modelId, provider }: { modelId: string, provider?: string }) {
+        const candidates = this.#modelIdMap[modelId];
+        if ( ! candidates ) {
+            return undefined;
+        }
+
+        const [fallback] = candidates;
+        if ( provider ) {
+            return candidates.find(candidate => candidate.provider === provider) ?? fallback;
+        }
+
+        // without a provider preference: exact primary id, then exact puterId, then list head
+        return candidates.find(candidate => candidate.id === modelId)
+            ?? candidates.find(candidate => candidate.puterId === modelId)
+            ?? fallback;
+    }
+
+    /**
+     * Instantiate every built-in provider that has an API key configured, then let
+     * extensions contribute their own through the `ai.video.registerProviders` event.
+     */
+    private async registerProviders () {
+        const openAiConfig = this.config.providers?.['openai-video-generation'] || this.global_config?.services?.['openai'] || this.global_config?.openai;
+        const openAiKey = openAiConfig?.apiKey || openAiConfig?.secret_key; // either key name is accepted
+        if ( openAiKey ) {
+            this.#providers['openai-video-generation'] = new OpenAIVideoGenerationProvider({ apiKey: openAiKey }, this.meteringService);
+        }
+
+        const togetherConfig = this.config.providers?.['together-video-generation'] || this.global_config?.services?.['together-ai'];
+        const togetherKey = togetherConfig?.apiKey || togetherConfig?.secret_key;
+        if ( togetherKey ) {
+            this.#providers['together-video-generation'] = new TogetherVideoGenerationProvider({ apiKey: togetherKey }, this.meteringService);
+        }
+
+        // emit event for extensions to add providers
+        const extensionProviders: Record<string, IVideoProvider> = {};
+        await this.eventService.emit('ai.video.registerProviders', extensionProviders);
+        for ( const [providerName, provider] of Object.entries(extensionProviders) ) {
+            // an already-registered name is never overwritten: the newcomer is parked under a suffixed name
+            const conflict = Boolean(this.#providers[providerName]);
+            if ( conflict ) {
+                console.warn('AIVideoGenerationService: provider name conflict for ', providerName, ' registering with -extension suffix');
+            }
+            this.#providers[conflict ? `${providerName}-extension` : providerName] = provider;
+        }
+    }
+
+    protected async '__on_boot.consolidation' () { // registers providers, then wires driver aliases and builds the model id map
+        await this.registerProviders();
+
+        for ( const providerName in this.#providers ) {
+            const provider = this.#providers[providerName];
+
+            // alias all driver requests to go here to support legacy routing
+            this.driverService.register_service_alias(
+                AIVideoGenerationService.SERVICE_NAME,
+                providerName,
+                { iface: 'puter-video-generation' },
+            );
+
+            // build model id map
+            for ( const model of await provider.models() ) {
+                model.id = model.id.trim().toLowerCase(); // NOTE(review): rewrites the object returned by the provider in place — confirm providers tolerate that
+                if ( model.puterId ) {
+                    model.puterId = model.puterId.trim().toLowerCase();
+                }
+                if ( model.aliases ) {
+                    model.aliases = model.aliases.map(alias => alias.trim().toLowerCase());
+                }
+                if ( ! this.#modelIdMap[model.id] ) {
+                    this.#modelIdMap[model.id] = [];
+                }
+                this.#modelIdMap[model.id].push({ ...model, provider: providerName }); // the stored copy is tagged with its provider name
+
+                if ( model.puterId ) { // the puterId is indexed like any other alias
+                    if ( model.aliases ) {
+                        model.aliases.push(model.puterId);
+                    } else {
+                        model.aliases = [model.puterId];
+                    }
+                }
+
+                if ( model.aliases ) {
+                    for ( let alias of model.aliases ) {
+                        alias = alias.trim().toLowerCase();
+                        if ( ! this.#modelIdMap[alias] ) { // unseen alias: share the primary id's list (same array object)
+                            this.#modelIdMap[alias] = this.#modelIdMap[model.id];
+                            continue;
+                        }
+                        if ( this.#modelIdMap[alias] !== this.#modelIdMap[model.id] ) { // alias already owns another list: join it and repoint the primary id
+                            this.#modelIdMap[alias].push({ ...model, provider: providerName });
+                            this.#modelIdMap[model.id] = this.#modelIdMap[alias];
+                            continue;
+                        }
+                    }
+                }
+                this.#modelIdMap[model.id].sort((a, b) => { // ascending cost; a model with no readable cost counts as Infinity
+                    const aCostKey = a.index_cost_key || a.output_cost_key || Object.keys(a.costs || {})[0];
+                    const bCostKey = b.index_cost_key || b.output_cost_key || Object.keys(b.costs || {})[0];
+                    const aCost = a.costs?.[aCostKey] ?? Infinity;
+                    const bCost = b.costs?.[bCostKey] ?? Infinity;
+                    return aCost - bCost;
+                });
+            }
+        }
+    }
+
+    /**
+     * Every registered model exactly once, ordered by provider and then by model id.
+     *
+     * The id map stores the same model under several keys (id, puterId, aliases), so
+     * entries are de-duplicated on `provider:puterId`, or `provider:id` without a puterId.
+     */
+    models () {
+        const emitted = new Set();
+        const unique = Object.values(this.#modelIdMap).flat().filter(candidate => {
+            const key = `${candidate.provider}:${candidate.puterId || candidate.id}`;
+            const firstSighting = !emitted.has(key);
+            emitted.add(key);
+            return firstSighting;
+        });
+        return unique.sort((left, right) => (
+            left.provider === right.provider
+                ? left.id.localeCompare(right.id)
+                : left.provider!.localeCompare(right.provider!)
+        ));
+    }
+
+    list () { // public model names (puterId when present, otherwise id), sorted
+        return this.models().map(({ puterId, id }) => puterId || id).sort();
+    }
+
+    /**
+     * Choose the provider and model for a request, clamp duration and resolution to the
+     * values the model declares, then delegate to that provider's `generate`.
+     *
+     * @throws Error when no provider is configured; APIError `field_invalid` when the model cannot be resolved
+     */
+    async generate (parameters: IGenerateVideoParams) {
+        // a missing driver-call context must not crash the destructuring below; treat it as "no hints"
+        const clientDriverCall = (Context.get('client_driver_call') ?? {}) as { test_mode?: boolean; response_metadata?: Record<string, unknown>; intended_service?: string };
+        const { test_mode: testMode, intended_service: legacyProviderName } = clientDriverCall;
+
+        if ( parameters.model ) {
+            parameters.model = parameters.model.trim().toLowerCase();
+        }
+
+        const configuredProviders = Object.keys(this.#providers);
+        if ( configuredProviders.length === 0 ) {
+            throw new Error('no video generation providers configured');
+        }
+
+        // an explicit `provider` wins; otherwise a legacy service name (anything but this service's own) is the hint
+        let intendedProvider = (parameters.provider || (legacyProviderName === AIVideoGenerationService.SERVICE_NAME ? '' : legacyProviderName)) ?? '';
+
+        if ( !parameters.model && !intendedProvider ) {
+            intendedProvider = configuredProviders.includes(AIVideoGenerationService.DEFAULT_PROVIDER) ? AIVideoGenerationService.DEFAULT_PROVIDER : configuredProviders[0];
+        }
+
+        if ( intendedProvider && !this.#providers[intendedProvider] ) {
+            intendedProvider = configuredProviders[0]; // unknown provider name: fall back instead of failing
+        }
+
+        if ( !parameters.model && intendedProvider ) {
+            parameters.model = this.#providers[intendedProvider].getDefaultModel();
+        }
+
+        const model = parameters.model ? this.getModel({ modelId: parameters.model, provider: intendedProvider }) : undefined;
+
+        if ( ! model ) {
+            const availableModelsUrl = `${this.global_config.origin}/puterai/video/models`;
+            throw APIError.create('field_invalid', undefined, {
+                key: 'model',
+                expected: `a valid model name from ${availableModelsUrl}`,
+                got: parameters.model,
+            });
+        }
+
+        const provider = this.#providers[model.provider!];
+        if ( ! provider ) {
+            throw new Error(`no provider found for model ${model.id}`);
+        }
+
+        if ( model.durationSeconds?.length ) {
+            // unsupported or missing durations fall back to the model's first declared duration
+            const requestedSeconds = parameters.seconds ?? parameters.duration;
+            const normalizedSeconds = typeof requestedSeconds === 'string' ? Number.parseInt(requestedSeconds, 10) : requestedSeconds;
+            const validSeconds = model.durationSeconds.includes(Number(normalizedSeconds)) ? normalizedSeconds : model.durationSeconds[0];
+            parameters.seconds = validSeconds;
+            parameters.duration = validSeconds;
+        }
+
+        if ( model.dimensions?.length ) {
+            // `size` takes precedence over `resolution`; blank strings count as absent
+            const requestedResolution = typeof parameters.size === 'string' && parameters.size.trim()
+                ? parameters.size
+                : typeof parameters.resolution === 'string' && parameters.resolution.trim()
+                    ? parameters.resolution
+                    : undefined;
+            const normalizedResolution = requestedResolution && model.dimensions.includes(requestedResolution) ? requestedResolution : model.dimensions[0];
+            parameters.size = normalizedResolution;
+            parameters.resolution = normalizedResolution;
+        }
+
+        return await provider.generate({
+            ...parameters,
+            model: model.id,
+            provider: model.provider,
+            test_mode: testMode,
+        });
+    }
+}
diff --git a/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js b/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js
deleted file mode 100644
index 7318ab5c7..000000000
--- a/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * Copyright (C) 2024-present Puter Technologies Inc.
- *
- * This file is part of Puter.
- *
- * Puter is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see .
- */
-
-const APIError = require('../../../../api/APIError');
-const BaseService = require('../../../BaseService');
-const { TypedValue } = require('../../../drivers/meta/Runtime');
-const { Context } = require('../../../../util/context');
-const { Readable } = require('stream');
-
-const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
-const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
-const POLL_INTERVAL_MS = 5_000;
-const DEFAULT_DURATION_SECONDS = 4;
-const DEFAULT_SIZE = '720x1280';
-const ALLOWED_SIZES = new Set(['720x1280', '1280x720', '1024x1792', '1792x1024']);
-const ALLOWED_SECONDS = new Set(['4', '8', '12']);
-const OPENAI_VIDEO_MODELS = [
- {
- puterId: 'openai:openai/sora-2',
- id: 'sora-2',
- aliases: ['openai/sora-2'],
- defaultUsageKey: 'openai:sora-2:default',
- },
- {
- puterId: 'openai:openai/sora-2-pro',
- id: 'sora-2-pro',
- aliases: ['openai/sora-2-pro'],
- defaultUsageKey: 'openai:sora-2-pro:default',
- },
-];
-
-class OpenAIVideoGenerationService extends BaseService {
- /** @type {import('../../../MeteringService/MeteringService').MeteringService} */
- get meteringService () {
- return this.services.get('meteringService').meteringService;
- }
-
- static MODULES = {
- openai: require('openai'),
- };
-
- _construct () {
- this.models_ = Object.fromEntries(OPENAI_VIDEO_MODELS.map(model => [
- model.id,
- { defaultUsageKey: model.defaultUsageKey },
- ]));
- }
-
- async _init () {
- let apiKey =
- this.config?.services?.openai?.apiKey ??
- this.global_config?.services?.openai?.apiKey;
-
- if ( ! apiKey ) {
- apiKey =
- this.config?.openai?.secret_key ??
- this.global_config.openai?.secret_key;
-
- console.warn('The `openai.secret_key` configuration format is deprecated. ' +
- 'Please use `services.openai.apiKey` instead.');
- }
-
- this.openai = new this.modules.openai.OpenAI({
- apiKey,
- });
- }
-
- static IMPLEMENTS = {
- 'driver-capabilities': {
- supports_test_mode (iface, method_name) {
- return iface === 'puter-video-generation' &&
- method_name === 'generate';
- },
- },
- 'puter-video-generation': {
- async generate (params) {
- return await this.generateVideo(params);
- },
- },
- };
-
- async models () {
- // Import cost map dynamically
- const costMapModule = await import('../../../MeteringService/costMaps/openaiVideoCostMap.ts');
- const OPENAI_VIDEO_COST_MAP = costMapModule.OPENAI_VIDEO_COST_MAP;
-
- // Convert microcents to cents (divide by 1,000,000)
- const microCentsToCents = (microCents) => microCents / 1_000_000;
-
- return OPENAI_VIDEO_MODELS.map(model => {
- const result = { ...model };
-
- // Get cost for default usage key
- const defaultCostMicroCents = OPENAI_VIDEO_COST_MAP[model.defaultUsageKey];
- if ( defaultCostMicroCents !== undefined ) {
- const perSecondCost = microCentsToCents(defaultCostMicroCents);
- result.costs_currency = 'usd-cents';
- result.costs = {
- 'per-second': perSecondCost,
- 'default-duration-per-video': perSecondCost * DEFAULT_DURATION_SECONDS,
- };
- result.output_cost_key = 'default-duration-per-video';
- }
-
- // Add cost for xl variant if it exists (sora-2-pro only)
- if ( model.id === 'sora-2-pro' ) {
- const xlCostMicroCents = OPENAI_VIDEO_COST_MAP['openai:sora-2-pro:xl'];
- if ( xlCostMicroCents !== undefined ) {
- if ( ! result.costs ) {
- result.costs = {};
- result.costs_currency = 'usd-cents';
- }
- const perSecondXlCost = microCentsToCents(xlCostMicroCents);
- result.costs['per-second-xl'] = perSecondXlCost;
- result.costs['default-duration-per-video-xl'] = perSecondXlCost * DEFAULT_DURATION_SECONDS;
- }
- }
-
- return result;
- });
- }
-
- async generateVideo (params) {
- const {
- prompt,
- model: requestedModel,
- duration,
- seconds,
- size,
- resolution,
- input_reference: inputReference,
- test_mode: testMode,
- } = params ?? {};
-
- if ( typeof prompt !== 'string' || !prompt.trim() ) {
- throw APIError.create('field_invalid', null, {
- key: 'prompt',
- expected: 'a non-empty string',
- got: prompt,
- });
- }
-
- const resolvedModel = OPENAI_VIDEO_MODELS.find(entry =>
- entry.id === requestedModel ||
- entry.puterId === requestedModel ||
- (entry.aliases || []).includes(requestedModel))?.id;
- const model = resolvedModel ?? requestedModel ?? 'sora-2';
- const modelConfig = this.models_[model];
- if ( ! modelConfig ) {
- throw APIError.create('field_invalid', null, {
- key: 'model',
- expected: `one of: ${ Object.keys(this.models_).join(', ')}`,
- got: model,
- });
- }
-
- if ( testMode ) {
- return new TypedValue({
- $: 'string:url:web',
- content_type: 'video',
- }, DEFAULT_TEST_VIDEO_URL);
- }
-
- const normalizedSize = this.#normalizeSize(size ?? resolution) ?? DEFAULT_SIZE;
- const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? '4';
-
- const usageKey = this.#determineUsageKey(model, normalizedSize);
- if ( ! usageKey ) {
- throw new Error(`Unsupported pricing tier for model ${model}`);
- }
-
- const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? DEFAULT_DURATION_SECONDS;
- const actor = Context.get('actor');
- const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUnits);
- if ( ! usageAllowed ) {
- throw APIError.create('insufficient_funds');
- }
-
- const createParams = {
- model,
- prompt,
- seconds: normalizedSeconds,
- size: normalizedSize,
- };
-
- if ( inputReference ) {
- createParams.input_reference = inputReference;
- }
-
- const createResponse = await this.openai.videos.create(createParams);
- const finalJob = await this.#pollUntilComplete(createResponse);
-
- if ( finalJob.status === 'failed' ) {
- const errorMessage = finalJob.error?.message ?? 'Video generation failed';
- throw new Error(errorMessage);
- }
-
- const finalResolution = this.#normalizeSize(finalJob.size) ?? normalizedSize;
- const finalUsageKey = this.#determineUsageKey(model, finalResolution);
- if ( ! finalUsageKey ) {
- throw new Error(`Unsupported pricing tier for model ${model}`);
- }
-
- const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits;
-
- const downloadResponse = await this.openai.videos.downloadContent(finalJob.id);
- const contentType = downloadResponse.headers.get('content-type') ?? 'video/mp4';
-
- let stream = downloadResponse.body;
- if ( stream && typeof stream.getReader === 'function' ) {
- stream = Readable.fromWeb(stream);
- }
-
- if ( ! stream ) {
- const arrayBuffer = await downloadResponse.arrayBuffer();
- stream = Readable.from(Buffer.from(arrayBuffer));
- }
-
- this.meteringService.incrementUsage(actor, finalUsageKey, actualSeconds);
-
- return new TypedValue({
- $: 'stream',
- content_type: contentType,
- }, stream);
- }
-
- async #pollUntilComplete (initialJob) {
- let job = initialJob;
- const start = Date.now();
-
- while ( job.status === 'queued' || job.status === 'in_progress' ) {
- if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) {
- throw new Error('Timed out waiting for Sora video generation to complete');
- }
-
- await this.#delay(POLL_INTERVAL_MS);
- job = await this.openai.videos.retrieve(job.id);
- }
-
- return job;
- }
-
- async #delay (ms) {
- return await new Promise(resolve => setTimeout(resolve, ms));
- }
-
- #normalizeSize (candidate) {
- if ( ! candidate ) return undefined;
- const normalized = this.#normalizeResolution(candidate);
- if ( normalized && ALLOWED_SIZES.has(normalized) ) {
- return normalized;
- }
- return undefined;
- }
-
- #normalizeSeconds (value) {
- if ( value === null || value === undefined ) {
- return undefined;
- }
-
- if ( typeof value === 'number' && Number.isFinite(value) ) {
- const rounded = String(Math.round(value));
- return ALLOWED_SECONDS.has(rounded) ? rounded : undefined;
- }
-
- if ( typeof value === 'string' ) {
- const trimmed = value.trim();
- if ( ALLOWED_SECONDS.has(trimmed) ) {
- return trimmed;
- }
- const numeric = Number.parseInt(trimmed, 10);
- if ( Number.isFinite(numeric) ) {
- const normalized = String(numeric);
- return ALLOWED_SECONDS.has(normalized) ? normalized : undefined;
- }
- }
-
- return undefined;
- }
-
- #determineUsageKey (model, normalizedSize) {
- const config = this.models_[model];
- if ( ! config ) return null;
-
- if ( model === 'sora-2-pro' && normalizedSize === '1792x1024' ) {
- return 'openai:sora-2-pro:xl';
- }
-
- return config.defaultUsageKey;
- }
-
- #normalizeResolution (value) {
- if ( ! value ) return undefined;
- if ( typeof value === 'string' ) {
- const match = value.match(/(\\d+)\\s*x\\s*(\\d+)/i);
- if ( match ) {
- const width = Number.parseInt(match[1], 10);
- const height = Number.parseInt(match[2], 10);
- if ( Number.isFinite(width) && Number.isFinite(height) ) {
- const larger = Math.max(width, height);
- const smaller = Math.min(width, height);
- return `${larger}x${smaller}`;
- }
- }
- }
- return undefined;
- }
-
- #parseSeconds (value) {
- if ( value === null || value === undefined ) return undefined;
- if ( typeof value === 'number' && Number.isFinite(value) ) {
- return value;
- }
- if ( typeof value === 'string' ) {
- const numeric = Number.parseInt(value, 10);
- if ( Number.isFinite(numeric) ) {
- return numeric;
- }
- }
- return undefined;
- }
-}
-
-module.exports = {
- OpenAIVideoGenerationService,
-};
diff --git a/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.ts b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.ts
new file mode 100644
index 000000000..94d0fe64e
--- /dev/null
+++ b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.ts
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import OpenAI from 'openai';
+import APIError from '../../../../../api/APIError.js';
+import { Context } from '../../../../../util/context.js';
+import { MeteringService } from '../../../../MeteringService/MeteringService.js';
+import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js';
+import { TypedValue } from '../../../../drivers/meta/Runtime.js';
+import { Readable } from 'stream';
+import { OPENAI_VIDEO_MODELS, OPENAI_VIDEO_ALLOWED_SECONDS } from './models.js';
+
+const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
+const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;
+const POLL_INTERVAL_MS = 5_000;
+const DEFAULT_DURATION_SECONDS = 4;
+
+export class OpenAIVideoGenerationProvider implements IVideoProvider {
+    #openai: OpenAI; // SDK client used for the create / retrieve / downloadContent calls
+    #meteringService: MeteringService; // used for the credit pre-check and for recording usage
+
+    constructor (config: { apiKey: string }, meteringService: MeteringService) { // throws when `config.apiKey` is empty
+        if ( ! config.apiKey ) {
+            throw new Error('OpenAI video generation requires an API key');
+        }
+        this.#openai = new OpenAI({ apiKey: config.apiKey });
+        this.#meteringService = meteringService;
+    }
+
+    getDefaultModel (): string { // the first entry of OPENAI_VIDEO_MODELS is the default
+        return OPENAI_VIDEO_MODELS[0].id;
+    }
+
+    async models (): Promise<IVideoModel[]> { // returns the shared OPENAI_VIDEO_MODELS array itself, not a copy
+        return OPENAI_VIDEO_MODELS;
+    }
+
+    async generate (params: IGenerateVideoParams): Promise<TypedValue> { // validate, pre-check credits, create and poll the job, download, then bill
+        const {
+            prompt,
+            model: requestedModel,
+            duration,
+            seconds,
+            size,
+            resolution,
+            input_reference: inputReference,
+            test_mode: testMode,
+        } = params ?? {};
+
+        if ( typeof prompt !== 'string' || !prompt.trim() ) {
+            throw APIError.create('field_invalid', null, {
+                key: 'prompt',
+                expected: 'a non-empty string',
+                got: prompt,
+            });
+        }
+
+        const selectedModel = await this.#selectModel(requestedModel);
+
+        if ( ! selectedModel ) {
+            throw new Error(`Unknown video model: ${requestedModel}`);
+        }
+
+        if ( testMode ) { // test mode returns a fixed sample URL before any OpenAI or metering call is made
+            return new TypedValue({
+                $: 'string:url:web',
+                content_type: 'video',
+            }, DEFAULT_TEST_VIDEO_URL);
+        }
+
+        const defaultSize = selectedModel.dimensions?.[0] ?? '720x1280';
+        const normalizedSize = this.#normalizeSize(size ?? resolution, selectedModel) ?? defaultSize; // `size` wins over `resolution`
+        const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? String(DEFAULT_DURATION_SECONDS);
+
+        const sizeTier = this.#determineSizeTier(selectedModel, normalizedSize);
+        const costPerSecondCents = this.#getCostPerSecond(selectedModel, sizeTier);
+
+        if ( ! costPerSecondCents ) { // also rejects a cost of 0
+            throw new Error(`No pricing configured for model ${selectedModel.id} at size ${normalizedSize}`);
+        }
+
+        const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? DEFAULT_DURATION_SECONDS;
+        const actor = Context.get('actor');
+        const costInMicroCents = costPerSecondCents * 1_000_000; // cents -> microcents
+        const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents * estimatedUnits);
+        if ( ! usageAllowed ) {
+            throw APIError.create('insufficient_funds');
+        }
+
+        const createParams: OpenAI.VideoCreateParams = {
+            prompt,
+            model: selectedModel.id,
+            seconds: normalizedSeconds as OpenAI.VideoSeconds,
+            size: normalizedSize as OpenAI.VideoSize,
+        };
+
+        if ( inputReference ) {
+            createParams.input_reference = inputReference as OpenAI.VideoCreateParams['input_reference'];
+        }
+
+        const createResponse = await this.#openai.videos.create(createParams);
+        const finalJob = await this.#pollUntilComplete(createResponse);
+
+        if ( finalJob.status === 'failed' ) {
+            const errorMessage = finalJob.error?.message ?? 'Video generation failed';
+            throw new Error(errorMessage);
+        }
+
+        const finalResolution = this.#normalizeSize(finalJob.size, selectedModel) ?? normalizedSize; // price from the size the job reports, when usable
+        const finalTier = this.#determineSizeTier(selectedModel, finalResolution);
+        const finalCostPerSecondCents = this.#getCostPerSecond(selectedModel, finalTier);
+
+        if ( ! finalCostPerSecondCents ) {
+            throw new Error(`No pricing configured for model ${selectedModel.id} at size ${finalResolution}`);
+        }
+
+        const finalCostInMicroCents = finalCostPerSecondCents * 1_000_000;
+        const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits; // falls back to the requested duration
+
+        const downloadResponse = await this.#openai.videos.downloadContent(finalJob.id);
+        const contentType = downloadResponse.headers.get('content-type') ?? 'video/mp4';
+
+        let stream: any = downloadResponse.body;
+        if ( stream && typeof stream.getReader === 'function' ) { // web stream: convert to a Node Readable
+            stream = Readable.fromWeb(stream as any);
+        }
+
+        if ( ! stream ) { // no body stream: buffer the whole payload instead
+            const arrayBuffer = await downloadResponse.arrayBuffer();
+            stream = Readable.from(Buffer.from(arrayBuffer));
+        }
+
+        const finalUsageKey = this.#getUsageKey(selectedModel, finalTier);
+        await this.#meteringService.incrementUsage(actor, finalUsageKey, actualSeconds, finalCostInMicroCents * actualSeconds); // recorded only after the download call succeeded
+
+        return new TypedValue({
+            $: 'stream',
+            content_type: contentType,
+        }, stream);
+    }
+
+    async #selectModel (requestedModel?: string): Promise<IVideoModel | undefined> { // undefined when no model id matches
+        const wantedId = requestedModel?.toLowerCase(); // lowercased once rather than once per model
+        return (await this.models()).find(m => m.id.toLowerCase() === wantedId); // case-insensitive match on `id` only
+    }
+
+    async #pollUntilComplete (initialJob: OpenAI.Video): Promise<OpenAI.Video> { // resolves with the first job that is no longer pending
+        let job = initialJob;
+        const deadline = Date.now() + DEFAULT_TIMEOUT_MS; // overall budget, measured from the start of polling
+
+        while ( job.status === 'queued' || job.status === 'in_progress' ) {
+            if ( Date.now() > deadline ) {
+                throw new Error('Timed out waiting for Sora video generation to complete');
+            }
+
+            await this.#delay(POLL_INTERVAL_MS); // pause between status requests
+            job = await this.#openai.videos.retrieve(job.id);
+        }
+
+        return job;
+    }
+
+    async #delay (ms: number): Promise<void> { // resolves, with no value, once `ms` milliseconds have elapsed
+        await new Promise<void>(resolve => setTimeout(resolve, ms));
+    }
+
+    #normalizeSize (candidate: unknown, model: IVideoModel): string | undefined { // undefined lets the caller fall back to its own default
+        const parsedSize = candidate ? this.#normalizeResolution(candidate) : undefined; // canonical `${w}x${h}` or undefined
+        if ( ! parsedSize ) {
+            return undefined;
+        }
+        // Only a size listed in the model's `dimensions` is accepted; anything else counts as "not provided".
+        return model.dimensions?.includes(parsedSize) ? parsedSize : undefined;
+    }
+
+    #normalizeSeconds (value: unknown): string | undefined { // canonical '4' | '8' | '12', else undefined
+        if ( value === null || value === undefined ) {
+            return undefined;
+        }
+        // Numbers are rounded; strings are trimmed and read with Number(); empty strings and other types are rejected.
+        const numeric = typeof value === 'number' ? Math.round(value) : typeof value === 'string' && value.trim() ? Number(value.trim()) : NaN;
+        // Return the canonical decimal form, not the caller's spelling: '4.0', '0x8' or '1.2e1' pass the membership
+        // test but are not valid API values, and #parseSeconds (base-10 parseInt) would read them as 4, 0 and 1.
+        return OPENAI_VIDEO_ALLOWED_SECONDS.includes(numeric as typeof OPENAI_VIDEO_ALLOWED_SECONDS[number]) ? String(numeric) : undefined;
+    }
+
+    #determineSizeTier (model: IVideoModel, size: string): string { // pricing tier: 'default', 'xl' or 'xxl'
+        // Only 'sora-2-pro' has size-dependent tiers; every other model id is always 'default'.
+        if ( model.id !== 'sora-2-pro' ) return 'default';
+        if ( ['1080x1920', '1920x1080'].includes(size) ) return 'xxl';
+        if ( ['1024x1792', '1792x1024'].includes(size) ) return 'xl';
+        return 'default';
+    }
+
+    #getCostPerSecond (model: IVideoModel, tier: string): number | undefined { // undefined when the model has no price for this tier
+        const suffix = tier === 'default' ? '' : `-${tier}`; // 'default' maps to the bare 'per-second' key
+        return model.costs?.[`per-second${suffix}`];
+    }
+
+    #getUsageKey (model: IVideoModel, tier: string): string { // metering key, e.g. 'openai:sora-2:default'
+        return ['openai', model.id, tier].join(':');
+    }
+
+    #normalizeResolution (value: unknown): string | undefined { // canonical 'WxH', else undefined
+        // Only a non-empty string can carry a resolution.
+        if ( typeof value !== 'string' || ! value ) {
+            return undefined;
+        }
+        // First "<digits> x <digits>" pair anywhere in the text ('x' or 'X', optional whitespace around it).
+        const [, rawWidth, rawHeight] = value.match(/(\d+)\s*x\s*(\d+)/i) ?? [];
+        if ( rawWidth === undefined || rawHeight === undefined ) {
+            return undefined;
+        }
+        const [width, height] = [rawWidth, rawHeight].map(digits => Number.parseInt(digits, 10));
+        // parseInt overflows to Infinity on extremely long digit runs; those are rejected.
+        return Number.isFinite(width) && Number.isFinite(height) ? `${width}x${height}` : undefined;
+    }
+
+    #parseSeconds (value: unknown): number | undefined { // whole seconds, or undefined when not numeric
+        if ( typeof value === 'number' ) {
+            return Number.isFinite(value) ? Math.round(value) : undefined; // NaN and ±Infinity are rejected
+        }
+        if ( typeof value !== 'string' ) {
+            return undefined; // null, undefined and every other type
+        }
+        // Base-10 parseInt: reads a leading integer and ignores whatever follows it.
+        const parsed = Number.parseInt(value, 10);
+        return Number.isFinite(parsed) ? parsed : undefined;
+    }
+}
diff --git a/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/models.ts b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/models.ts
new file mode 100644
index 000000000..061cd341d
--- /dev/null
+++ b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/models.ts
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import { IVideoModel } from '../types.js';
+
+export const OPENAI_VIDEO_ALLOWED_SECONDS = [4, 8, 12] as const; // clip lengths, in seconds, listed as durationSeconds for both models below
+
+export const OPENAI_VIDEO_MODELS: IVideoModel[] = [ // Sora model catalogue; cost numbers are in the unit named by costs_currency
+ {
+ id: 'sora-2',
+ puterId: 'openai:openai/sora-2',
+ aliases: ['openai/sora-2'],
+ name: 'Sora 2',
+ costs_currency: 'usd-cents',
+ costs: {
+ 'per-second': 10,
+ 'default-duration-per-video': 40, // equals 4 x 'per-second'
+ },
+ output_cost_key: 'default-duration-per-video',
+ durationSeconds: OPENAI_VIDEO_ALLOWED_SECONDS.slice(), // copy: the `as const` tuple is readonly, this field is a plain number[]
+ dimensions: ['720x1280', '1280x720'],
+ defaultUsageKey: 'openai:sora-2:default',
+ },
+ {
+ id: 'sora-2-pro',
+ puterId: 'openai:openai/sora-2-pro',
+ aliases: ['openai/sora-2-pro'],
+ name: 'Sora 2 Pro',
+ costs_currency: 'usd-cents',
+ costs: {
+ 'per-second': 30,
+ 'default-duration-per-video': 120, // equals 4 x 'per-second'
+ 'per-second-xl': 50, // '-xl' entries price the 1024x1792 / 1792x1024 sizes
+ 'default-duration-per-video-xl': 200,
+ 'per-second-xxl': 70, // '-xxl' entries price the 1080x1920 / 1920x1080 sizes
+ 'default-duration-per-video-xxl': 280,
+ },
+ output_cost_key: 'default-duration-per-video',
+ durationSeconds: OPENAI_VIDEO_ALLOWED_SECONDS.slice(), // copy, for the same reason as above
+ dimensions: ['720x1280', '1280x720', '1024x1792', '1792x1024', '1080x1920', '1920x1080'],
+ defaultUsageKey: 'openai:sora-2-pro:default',
+ },
+];
diff --git a/src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.ts
similarity index 61%
rename from src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js
rename to src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.ts
index a31b7ec56..10bf86fbc 100644
--- a/src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js
+++ b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.ts
@@ -17,56 +17,50 @@
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-const APIError = require('../../../../api/APIError');
-const BaseService = require('../../../BaseService');
-const { TypedValue } = require('../../../drivers/meta/Runtime');
-const { Context } = require('../../../../util/context');
-const { Together } = require('together-ai');
+import { Together } from 'together-ai';
+import APIError from '../../../../../api/APIError.js';
+import { Context } from '../../../../../util/context.js';
+import { MeteringService } from '../../../../MeteringService/MeteringService.js';
+import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js';
+import { TypedValue } from '../../../../drivers/meta/Runtime.js';
+import { TOGETHER_VIDEO_GENERATION_MODELS } from './models.js';
const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
const POLL_INTERVAL_MS = 5_000;
-const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
+const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;
const DEFAULT_MODEL = 'minimax/video-01-director';
const DEFAULT_DURATION_SECONDS = 6;
-const DEFAULT_USAGE_KEY = 'together-video:default';
-let models = [];
+export class TogetherVideoGenerationProvider implements IVideoProvider {
+ #client: Together;
+ #meteringService: MeteringService;
-class TogetherVideoGenerationService extends BaseService {
- /** @type {import('../../../MeteringService/MeteringService').MeteringService} */
- get meteringService () {
- return this.services.get('meteringService').meteringService;
- }
-
- static MODULES = {};
-
- async _init () {
- const apiKey =
- this.config?.apiKey ??
- this.global_config?.services?.['together-ai']?.apiKey;
-
- if ( ! apiKey ) {
+ constructor (config: { apiKey: string }, meteringService: MeteringService) {
+ if ( ! config.apiKey ) {
throw new Error('Together AI video generation requires an API key');
}
-
- this.client = new Together({ apiKey });
+ this.#client = new Together({ apiKey: config.apiKey });
+ this.#meteringService = meteringService;
}
- static IMPLEMENTS = {
- 'driver-capabilities': {
- supports_test_mode (iface, method_name) {
- return iface === 'puter-video-generation' &&
- method_name === 'generate';
- },
- },
- 'puter-video-generation': {
- async generate (params) {
- return await this.generateVideo(params);
- },
- },
- };
+ getDefaultModel (): string { // 'togetherai:' + DEFAULT_MODEL, i.e. the prefixed catalogue id of the default model
+ return 'togetherai:minimax/video-01-director';
+ }
- async generateVideo (params) {
+    async models (): Promise<IVideoModel[]> { // static catalogue reshaped to the shared IVideoModel form
+        return TOGETHER_VIDEO_GENERATION_MODELS.map((model) => ({
+            ...model,
+            aliases: [model.model], // bare Together name, e.g. 'minimax/video-01-director'
+            durationSeconds: model.durationSeconds ?? undefined, // catalogue nulls become undefined (same for the fields below)
+            dimensions: model.dimensions ?? undefined,
+            fps: model.fps ?? undefined,
+            keyframes: model.keyframes ?? undefined,
+            promptLength: model.promptLength ?? undefined,
+            promptSupported: model.promptSupported ?? undefined,
+        }));
+    }
+
+ async generate (params: IGenerateVideoParams): Promise {
const {
prompt,
model: requestedModel,
@@ -97,6 +91,7 @@ class TogetherVideoGenerationService extends BaseService {
}
const model = this.#stripTogetherPrefix(requestedModel ?? DEFAULT_MODEL);
+ const selectedModel = await this.#getModel(requestedModel);
if ( testMode ) {
return new TypedValue({
@@ -105,10 +100,15 @@ class TogetherVideoGenerationService extends BaseService {
}, DEFAULT_TEST_VIDEO_URL);
}
+ const costPerVideoCents = selectedModel?.costs?.['per-video'];
+ if ( ! costPerVideoCents ) {
+ throw new Error(`No pricing configured for video model ${model}`);
+ }
+ const costInMicroCents = costPerVideoCents * 1_000_000;
+
let normalizedSeconds = this.#coercePositiveInteger(seconds ?? duration);
- if ( ! no_extra_params )
- {
+ if ( ! no_extra_params ) {
normalizedSeconds ??= DEFAULT_DURATION_SECONDS;
}
@@ -117,21 +117,18 @@ class TogetherVideoGenerationService extends BaseService {
throw new Error('actor not found in context');
}
- const estimatedUsageUnits = 1; // Together video billing is per generated video
- const usageKey = this.#determineUsageKey(model);
-
- const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUsageUnits);
+ const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
if ( ! usageAllowed ) {
throw APIError.create('insufficient_funds');
}
- const createPayload = {
+ const createPayload: Together.VideoCreateParams & { metadata?: object } = {
prompt,
model,
};
if ( normalizedSeconds ) {
- createPayload.seconds = normalizedSeconds;
+ createPayload.seconds = String(normalizedSeconds);
}
if ( this.#isFiniteNumber(width) ) {
createPayload.width = Number(width);
@@ -152,7 +149,7 @@ class TogetherVideoGenerationService extends BaseService {
createPayload.seed = Number(seed);
}
if ( typeof outputFormat === 'string' && outputFormat.trim() ) {
- createPayload.output_format = outputFormat.trim();
+ createPayload.output_format = outputFormat.trim() as Together.VideoCreateParams['output_format'];
}
if ( this.#isFiniteNumber(outputQuality) ) {
createPayload.output_quality = Number(outputQuality);
@@ -161,16 +158,16 @@ class TogetherVideoGenerationService extends BaseService {
createPayload.negative_prompt = negativePrompt;
}
if ( Array.isArray(referenceImages) && referenceImages.length > 0 ) {
- createPayload.reference_images = referenceImages.filter(item => typeof item === 'string' && item.trim().length > 0);
+ createPayload.reference_images = referenceImages.filter((item: string) => typeof item === 'string' && item.trim().length > 0);
}
if ( Array.isArray(frameImages) && frameImages.length > 0 ) {
- createPayload.frame_images = frameImages.filter(frame => frame && typeof frame === 'object');
+ createPayload.frame_images = frameImages.filter((frame: any) => frame && typeof frame === 'object' && typeof frame.input_image === 'string') as Together.VideoCreateParams['frame_images'];
}
if ( metadata && typeof metadata === 'object' ) {
createPayload.metadata = metadata;
}
- const job = await this.client.videos.create(createPayload);
+ const job = await this.#client.videos.create(createPayload);
const finalJob = await this.#pollUntilComplete(job.id);
if ( finalJob.status === 'failed' ) {
@@ -185,7 +182,8 @@ class TogetherVideoGenerationService extends BaseService {
throw new Error('Video generation was cancelled');
}
- this.meteringService.incrementUsage(actor, usageKey, 1);
+ const usageKey = `together-video:${model}`;
+ await this.#meteringService.incrementUsage(actor, usageKey, 1, costInMicroCents);
const videoUrl = finalJob?.outputs?.video_url;
if ( typeof videoUrl === 'string' && videoUrl.trim() ) {
@@ -198,41 +196,9 @@ class TogetherVideoGenerationService extends BaseService {
throw new Error('Together AI response did not include a video URL');
}
- async models () {
- if ( models.length > 0 && models[0].costs_currency ) {
- return models;
- }
-
- const { TOGETHER_VIDEO_GENERATION_MODELS } = await import('./models.js');
- const costMapModule = await import('../../../MeteringService/costMaps/togetherCostMap.ts');
- const TOGETHER_COST_MAP = costMapModule.TOGETHER_COST_MAP;
-
- // Convert microcents to cents (divide by 1,000,000)
- const microCentsToCents = (microCents) => microCents / 1_000_000;
-
- models = TOGETHER_VIDEO_GENERATION_MODELS.map(model => {
- const result = { ...model };
-
- // Convert model ID from 'togetherai:google/veo-3.0' to cost key 'together-video:google/veo-3.0'
- const costKey = model.id.replace('togetherai:', 'together-video:');
- const costMicroCents = TOGETHER_COST_MAP[costKey];
-
- if ( costMicroCents !== undefined && costMicroCents > 0 ) {
- result.costs_currency = 'usd-cents';
- result.costs = {
- 'per-video': microCentsToCents(costMicroCents),
- };
- result.output_cost_key = 'per-video';
- }
-
- return result;
- });
-
- return models;
- }
-
- async #pollUntilComplete (jobId) {
- let job = await this.client.videos.retrieve(jobId);
+ async #pollUntilComplete (jobId: string): Promise {
+ // any here because sdk types are wrong https://docs.together.ai/docs/videos-overview -> "Job Status Reference"
+ let job = await (this.#client as any).videos.retrieve(jobId);
const start = Date.now();
while ( job.status === 'queued' || job.status === 'in_progress' ) {
@@ -241,31 +207,30 @@ class TogetherVideoGenerationService extends BaseService {
}
await this.#delay(POLL_INTERVAL_MS);
- job = await this.client.videos.retrieve(jobId);
+ job = await (this.#client as any).videos.retrieve(jobId);
}
return job;
}
- async #delay (ms) {
+    async #delay (ms: number): Promise<void> { // resolves once `ms` milliseconds have elapsed
 return await new Promise(resolve => setTimeout(resolve, ms));
 }
- #determineUsageKey (model) {
- if ( typeof model === 'string' && model.trim() ) {
- return `together-video:${model}`;
- }
- return DEFAULT_USAGE_KEY;
+    async #getModel (requestedModel?: string): Promise<IVideoModel | undefined> { // undefined when the name is not in the catalogue
+        const wantedName = this.#stripTogetherPrefix(requestedModel ?? DEFAULT_MODEL).toLowerCase(); // lowercased once, not per entry
+        const catalogue = await this.models();
+        return catalogue.find(m => m.model?.toLowerCase() === wantedName); // case-insensitive match on the bare `model` name
 }
- #stripTogetherPrefix (model) {
+ #stripTogetherPrefix (model: string): string {
if ( typeof model === 'string' && model.startsWith('togetherai:') ) {
return model.slice('togetherai:'.length);
}
return model;
}
- #coercePositiveInteger (value) {
+ #coercePositiveInteger (value: unknown): number | undefined {
if ( typeof value === 'number' && Number.isFinite(value) ) {
const rounded = Math.round(value);
return rounded > 0 ? rounded : undefined;
@@ -277,7 +242,7 @@ class TogetherVideoGenerationService extends BaseService {
return undefined;
}
- #isFiniteNumber (value) {
+ #isFiniteNumber (value: unknown): boolean {
if ( typeof value === 'number' ) {
return Number.isFinite(value);
}
@@ -288,7 +253,3 @@ class TogetherVideoGenerationService extends BaseService {
return false;
}
}
-
-module.exports = {
- TogetherVideoGenerationService,
-};
diff --git a/src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/models.ts
similarity index 67%
rename from src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js
rename to src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/models.ts
index 1ea54fe97..4ea7e4417 100644
--- a/src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js
+++ b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/models.ts
@@ -1,10 +1,45 @@
-export const TOGETHER_VIDEO_GENERATION_MODELS = [
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import { IVideoModel } from '../types.js';
+
+interface ITogetherVideoModel extends IVideoModel { // catalogue row: IVideoModel with `model`/`organization` required and capability fields explicitly nullable
+ model: string; // bare Together model name, e.g. 'minimax/video-01-director'
+ organization: string; // e.g. 'MiniMax', 'Google'
+ durationSeconds: number[] | null; // null in entries that list no value (e.g. the Wan 2.2 rows)
+ dimensions: string[] | null; // 'WIDTHxHEIGHT' strings, or null
+ fps: number[] | null;
+ keyframes: string[] | null; // values used in the catalogue: 'first', 'last'
+ promptLength: { min: number; max: number } | null;
+ promptSupported: boolean | null;
+}
+
+export const TOGETHER_VIDEO_GENERATION_MODELS: ITogetherVideoModel[] = [
{
id: 'togetherai:minimax/video-01-director',
organization: 'MiniMax',
name: 'MiniMax 01 Director',
model: 'minimax/video-01-director',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 28 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1366x768'],
fps: [25],
keyframes: ['first'],
@@ -16,7 +51,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'MiniMax',
name: 'MiniMax Hailuo 02',
model: 'minimax/hailuo-02',
- durationSeconds: 10,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 56 },
+ output_cost_key: 'per-video',
+ durationSeconds: [10],
dimensions: ['1366x768', '1920x1080'],
fps: [25],
keyframes: ['first'],
@@ -28,7 +66,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 2.0',
model: 'google/veo-2.0',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 250 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1280x720', '720x1280'],
fps: [24],
keyframes: ['first', 'last'],
@@ -40,7 +81,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0',
model: 'google/veo-3.0',
- durationSeconds: 8,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 160 },
+ output_cost_key: 'per-video',
+ durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -52,7 +96,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0 + Audio',
model: 'google/veo-3.0-audio',
- durationSeconds: 8,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 320 },
+ output_cost_key: 'per-video',
+ durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -64,7 +111,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0 Fast',
model: 'google/veo-3.0-fast',
- durationSeconds: 8,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 80 },
+ output_cost_key: 'per-video',
+ durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -76,7 +126,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0 Fast + Audio',
model: 'google/veo-3.0-fast-audio',
- durationSeconds: 8,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 120 },
+ output_cost_key: 'per-video',
+ durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -88,7 +141,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'ByteDance',
name: 'Seedance 1.0 Lite',
model: 'ByteDance/Seedance-1.0-lite',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 14 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: [
'864x480',
'736x544',
@@ -111,7 +167,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'ByteDance',
name: 'Seedance 1.0 Pro',
model: 'ByteDance/Seedance-1.0-pro',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 57 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: [
'864x480',
'736x544',
@@ -134,7 +193,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'PixVerse',
name: 'PixVerse v5',
model: 'pixverse/pixverse-v5',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 30 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: [
'640x360',
'480x360',
@@ -167,7 +229,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.1 Master',
model: 'kwaivgI/kling-2.1-master',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 92 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -179,7 +244,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.1 Standard',
model: 'kwaivgI/kling-2.1-standard',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 18 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -191,7 +259,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.1 Pro',
model: 'kwaivgI/kling-2.1-pro',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 32 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first', 'last'],
@@ -203,7 +274,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.0 Master',
model: 'kwaivgI/kling-2.0-master',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 92 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1280x720', '720x720', '720x1280'],
fps: [24],
keyframes: ['first'],
@@ -215,7 +289,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 1.6 Standard',
model: 'kwaivgI/kling-1.6-standard',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 19 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [30, 24],
keyframes: ['first'],
@@ -227,7 +304,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 1.6 Pro',
model: 'kwaivgI/kling-1.6-pro',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 32 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -239,6 +319,9 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Wan-AI',
name: 'Wan 2.2 I2V',
model: 'Wan-AI/Wan2.2-I2V-A14B',
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 31 },
+ output_cost_key: 'per-video',
durationSeconds: null,
dimensions: null,
fps: null,
@@ -251,6 +334,9 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Wan-AI',
name: 'Wan 2.2 T2V',
model: 'Wan-AI/Wan2.2-T2V-A14B',
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 66 },
+ output_cost_key: 'per-video',
durationSeconds: null,
dimensions: null,
fps: null,
@@ -263,7 +349,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Vidu',
name: 'Vidu 2.0',
model: 'vidu/vidu-2.0',
- durationSeconds: 8,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 28 },
+ output_cost_key: 'per-video',
+ durationSeconds: [8],
dimensions: [
'1920x1080',
'1080x1080',
@@ -285,7 +374,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Vidu',
name: 'Vidu Q1',
model: 'vidu/vidu-q1',
- durationSeconds: 5,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 22 },
+ output_cost_key: 'per-video',
+ durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first', 'last'],
@@ -297,7 +389,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'OpenAI',
name: 'Sora 2',
model: 'openai/sora-2',
- durationSeconds: 8,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 80 },
+ output_cost_key: 'per-video',
+ durationSeconds: [8],
dimensions: ['1280x720', '720x1280'],
fps: null,
keyframes: ['first'],
@@ -309,7 +404,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'OpenAI',
name: 'Sora 2 Pro',
model: 'openai/sora-2-pro',
- durationSeconds: 8,
+ costs_currency: 'usd-cents',
+ costs: { 'per-video': 400 },
+ output_cost_key: 'per-video',
+ durationSeconds: [8],
dimensions: ['1280x720', '720x1280'],
fps: null,
keyframes: ['first'],
diff --git a/src/backend/src/services/ai/video/providers/types.ts b/src/backend/src/services/ai/video/providers/types.ts
new file mode 100644
index 000000000..2b378d48e
--- /dev/null
+++ b/src/backend/src/services/ai/video/providers/types.ts
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+export interface IVideoModel { // catalogue entry describing one video model offered by a provider
+    id: string;
+    name: string;
+    puterId?: string;
+    provider?: string;
+    aliases?: string[];
+    description?: string;
+    version?: string;
+    costs_currency?: string; // unit of the numbers in `costs`, e.g. 'usd-cents'
+    index_cost_key?: string;
+    output_cost_key?: string; // name of one of the keys in `costs`
+    costs?: Record<string, number>; // price table keyed by e.g. 'per-second' or 'per-video'
+    durationSeconds?: number[] | null;
+    dimensions?: string[] | null; // 'WIDTHxHEIGHT' strings such as '1280x720'
+    defaultUsageKey?: string;
+    organization?: string;
+    model?: string;
+    fps?: number[] | null;
+    keyframes?: string[] | null;
+    promptLength?: { min: number; max: number } | null;
+    promptSupported?: boolean | null;
+}
+
+export interface IGenerateVideoParams { // request options accepted by IVideoProvider.generate
+ prompt: string;
+ model?: string;
+ provider?: string;
+ test_mode?: boolean;
+ seconds?: number | string; // clip length; the providers in this patch read `seconds ?? duration`
+ duration?: number | string; // fallback used when `seconds` is absent
+ size?: string; // the OpenAI provider reads `size ?? resolution`
+ resolution?: string; // fallback used when `size` is absent
+ width?: number;
+ height?: number;
+ fps?: number;
+ steps?: number;
+ guidance_scale?: number;
+ seed?: number;
+ output_format?: string;
+ output_quality?: number;
+ negative_prompt?: string;
+ reference_images?: string[];
+ frame_images?: object[];
+ metadata?: object;
+ input_reference?: unknown;
+ no_extra_params?: boolean;
+}
+
+export interface IVideoProvider { // contract implemented by each video backend, e.g. TogetherVideoGenerationProvider
+    generate (params: IGenerateVideoParams): Promise<unknown>; // NOTE(review): the providers in this patch return TypedValue instances; narrow if that type can be imported here
+    models (): Promise<IVideoModel[]> | IVideoModel[]; // may be sync or async, so callers should `await` the result
+    getDefaultModel (): string;
+}
diff --git a/src/backend/src/services/drivers/DriverService.js b/src/backend/src/services/drivers/DriverService.js
index 775f4b9a9..47c3b09b6 100644
--- a/src/backend/src/services/drivers/DriverService.js
+++ b/src/backend/src/services/drivers/DriverService.js
@@ -293,7 +293,7 @@ class DriverService extends BaseService {
'puter-speech2txt': 'openai-speech2txt',
'puter-chat-completion': 'openai-completion',
'puter-image-generation': 'openai-image-generation',
- 'puter-video-generation': 'openai-video-generation',
+ 'puter-video-generation': 'ai-video',
'puter-apps': 'es:app',
'puter-subdomains': 'es:subdomain',
'puter-notifications': 'es:notification',
diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js
index a8712fdd7..f5eadcf06 100644
--- a/src/puter-js/src/modules/AI.js
+++ b/src/puter-js/src/modules/AI.js
@@ -11,16 +11,6 @@ const normalizeTTSProvider = (value) => {
return value;
};
-const TOGETHER_VIDEO_MODEL_PREFIXES = [
- 'minimax/',
- 'google/',
- 'bytedance/',
- 'pixverse/',
- 'kwaivgi/',
- 'vidu/',
- 'wan-ai/',
-];
-
class AI {
/**
* Creates a new instance with the given authentication token, API origin, and app ID,
@@ -908,39 +898,19 @@ class AI {
throw ({ message: 'Prompt parameter is required', code: 'prompt_required' });
}
- if ( ! options.model ) {
- options.model = 'sora-2';
- }
-
if ( options.duration !== undefined && options.seconds === undefined ) {
options.seconds = options.duration;
}
- // This sucks, should be backend's job like we do for chat models now
- let videoService = 'openai-video-generation';
+ if ( options.test_mode === true ) {
+ testMode = true;
+ }
+
+ let videoService = 'ai-video';
const driverHint = typeof options.driver === 'string' ? options.driver : undefined;
- const driverHintLower = driverHint ? driverHint.toLowerCase() : undefined;
- const providerRaw = typeof options.provider === 'string'
- ? options.provider
- : (typeof options.service === 'string' ? options.service : undefined);
- const providerHint = typeof providerRaw === 'string' ? providerRaw.toLowerCase() : undefined;
- const modelLower = typeof options.model === 'string' ? options.model.toLowerCase() : '';
- const looksLikeTogetherVideoModel = typeof options.model === 'string' &&
- (TOGETHER_VIDEO_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix)) || options.model.startsWith('togetherai:'));
-
- if ( driverHintLower === 'together' || driverHintLower === 'together-ai' ) {
- videoService = 'together-video-generation';
- } else if ( driverHintLower === 'together-video-generation' ) {
- videoService = 'together-video-generation';
- } else if ( driverHintLower === 'openai' ) {
- videoService = 'openai-video-generation';
- } else if ( driverHint ) {
+ if ( driverHint ) {
videoService = driverHint;
- } else if ( providerHint === 'together' || providerHint === 'together-ai' ) {
- videoService = 'together-video-generation';
- } else if ( looksLikeTogetherVideoModel ) {
- videoService = 'together-video-generation';
}
return await utils.make_driver_method(['prompt'], 'puter-video-generation', videoService, 'generate', {
diff --git a/src/puter-js/types/modules/ai.d.ts b/src/puter-js/types/modules/ai.d.ts
index b997afd07..e97d0995a 100644
--- a/src/puter-js/types/modules/ai.d.ts
+++ b/src/puter-js/types/modules/ai.d.ts
@@ -82,8 +82,10 @@ export interface Txt2ImgOptions {
export interface Txt2VidOptions {
prompt?: string;
provider?: string;
+ driver?: string;
model?: string;
seconds?: number;
+ duration?: number;
test_mode?: boolean;
// OpenAI options