refactor ai video gen (#2711)

* refactor ai video generation into one service

* use old duration and dimension keys

* use costoverride billing like the others
This commit is contained in:
Shruc
2026-03-24 12:42:52 +03:00
committed by GitHub
parent 76f5a26d84
commit bed84cad76
14 changed files with 863 additions and 540 deletions
@@ -28,9 +28,7 @@ import { OpenAISpeechToTextService } from '../../services/ai/stt/OpenAISpeechToT
import { AWSPollyService } from '../../services/ai/tts/AWSPollyService.js';
import { ElevenLabsTTSService } from '../../services/ai/tts/ElevenLabsTTSService.js';
import { OpenAITTSService } from '../../services/ai/tts/OpenAITTSService.js';
import { TogetherVideoGenerationService } from '../../services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js';
import { OpenAIVideoGenerationService } from '../../services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js';
// import { AIVideoGenerationService } from '../../services/ai/video/AIVideoGenerationService.js';
import { AIVideoGenerationService } from '../../services/ai/video/AIVideoGenerationService.js';
/**
* PuterAIModule class extends AdvancedBase to manage and register various AI services.
@@ -59,7 +57,7 @@ export class PuterAIModule extends AdvancedBase {
services.registerService('ai-image', AIImageGenerationService);
// video generation ai service
// services.registerService('ai-video', AIVideoGenerationService);
services.registerService('ai-video', AIVideoGenerationService);
// TODO DS: centralize other service types too
// TODO: services should govern their own availability instead of the module deciding what to register
@@ -83,14 +81,6 @@ export class PuterAIModule extends AdvancedBase {
services.registerService('openai-tts', OpenAITTSService);
services.registerService('openai-speech2txt', OpenAISpeechToTextService);
// TODO DS: move to video service
services.registerService('openai-video-generation', OpenAIVideoGenerationService);
}
if ( config?.services?.['together-ai'] ) {
// TODO DS: move to video service
services.registerService('together-video-generation', TogetherVideoGenerationService);
}
}
}
+4 -28
View File
@@ -160,20 +160,8 @@ class ChatAPIService extends BaseService {
try {
const svc_su = this.services.get('su');
const models = await svc_su.sudo(async () => {
const items = [];
if ( this.services.has('openai-video-generation') ) {
const svc_video = this.services.get('openai-video-generation');
if ( typeof svc_video.models === 'function' ) {
items.push(...await svc_video.models());
}
}
if ( this.services.has('together-video-generation') ) {
const svc_video = this.services.get('together-video-generation');
if ( typeof svc_video.models === 'function' ) {
items.push(...await svc_video.models());
}
}
return items;
const svc_video = this.services.get('ai-video');
return svc_video.models();
});
res.json({ models });
} catch ( error ) {
@@ -190,20 +178,8 @@ class ChatAPIService extends BaseService {
try {
const svc_su = this.services.get('su');
const models = await svc_su.sudo(async () => {
const items = [];
if ( this.services.has('openai-video-generation') ) {
const svc_video = this.services.get('openai-video-generation');
if ( typeof svc_video.models === 'function' ) {
items.push(...(await svc_video.models()).map(model => model.puterId || model.id));
}
}
if ( this.services.has('together-video-generation') ) {
const svc_video = this.services.get('together-video-generation');
if ( typeof svc_video.models === 'function' ) {
items.push(...(await svc_video.models()).map(model => model.id));
}
}
return items;
const svc_video = this.services.get('ai-video');
return svc_video.list();
});
res.json({ models });
} catch ( error ) {
@@ -5,4 +5,5 @@ export const OPENAI_VIDEO_COST_MAP = {
'openai:sora-2:default': toMicroCents(0.10),
'openai:sora-2-pro:default': toMicroCents(0.30),
'openai:sora-2-pro:xl': toMicroCents(0.50),
'openai:sora-2-pro:xxl': toMicroCents(0.70),
};
@@ -0,0 +1,2 @@
*.js
*.js.map
@@ -0,0 +1,288 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { APIError } from '../../../api/APIError.js';
import { Context } from '../../../util/context.js';
import BaseService from '../../BaseService.js';
import { DriverService } from '../../drivers/DriverService.js';
import { EventService } from '../../EventService.js';
import { MeteringService } from '../../MeteringService/MeteringService.js';
import { OpenAIVideoGenerationProvider } from './providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.js';
import { TogetherVideoGenerationProvider } from './providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.js';
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from './providers/types.js';
export class AIVideoGenerationService extends BaseService {
static SERVICE_NAME = 'ai-video';
static DEFAULT_PROVIDER = 'openai-video-generation';
/** Metering backend, read from the `meteringService` property of the service registered under that name. */
get meteringService (): MeteringService {
    const { meteringService } = this.services.get('meteringService');
    return meteringService;
}
/** Service registered under the name `event`. */
get eventService (): EventService {
    const eventBus = this.services.get('event');
    return eventBus;
}
/** Service registered under the name `driver`. */
get driverService (): DriverService {
    const drivers = this.services.get('driver');
    return drivers;
}
/**
 * Looks up a registered video provider.
 * @param name Provider key, e.g. `openai-video-generation`.
 * @returns The provider, or `undefined` when nothing is registered under that key.
 */
getProvider (name: string): IVideoProvider | undefined {
    const registry = this.#providers;
    return registry[name];
}
#providers: Record<string, IVideoProvider> = {};
#modelIdMap: Record<string, IVideoModel[]> = {};
/**
 * Driver interface table: reports test-mode support for
 * `puter-video-generation.generate` and forwards that call to `generate`.
 */
static IMPLEMENTS = {
    'driver-capabilities': {
        supports_test_mode (iface: string, method_name: string) {
            if ( iface !== 'puter-video-generation' ) {
                return false;
            }
            return method_name === 'generate';
        },
    },
    'puter-video-generation': {
        // `this` is cast to the service type here; presumably the driver layer
        // invokes the method with the service instance bound — confirm.
        async generate (...parameters: Parameters<AIVideoGenerationService['generate']>) {
            const service = this as unknown as AIVideoGenerationService;
            return service.generate(...parameters);
        },
    },
};
/**
 * Resolves a model id, puterId or alias to a registered model entry.
 *
 * Keys in the lookup table are stored trimmed and lower-cased, so `modelId`
 * should be normalized the same way by the caller.
 *
 * @param modelId  Lookup key (id, puterId or alias).
 * @param provider Optional provider key; when given, that provider's entry is
 *                 preferred and the first entry is used if it has none.
 * @returns The matching entry, or `undefined` when the key is not registered.
 */
getModel ({ modelId, provider }: { modelId: string, provider?: string }) {
    // The table is a plain object, so keys such as `constructor` or `__proto__`
    // resolve to inherited non-array values. Anything that is not a list is
    // treated as "unknown model" instead of crashing on `.find`.
    const candidates = this.#modelIdMap[modelId];
    if ( ! Array.isArray(candidates) ) {
        return undefined;
    }

    if ( provider ) {
        return candidates.find(entry => entry.provider === provider) ?? candidates[0];
    }

    // Prefer an exact primary-id match, then an exact puterId match, then the first entry.
    return candidates.find(entry => entry.id === modelId)
        ?? candidates.find(entry => entry.puterId === modelId)
        ?? candidates[0];
}
/**
 * Instantiates the built-in providers that have an API key configured, then
 * lets extensions contribute more through the `ai.video.registerProviders` event.
 * An extension provider whose name is already taken is stored under
 * `<name>-extension` instead.
 */
private async registerProviders () {
    const openAiSettings = this.config.providers?.['openai-video-generation'] || this.global_config?.services?.['openai'] || this.global_config?.openai;
    const openAiKey = openAiSettings?.apiKey || openAiSettings?.secret_key;
    if ( openAiKey ) {
        this.#providers['openai-video-generation'] = new OpenAIVideoGenerationProvider(
            { apiKey: openAiKey },
            this.meteringService,
        );
    }

    const togetherSettings = this.config.providers?.['together-video-generation'] || this.global_config?.services?.['together-ai'];
    const togetherKey = togetherSettings?.apiKey || togetherSettings?.secret_key;
    if ( togetherKey ) {
        this.#providers['together-video-generation'] = new TogetherVideoGenerationProvider(
            { apiKey: togetherKey },
            this.meteringService,
        );
    }

    // Listeners of this event add their providers to the object passed along with it.
    const contributed = {} as Record<string, IVideoProvider>;
    await this.eventService.emit('ai.video.registerProviders', contributed);
    for ( const key in contributed ) {
        const nameTaken = Boolean(this.#providers[key]);
        if ( nameTaken ) {
            console.warn('AIVideoGenerationService: provider name conflict for ', key, ' registering with -extension suffix');
        }
        this.#providers[nameTaken ? `${key}-extension` : key] = contributed[key];
    }
}
/**
 * Boot hook: registers providers, aliases every provider name to this service
 * for the `puter-video-generation` interface, and builds the lookup table that
 * maps each model id / puterId / alias to a list ("bucket") of model entries.
 */
protected async '__on_boot.consolidation' () {
await this.registerProviders();
for ( const providerName in this.#providers ) {
const provider = this.#providers[providerName];
// alias all driver requests to go here to support legacy routing
this.driverService.register_service_alias(
AIVideoGenerationService.SERVICE_NAME,
providerName,
{ iface: 'puter-video-generation' },
);
// build model id map
for ( const model of await provider.models() ) {
// NOTE: normalization below is done in place, i.e. on the objects returned by provider.models().
model.id = model.id.trim().toLowerCase();
if ( model.puterId ) {
model.puterId = model.puterId.trim().toLowerCase();
}
if ( model.aliases ) {
model.aliases = model.aliases.map(alias => alias.trim().toLowerCase());
}
// register a copy of the model, tagged with its provider, under its primary id
if ( ! this.#modelIdMap[model.id] ) {
this.#modelIdMap[model.id] = [];
}
this.#modelIdMap[model.id].push({ ...model, provider: providerName });
// the puterId is treated as one more alias
if ( model.puterId ) {
if ( model.aliases ) {
model.aliases.push(model.puterId);
} else {
model.aliases = [model.puterId];
}
}
if ( model.aliases ) {
for ( let alias of model.aliases ) {
alias = alias.trim().toLowerCase();
// alias not seen yet: it shares the bucket of the primary id
if ( ! this.#modelIdMap[alias] ) {
this.#modelIdMap[alias] = this.#modelIdMap[model.id];
continue;
}
// alias already points at a different bucket: add this model there and
// re-point the primary id at that bucket
if ( this.#modelIdMap[alias] !== this.#modelIdMap[model.id] ) {
this.#modelIdMap[alias].push({ ...model, provider: providerName });
this.#modelIdMap[model.id] = this.#modelIdMap[alias];
continue;
}
}
}
// order the bucket by cost, cheapest first; entries without a resolvable cost sort last (Infinity)
this.#modelIdMap[model.id].sort((a, b) => {
const aCostKey = a.index_cost_key || a.output_cost_key || Object.keys(a.costs || {})[0];
const bCostKey = b.index_cost_key || b.output_cost_key || Object.keys(b.costs || {})[0];
const aCost = a.costs?.[aCostKey] ?? Infinity;
const bCost = b.costs?.[bCostKey] ?? Infinity;
return aCost - bCost;
});
}
}
}
/**
 * Lists every registered model once per provider.
 *
 * The lookup table holds the same entry under several keys (id, puterId,
 * aliases), so entries are de-duplicated on `provider:puterId-or-id`, keeping
 * the first occurrence. The result is ordered by provider, then by id.
 */
models () {
    const unique = new Map<string, IVideoModel>();
    for ( const bucket of Object.values(this.#modelIdMap) ) {
        for ( const entry of bucket ) {
            const identity = `${entry.provider}:${entry.puterId || entry.id}`;
            if ( ! unique.has(identity) ) {
                unique.set(identity, entry);
            }
        }
    }

    const ordered = [...unique.values()];
    ordered.sort((a, b) => {
        return a.provider === b.provider
            ? a.id.localeCompare(b.id)
            : a.provider!.localeCompare(b.provider!);
    });
    return ordered;
}
/** Returns the sorted public identifiers (puterId when set, otherwise id) of all models. */
list () {
    const identifiers = this.models().map(({ puterId, id }) => puterId || id);
    identifiers.sort();
    return identifiers;
}
/**
 * Routes a video generation request to the provider that owns the requested model.
 *
 * Provider choice: `parameters.provider`, else the legacy service name the
 * driver call was addressed to, else the default provider (or the first
 * configured one). An unknown provider name falls back to the first configured
 * provider. When no model is given, the chosen provider's default model is used.
 *
 * Duration and size are snapped to the values the model declares: an
 * unsupported or missing value is replaced by the model's first declared
 * option, and both spellings (`seconds`/`duration`, `size`/`resolution`) are
 * set on the parameters handed to the provider.
 *
 * @param parameters Request parameters; mutated in place during normalization.
 * @returns Whatever the selected provider's `generate` returns.
 * @throws Error when no provider is configured or the model's provider is missing.
 * @throws APIError `field_invalid` when the model cannot be resolved.
 */
async generate (parameters: IGenerateVideoParams) {
    // Guard against a missing driver-call context instead of failing on destructuring.
    const clientDriverCall = (Context.get('client_driver_call') ?? {}) as { test_mode?: boolean; response_metadata?: Record<string, unknown>; intended_service?: string };
    const { test_mode: testMode, intended_service: legacyProviderName } = clientDriverCall;

    // Only strings can be normalized; any other value falls through to the
    // "invalid model" error below rather than throwing on `.trim()`.
    if ( typeof parameters.model === 'string' ) {
        parameters.model = parameters.model.trim().toLowerCase();
    }

    const configuredProviders = Object.keys(this.#providers);
    if ( configuredProviders.length === 0 ) {
        throw new Error('no video generation providers configured');
    }

    // Own-property check: names like `constructor` must not pass as registered providers.
    const isRegistered = (name: string) => Object.prototype.hasOwnProperty.call(this.#providers, name);

    let intendedProvider = (parameters.provider || (legacyProviderName === AIVideoGenerationService.SERVICE_NAME ? '' : legacyProviderName)) ?? '';
    if ( !parameters.model && !intendedProvider ) {
        intendedProvider = configuredProviders.includes(AIVideoGenerationService.DEFAULT_PROVIDER)
            ? AIVideoGenerationService.DEFAULT_PROVIDER
            : configuredProviders[0];
    }
    if ( intendedProvider && !isRegistered(intendedProvider) ) {
        intendedProvider = configuredProviders[0];
    }
    if ( !parameters.model && intendedProvider ) {
        parameters.model = this.#providers[intendedProvider].getDefaultModel();
    }

    const model = parameters.model ? this.getModel({ modelId: parameters.model, provider: intendedProvider }) : undefined;
    if ( ! model ) {
        const availableModelsUrl = `${this.global_config.origin}/puterai/video/models`;
        throw APIError.create('field_invalid', undefined, {
            key: 'model',
            expected: `a valid model name from ${availableModelsUrl}`,
            got: parameters.model,
        });
    }

    const provider = this.#providers[model.provider!];
    if ( ! provider ) {
        throw new Error(`no provider found for model ${model.id}`);
    }

    if ( model.durationSeconds?.length ) {
        const requestedSeconds = parameters.seconds ?? parameters.duration;
        const normalizedSeconds = typeof requestedSeconds === 'string'
            ? Number.parseInt(requestedSeconds, 10)
            : requestedSeconds;
        const validSeconds = model.durationSeconds.includes(Number(normalizedSeconds))
            ? normalizedSeconds
            : model.durationSeconds[0];
        parameters.seconds = validSeconds;
        parameters.duration = validSeconds;
    }

    if ( model.dimensions?.length ) {
        // `size` wins over `resolution`; surrounding whitespace is dropped before
        // matching so a padded but otherwise valid value is honored.
        const requestedResolution = [parameters.size, parameters.resolution]
            .find((value): value is string => typeof value === 'string' && value.trim() !== '')
            ?.trim();
        const normalizedResolution = requestedResolution && model.dimensions.includes(requestedResolution)
            ? requestedResolution
            : model.dimensions[0];
        parameters.size = normalizedResolution;
        parameters.resolution = normalizedResolution;
    }

    return await provider.generate({
        ...parameters,
        model: model.id,
        provider: model.provider,
        test_mode: testMode,
    });
}
}
@@ -1,342 +0,0 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const APIError = require('../../../../api/APIError');
const BaseService = require('../../../BaseService');
const { TypedValue } = require('../../../drivers/meta/Runtime');
const { Context } = require('../../../../util/context');
const { Readable } = require('stream');
// URL returned in test mode in place of a generated video.
const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
// Maximum time spent polling a job before giving up.
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
// Pause between two job status polls.
const POLL_INTERVAL_MS = 5_000;
// Duration used for cost estimates and listings when none can be derived.
const DEFAULT_DURATION_SECONDS = 4;
// Size used when the request carries no accepted size.
const DEFAULT_SIZE = '720x1280';
// Sizes and durations accepted by the normalizers; durations are compared as strings.
const ALLOWED_SIZES = new Set(['720x1280', '1280x720', '1024x1792', '1792x1024']);
const ALLOWED_SECONDS = new Set(['4', '8', '12']);
// Model catalogue: `id` is sent to OpenAI, `puterId`/`aliases` are alternative
// request names, `defaultUsageKey` is the metering key for the default size tier.
const OPENAI_VIDEO_MODELS = [
{
puterId: 'openai:openai/sora-2',
id: 'sora-2',
aliases: ['openai/sora-2'],
defaultUsageKey: 'openai:sora-2:default',
},
{
puterId: 'openai:openai/sora-2-pro',
id: 'sora-2-pro',
aliases: ['openai/sora-2-pro'],
defaultUsageKey: 'openai:sora-2-pro:default',
},
];
class OpenAIVideoGenerationService extends BaseService {
/** @type {import('../../../MeteringService/MeteringService').MeteringService} */
get meteringService () {
return this.services.get('meteringService').meteringService;
}
static MODULES = {
openai: require('openai'),
};
_construct () {
this.models_ = Object.fromEntries(OPENAI_VIDEO_MODELS.map(model => [
model.id,
{ defaultUsageKey: model.defaultUsageKey },
]));
}
async _init () {
let apiKey =
this.config?.services?.openai?.apiKey ??
this.global_config?.services?.openai?.apiKey;
if ( ! apiKey ) {
apiKey =
this.config?.openai?.secret_key ??
this.global_config.openai?.secret_key;
console.warn('The `openai.secret_key` configuration format is deprecated. ' +
'Please use `services.openai.apiKey` instead.');
}
this.openai = new this.modules.openai.OpenAI({
apiKey,
});
}
static IMPLEMENTS = {
'driver-capabilities': {
supports_test_mode (iface, method_name) {
return iface === 'puter-video-generation' &&
method_name === 'generate';
},
},
'puter-video-generation': {
async generate (params) {
return await this.generateVideo(params);
},
},
};
async models () {
// Import cost map dynamically
const costMapModule = await import('../../../MeteringService/costMaps/openaiVideoCostMap.ts');
const OPENAI_VIDEO_COST_MAP = costMapModule.OPENAI_VIDEO_COST_MAP;
// Convert microcents to cents (divide by 1,000,000)
const microCentsToCents = (microCents) => microCents / 1_000_000;
return OPENAI_VIDEO_MODELS.map(model => {
const result = { ...model };
// Get cost for default usage key
const defaultCostMicroCents = OPENAI_VIDEO_COST_MAP[model.defaultUsageKey];
if ( defaultCostMicroCents !== undefined ) {
const perSecondCost = microCentsToCents(defaultCostMicroCents);
result.costs_currency = 'usd-cents';
result.costs = {
'per-second': perSecondCost,
'default-duration-per-video': perSecondCost * DEFAULT_DURATION_SECONDS,
};
result.output_cost_key = 'default-duration-per-video';
}
// Add cost for xl variant if it exists (sora-2-pro only)
if ( model.id === 'sora-2-pro' ) {
const xlCostMicroCents = OPENAI_VIDEO_COST_MAP['openai:sora-2-pro:xl'];
if ( xlCostMicroCents !== undefined ) {
if ( ! result.costs ) {
result.costs = {};
result.costs_currency = 'usd-cents';
}
const perSecondXlCost = microCentsToCents(xlCostMicroCents);
result.costs['per-second-xl'] = perSecondXlCost;
result.costs['default-duration-per-video-xl'] = perSecondXlCost * DEFAULT_DURATION_SECONDS;
}
}
return result;
});
}
/**
 * Generates a video with OpenAI and returns it as a stream.
 *
 * Steps: validate the prompt, resolve the model, short-circuit in test mode,
 * normalize size/duration, check credits for the estimated duration, create
 * the job and poll it, download the result, then record usage for the
 * duration and size reported by the finished job.
 *
 * @param params Request parameters (`prompt`, `model`, `seconds`/`duration`,
 *               `size`/`resolution`, `input_reference`, `test_mode`).
 * @returns A TypedValue: the test video URL in test mode, otherwise a stream.
 * @throws APIError `field_invalid` for a bad prompt or model, `insufficient_funds`
 *         when the credit check fails.
 * @throws Error when the job fails, polling times out, or no usage key applies.
 */
async generateVideo (params) {
const {
prompt,
model: requestedModel,
duration,
seconds,
size,
resolution,
input_reference: inputReference,
test_mode: testMode,
} = params ?? {};
if ( typeof prompt !== 'string' || !prompt.trim() ) {
throw APIError.create('field_invalid', null, {
key: 'prompt',
expected: 'a non-empty string',
got: prompt,
});
}
// accept the OpenAI id, the puterId or an alias; default to sora-2 when no model is given
const resolvedModel = OPENAI_VIDEO_MODELS.find(entry =>
entry.id === requestedModel ||
entry.puterId === requestedModel ||
(entry.aliases || []).includes(requestedModel))?.id;
const model = resolvedModel ?? requestedModel ?? 'sora-2';
const modelConfig = this.models_[model];
if ( ! modelConfig ) {
throw APIError.create('field_invalid', null, {
key: 'model',
expected: `one of: ${ Object.keys(this.models_).join(', ')}`,
got: model,
});
}
// test mode returns a fixed URL before any credit check or OpenAI call
if ( testMode ) {
return new TypedValue({
$: 'string:url:web',
content_type: 'video',
}, DEFAULT_TEST_VIDEO_URL);
}
// `size` and `seconds` take precedence over `resolution` and `duration`
const normalizedSize = this.#normalizeSize(size ?? resolution) ?? DEFAULT_SIZE;
const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? '4';
const usageKey = this.#determineUsageKey(model, normalizedSize);
if ( ! usageKey ) {
throw new Error(`Unsupported pricing tier for model ${model}`);
}
// credit check uses the requested duration as the estimated unit count
const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? DEFAULT_DURATION_SECONDS;
const actor = Context.get('actor');
const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUnits);
if ( ! usageAllowed ) {
throw APIError.create('insufficient_funds');
}
const createParams = {
model,
prompt,
seconds: normalizedSeconds,
size: normalizedSize,
};
if ( inputReference ) {
createParams.input_reference = inputReference;
}
const createResponse = await this.openai.videos.create(createParams);
const finalJob = await this.#pollUntilComplete(createResponse);
if ( finalJob.status === 'failed' ) {
const errorMessage = finalJob.error?.message ?? 'Video generation failed';
throw new Error(errorMessage);
}
// bill by what the finished job reports, falling back to the requested values
const finalResolution = this.#normalizeSize(finalJob.size) ?? normalizedSize;
const finalUsageKey = this.#determineUsageKey(model, finalResolution);
if ( ! finalUsageKey ) {
throw new Error(`Unsupported pricing tier for model ${model}`);
}
const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits;
const downloadResponse = await this.openai.videos.downloadContent(finalJob.id);
const contentType = downloadResponse.headers.get('content-type') ?? 'video/mp4';
// convert a web stream to a Node stream; buffer the body when no stream is exposed
let stream = downloadResponse.body;
if ( stream && typeof stream.getReader === 'function' ) {
stream = Readable.fromWeb(stream);
}
if ( ! stream ) {
const arrayBuffer = await downloadResponse.arrayBuffer();
stream = Readable.from(Buffer.from(arrayBuffer));
}
// usage is recorded only after the download response was obtained
this.meteringService.incrementUsage(actor, finalUsageKey, actualSeconds);
return new TypedValue({
$: 'stream',
content_type: contentType,
}, stream);
}
async #pollUntilComplete (initialJob) {
let job = initialJob;
const start = Date.now();
while ( job.status === 'queued' || job.status === 'in_progress' ) {
if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) {
throw new Error('Timed out waiting for Sora video generation to complete');
}
await this.#delay(POLL_INTERVAL_MS);
job = await this.openai.videos.retrieve(job.id);
}
return job;
}
async #delay (ms) {
return await new Promise(resolve => setTimeout(resolve, ms));
}
#normalizeSize (candidate) {
if ( ! candidate ) return undefined;
const normalized = this.#normalizeResolution(candidate);
if ( normalized && ALLOWED_SIZES.has(normalized) ) {
return normalized;
}
return undefined;
}
#normalizeSeconds (value) {
if ( value === null || value === undefined ) {
return undefined;
}
if ( typeof value === 'number' && Number.isFinite(value) ) {
const rounded = String(Math.round(value));
return ALLOWED_SECONDS.has(rounded) ? rounded : undefined;
}
if ( typeof value === 'string' ) {
const trimmed = value.trim();
if ( ALLOWED_SECONDS.has(trimmed) ) {
return trimmed;
}
const numeric = Number.parseInt(trimmed, 10);
if ( Number.isFinite(numeric) ) {
const normalized = String(numeric);
return ALLOWED_SECONDS.has(normalized) ? normalized : undefined;
}
}
return undefined;
}
#determineUsageKey (model, normalizedSize) {
const config = this.models_[model];
if ( ! config ) return null;
if ( model === 'sora-2-pro' && normalizedSize === '1792x1024' ) {
return 'openai:sora-2-pro:xl';
}
return config.defaultUsageKey;
}
#normalizeResolution (value) {
if ( ! value ) return undefined;
if ( typeof value === 'string' ) {
const match = value.match(/(\\d+)\\s*x\\s*(\\d+)/i);
if ( match ) {
const width = Number.parseInt(match[1], 10);
const height = Number.parseInt(match[2], 10);
if ( Number.isFinite(width) && Number.isFinite(height) ) {
const larger = Math.max(width, height);
const smaller = Math.min(width, height);
return `${larger}x${smaller}`;
}
}
}
return undefined;
}
#parseSeconds (value) {
if ( value === null || value === undefined ) return undefined;
if ( typeof value === 'number' && Number.isFinite(value) ) {
return value;
}
if ( typeof value === 'string' ) {
const numeric = Number.parseInt(value, 10);
if ( Number.isFinite(numeric) ) {
return numeric;
}
}
return undefined;
}
}
module.exports = {
OpenAIVideoGenerationService,
};
@@ -0,0 +1,246 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import OpenAI from 'openai';
import APIError from '../../../../../api/APIError.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js';
import { TypedValue } from '../../../../drivers/meta/Runtime.js';
import { Readable } from 'stream';
import { OPENAI_VIDEO_MODELS, OPENAI_VIDEO_ALLOWED_SECONDS } from './models.js';
// URL returned in test mode in place of a generated video.
const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
// Maximum time spent polling a job before giving up (5 minutes).
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;
// Pause between two job status polls.
const POLL_INTERVAL_MS = 5_000;
// Duration (seconds) used when the request carries no accepted duration.
const DEFAULT_DURATION_SECONDS = 4;
export class OpenAIVideoGenerationProvider implements IVideoProvider {
#openai: OpenAI;
#meteringService: MeteringService;
/**
 * @param config          Carries the OpenAI API key.
 * @param meteringService Used for credit checks and usage accounting.
 * @throws Error when `config.apiKey` is empty.
 */
constructor (config: { apiKey: string }, meteringService: MeteringService) {
    const { apiKey } = config;
    if ( ! apiKey ) {
        throw new Error('OpenAI video generation requires an API key');
    }
    this.#meteringService = meteringService;
    this.#openai = new OpenAI({ apiKey });
}
/** Id of the first catalogue entry, used when a request names no model. */
getDefaultModel (): string {
    const [firstModel] = OPENAI_VIDEO_MODELS;
    return firstModel.id;
}
/** Returns the OpenAI video model catalogue (the shared array, not a copy). */
async models (): Promise<IVideoModel[]> {
    const catalogue = OPENAI_VIDEO_MODELS;
    return catalogue;
}
/**
 * Generates a video with OpenAI and returns it as a stream.
 *
 * Steps: validate the prompt, resolve the model, short-circuit in test mode,
 * normalize size/duration, price the request from the model's `costs` table,
 * check credits for the estimated duration, create the job and poll it,
 * download the result, then record usage with an explicit cost computed from
 * the size and duration reported by the finished job.
 *
 * @param params Request parameters (`prompt`, `model`, `seconds`/`duration`,
 *               `size`/`resolution`, `input_reference`, `test_mode`).
 * @returns A TypedValue: the test video URL in test mode, otherwise a stream.
 * @throws APIError `field_invalid` for a bad prompt, `insufficient_funds` when
 *         the credit check fails.
 * @throws Error for an unknown model, missing pricing, a failed job or a polling timeout.
 */
async generate (params: IGenerateVideoParams): Promise<unknown> {
const {
prompt,
model: requestedModel,
duration,
seconds,
size,
resolution,
input_reference: inputReference,
test_mode: testMode,
} = params ?? {};
if ( typeof prompt !== 'string' || !prompt.trim() ) {
throw APIError.create('field_invalid', null, {
key: 'prompt',
expected: 'a non-empty string',
got: prompt,
});
}
const selectedModel = await this.#selectModel(requestedModel);
if ( ! selectedModel ) {
throw new Error(`Unknown video model: ${requestedModel}`);
}
// test mode returns a fixed URL before any credit check or OpenAI call
if ( testMode ) {
return new TypedValue({
$: 'string:url:web',
content_type: 'video',
}, DEFAULT_TEST_VIDEO_URL);
}
// `size` and `seconds` take precedence over `resolution` and `duration`;
// unsupported values fall back to the model's first dimension / the default duration
const defaultSize = selectedModel.dimensions?.[0] ?? '720x1280';
const normalizedSize = this.#normalizeSize(size ?? resolution, selectedModel) ?? defaultSize;
const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? String(DEFAULT_DURATION_SECONDS);
const sizeTier = this.#determineSizeTier(selectedModel, normalizedSize);
const costPerSecondCents = this.#getCostPerSecond(selectedModel, sizeTier);
if ( ! costPerSecondCents ) {
throw new Error(`No pricing configured for model ${selectedModel.id} at size ${normalizedSize}`);
}
// credit check: per-second price (cents, converted to micro-cents) times the requested duration
const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? DEFAULT_DURATION_SECONDS;
const actor = Context.get('actor');
const costInMicroCents = costPerSecondCents * 1_000_000;
const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents * estimatedUnits);
if ( ! usageAllowed ) {
throw APIError.create('insufficient_funds');
}
const createParams: OpenAI.VideoCreateParams = {
prompt,
model: selectedModel.id,
seconds: normalizedSeconds as OpenAI.VideoSeconds,
size: normalizedSize as OpenAI.VideoSize,
};
if ( inputReference ) {
createParams.input_reference = inputReference as OpenAI.VideoCreateParams['input_reference'];
}
const createResponse = await this.#openai.videos.create(createParams);
const finalJob = await this.#pollUntilComplete(createResponse);
if ( finalJob.status === 'failed' ) {
const errorMessage = finalJob.error?.message ?? 'Video generation failed';
throw new Error(errorMessage);
}
// bill by what the finished job reports, falling back to the requested values
const finalResolution = this.#normalizeSize(finalJob.size, selectedModel) ?? normalizedSize;
const finalTier = this.#determineSizeTier(selectedModel, finalResolution);
const finalCostPerSecondCents = this.#getCostPerSecond(selectedModel, finalTier);
if ( ! finalCostPerSecondCents ) {
throw new Error(`No pricing configured for model ${selectedModel.id} at size ${finalResolution}`);
}
const finalCostInMicroCents = finalCostPerSecondCents * 1_000_000;
const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits;
const downloadResponse = await this.#openai.videos.downloadContent(finalJob.id);
const contentType = downloadResponse.headers.get('content-type') ?? 'video/mp4';
// convert a web stream to a Node stream; buffer the body when no stream is exposed
let stream: any = downloadResponse.body;
if ( stream && typeof stream.getReader === 'function' ) {
stream = Readable.fromWeb(stream as any);
}
if ( ! stream ) {
const arrayBuffer = await downloadResponse.arrayBuffer();
stream = Readable.from(Buffer.from(arrayBuffer));
}
// usage is recorded only after the download response was obtained; the last
// argument passes the total cost explicitly
const finalUsageKey = this.#getUsageKey(selectedModel, finalTier);
await this.#meteringService.incrementUsage(actor, finalUsageKey, actualSeconds, finalCostInMicroCents * actualSeconds);
return new TypedValue({
$: 'stream',
content_type: contentType,
}, stream);
}
/**
 * Resolves the requested model name to a catalogue entry.
 *
 * Matching is case-insensitive and accepts the model id, its puterId or any
 * alias. When no model is requested, the provider's default model is used
 * rather than failing with "Unknown video model: undefined".
 *
 * @param requestedModel Model name from the request, if any.
 * @returns The catalogue entry, or `undefined` when the name matches nothing.
 */
async #selectModel (requestedModel?: string): Promise<IVideoModel | undefined> {
    const catalogue = await this.models();
    const wanted = (requestedModel ?? this.getDefaultModel()).trim().toLowerCase();
    return catalogue.find(entry =>
        entry.id.toLowerCase() === wanted ||
        entry.puterId?.toLowerCase() === wanted ||
        (entry.aliases ?? []).some(alias => alias.toLowerCase() === wanted));
}
/**
 * Re-fetches the job every POLL_INTERVAL_MS while its status is `queued` or
 * `in_progress`, and returns it once it has any other status.
 * @throws Error when DEFAULT_TIMEOUT_MS has elapsed and the job is still pending.
 */
async #pollUntilComplete (initialJob: OpenAI.Video): Promise<OpenAI.Video> {
    const startedAt = Date.now();
    const isPending = (video: OpenAI.Video) => video.status === 'queued' || video.status === 'in_progress';
    let current = initialJob;
    while ( isPending(current) ) {
        const elapsed = Date.now() - startedAt;
        if ( elapsed > DEFAULT_TIMEOUT_MS ) {
            throw new Error('Timed out waiting for Sora video generation to complete');
        }
        await this.#delay(POLL_INTERVAL_MS);
        current = await this.#openai.videos.retrieve(current.id);
    }
    return current;
}
/** Resolves after `ms` milliseconds. */
async #delay (ms: number): Promise<void> {
    await new Promise<void>((resolve) => {
        setTimeout(resolve, ms);
    });
}
/**
 * Returns the normalized size when the model lists it in `dimensions`,
 * otherwise `undefined`.
 */
#normalizeSize (candidate: unknown, model: IVideoModel): string | undefined {
    if ( ! candidate ) {
        return undefined;
    }
    const parsed = this.#normalizeResolution(candidate);
    if ( ! parsed ) {
        return undefined;
    }
    return model.dimensions?.includes(parsed) ? parsed : undefined;
}
/**
 * Converts a requested duration to the canonical string form of one of the
 * allowed durations (`'4'`, `'8'`, `'12'`).
 *
 * Numbers are rounded; strings are trimmed and converted with `Number`. The
 * result is always rebuilt from the numeric value, so inputs such as `'08'`
 * or `'8.0'` yield `'8'` instead of being passed through verbatim.
 *
 * @param value Requested duration (number or numeric string).
 * @returns The canonical duration string, or `undefined` when the value is
 *          missing, not numeric, or not an allowed duration.
 */
#normalizeSeconds (value: unknown): string | undefined {
    let numeric: number;
    if ( typeof value === 'number' ) {
        numeric = Math.round(value);
    } else if ( typeof value === 'string' ) {
        // Number('') is 0, which is never an allowed duration, so blank input is rejected below.
        numeric = Number(value.trim());
    } else {
        return undefined;
    }

    const allowed: readonly number[] = OPENAI_VIDEO_ALLOWED_SECONDS;
    return allowed.includes(numeric) ? String(numeric) : undefined;
}
/**
 * Maps a model and size to a pricing tier. Only `sora-2-pro` has tiers other
 * than `default`: `xxl` for 1080x1920 / 1920x1080 and `xl` for 1024x1792 / 1792x1024.
 */
#determineSizeTier (model: IVideoModel, size: string): string {
    if ( model.id !== 'sora-2-pro' ) {
        return 'default';
    }
    switch ( size ) {
        case '1080x1920':
        case '1920x1080':
            return 'xxl';
        case '1024x1792':
        case '1792x1024':
            return 'xl';
        default:
            return 'default';
    }
}
/**
 * Reads the per-second price for a tier from the model's `costs` table:
 * key `per-second` for the default tier, `per-second-<tier>` otherwise.
 */
#getCostPerSecond (model: IVideoModel, tier: string): number | undefined {
    const suffix = tier === 'default' ? '' : `-${tier}`;
    return model.costs?.[`per-second${suffix}`];
}
/** Builds the metering key `openai:<model id>:<tier>`. */
#getUsageKey (model: IVideoModel, tier: string): string {
    return ['openai', model.id, tier].join(':');
}
/**
 * Extracts the first `<width>x<height>` pair from a string (whitespace around
 * the `x` and either letter case are accepted) and returns it as
 * `<width>x<height>`, keeping the given order.
 * @returns The normalized string, or `undefined` for non-strings and non-matching input.
 */
#normalizeResolution (value: unknown): string | undefined {
    if ( typeof value !== 'string' || !value ) {
        return undefined;
    }
    const found = /(\d+)\s*x\s*(\d+)/i.exec(value);
    if ( found === null ) {
        return undefined;
    }
    const width = Number.parseInt(found[1], 10);
    const height = Number.parseInt(found[2], 10);
    return Number.isFinite(width) && Number.isFinite(height)
        ? `${width}x${height}`
        : undefined;
}
/**
 * Reads a duration as a whole number: finite numbers are rounded, strings are
 * parsed as base-10 integers, everything else yields `undefined`.
 */
#parseSeconds (value: unknown): number | undefined {
    if ( typeof value === 'string' ) {
        const parsed = Number.parseInt(value, 10);
        return Number.isFinite(parsed) ? parsed : undefined;
    }
    if ( typeof value === 'number' && Number.isFinite(value) ) {
        return Math.round(value);
    }
    return undefined;
}
}
@@ -0,0 +1,59 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { IVideoModel } from '../types.js';
/** Clip durations, in seconds, offered for the OpenAI video models. */
export const OPENAI_VIDEO_ALLOWED_SECONDS = [4, 8, 12] as const;

// Clip length used for the "default-duration-per-video" price entries.
const DEFAULT_CLIP_SECONDS = 4;

// Builds the two price entries of one tier: the per-second price and the price
// of a default-length clip. `suffix` is '' for the default tier, '-xl' / '-xxl' otherwise.
const tierCosts = (suffix: string, perSecond: number): Record<string, number> => ({
    [`per-second${suffix}`]: perSecond,
    [`default-duration-per-video${suffix}`]: perSecond * DEFAULT_CLIP_SECONDS,
});

/**
 * OpenAI video model catalogue. Prices are expressed in `costs_currency`
 * (usd-cents); each model gets its own copy of the allowed durations.
 */
export const OPENAI_VIDEO_MODELS: IVideoModel[] = [
    {
        id: 'sora-2',
        puterId: 'openai:openai/sora-2',
        aliases: ['openai/sora-2'],
        name: 'Sora 2',
        costs_currency: 'usd-cents',
        costs: {
            ...tierCosts('', 10),
        },
        output_cost_key: 'default-duration-per-video',
        durationSeconds: [...OPENAI_VIDEO_ALLOWED_SECONDS],
        dimensions: ['720x1280', '1280x720'],
        defaultUsageKey: 'openai:sora-2:default',
    },
    {
        id: 'sora-2-pro',
        puterId: 'openai:openai/sora-2-pro',
        aliases: ['openai/sora-2-pro'],
        name: 'Sora 2 Pro',
        costs_currency: 'usd-cents',
        costs: {
            ...tierCosts('', 30),
            ...tierCosts('-xl', 50),
            ...tierCosts('-xxl', 70),
        },
        output_cost_key: 'default-duration-per-video',
        durationSeconds: [...OPENAI_VIDEO_ALLOWED_SECONDS],
        dimensions: ['720x1280', '1280x720', '1024x1792', '1792x1024', '1080x1920', '1920x1080'],
        defaultUsageKey: 'openai:sora-2-pro:default',
    },
];
@@ -17,56 +17,50 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
const APIError = require('../../../../api/APIError');
const BaseService = require('../../../BaseService');
const { TypedValue } = require('../../../drivers/meta/Runtime');
const { Context } = require('../../../../util/context');
const { Together } = require('together-ai');
import { Together } from 'together-ai';
import APIError from '../../../../../api/APIError.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js';
import { TypedValue } from '../../../../drivers/meta/Runtime.js';
import { TOGETHER_VIDEO_GENERATION_MODELS } from './models.js';
const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
const POLL_INTERVAL_MS = 5_000;
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;
const DEFAULT_MODEL = 'minimax/video-01-director';
const DEFAULT_DURATION_SECONDS = 6;
const DEFAULT_USAGE_KEY = 'together-video:default';
let models = [];
export class TogetherVideoGenerationProvider implements IVideoProvider {
#client: Together;
#meteringService: MeteringService;
class TogetherVideoGenerationService extends BaseService {
/** @type {import('../../../MeteringService/MeteringService').MeteringService} */
get meteringService () {
return this.services.get('meteringService').meteringService;
}
static MODULES = {};
async _init () {
const apiKey =
this.config?.apiKey ??
this.global_config?.services?.['together-ai']?.apiKey;
if ( ! apiKey ) {
constructor (config: { apiKey: string }, meteringService: MeteringService) {
if ( ! config.apiKey ) {
throw new Error('Together AI video generation requires an API key');
}
this.client = new Together({ apiKey });
this.#client = new Together({ apiKey: config.apiKey });
this.#meteringService = meteringService;
}
static IMPLEMENTS = {
'driver-capabilities': {
supports_test_mode (iface, method_name) {
return iface === 'puter-video-generation' &&
method_name === 'generate';
},
},
'puter-video-generation': {
async generate (params) {
return await this.generateVideo(params);
},
},
};
/**
 * Returns this provider's default model identifier in its
 * `togetherai:`-prefixed form. The part after the prefix is the same string
 * as the module-level DEFAULT_MODEL constant.
 *
 * @returns The constant string `togetherai:minimax/video-01-director`.
 */
getDefaultModel (): string {
return 'togetherai:minimax/video-01-director';
}
async generateVideo (params) {
/**
 * Lists the Together video models known to this provider.
 *
 * Every entry of TOGETHER_VIDEO_GENERATION_MODELS is shallow-copied and then
 * adjusted: `aliases` is set to a one-element array holding the entry's bare
 * `model` name, and the optional capability fields have `null` replaced by
 * `undefined`.
 *
 * @returns A newly built array on each call; nothing is cached here.
 */
async models (): Promise<IVideoModel[]> {
return TOGETHER_VIDEO_GENERATION_MODELS.map((model) => ({
...model,
// Listed after the spread, so it replaces any `aliases` the entry carried.
aliases: [model.model],
// `?? undefined` maps both null and undefined to undefined.
durationSeconds: model.durationSeconds ?? undefined,
dimensions: model.dimensions ?? undefined,
fps: model.fps ?? undefined,
keyframes: model.keyframes ?? undefined,
promptLength: model.promptLength ?? undefined,
promptSupported: model.promptSupported ?? undefined,
}));
}
async generate (params: IGenerateVideoParams): Promise<unknown> {
const {
prompt,
model: requestedModel,
@@ -97,6 +91,7 @@ class TogetherVideoGenerationService extends BaseService {
}
const model = this.#stripTogetherPrefix(requestedModel ?? DEFAULT_MODEL);
const selectedModel = await this.#getModel(requestedModel);
if ( testMode ) {
return new TypedValue({
@@ -105,10 +100,15 @@ class TogetherVideoGenerationService extends BaseService {
}, DEFAULT_TEST_VIDEO_URL);
}
const costPerVideoCents = selectedModel?.costs?.['per-video'];
if ( ! costPerVideoCents ) {
throw new Error(`No pricing configured for video model ${model}`);
}
const costInMicroCents = costPerVideoCents * 1_000_000;
let normalizedSeconds = this.#coercePositiveInteger(seconds ?? duration);
if ( ! no_extra_params )
{
if ( ! no_extra_params ) {
normalizedSeconds ??= DEFAULT_DURATION_SECONDS;
}
@@ -117,21 +117,18 @@ class TogetherVideoGenerationService extends BaseService {
throw new Error('actor not found in context');
}
const estimatedUsageUnits = 1; // Together video billing is per generated video
const usageKey = this.#determineUsageKey(model);
const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUsageUnits);
const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
if ( ! usageAllowed ) {
throw APIError.create('insufficient_funds');
}
const createPayload = {
const createPayload: Together.VideoCreateParams & { metadata?: object } = {
prompt,
model,
};
if ( normalizedSeconds ) {
createPayload.seconds = normalizedSeconds;
createPayload.seconds = String(normalizedSeconds);
}
if ( this.#isFiniteNumber(width) ) {
createPayload.width = Number(width);
@@ -152,7 +149,7 @@ class TogetherVideoGenerationService extends BaseService {
createPayload.seed = Number(seed);
}
if ( typeof outputFormat === 'string' && outputFormat.trim() ) {
createPayload.output_format = outputFormat.trim();
createPayload.output_format = outputFormat.trim() as Together.VideoCreateParams['output_format'];
}
if ( this.#isFiniteNumber(outputQuality) ) {
createPayload.output_quality = Number(outputQuality);
@@ -161,16 +158,16 @@ class TogetherVideoGenerationService extends BaseService {
createPayload.negative_prompt = negativePrompt;
}
if ( Array.isArray(referenceImages) && referenceImages.length > 0 ) {
createPayload.reference_images = referenceImages.filter(item => typeof item === 'string' && item.trim().length > 0);
createPayload.reference_images = referenceImages.filter((item: string) => typeof item === 'string' && item.trim().length > 0);
}
if ( Array.isArray(frameImages) && frameImages.length > 0 ) {
createPayload.frame_images = frameImages.filter(frame => frame && typeof frame === 'object');
createPayload.frame_images = frameImages.filter((frame: any) => frame && typeof frame === 'object' && typeof frame.input_image === 'string') as Together.VideoCreateParams['frame_images'];
}
if ( metadata && typeof metadata === 'object' ) {
createPayload.metadata = metadata;
}
const job = await this.client.videos.create(createPayload);
const job = await this.#client.videos.create(createPayload);
const finalJob = await this.#pollUntilComplete(job.id);
if ( finalJob.status === 'failed' ) {
@@ -185,7 +182,8 @@ class TogetherVideoGenerationService extends BaseService {
throw new Error('Video generation was cancelled');
}
this.meteringService.incrementUsage(actor, usageKey, 1);
const usageKey = `together-video:${model}`;
await this.#meteringService.incrementUsage(actor, usageKey, 1, costInMicroCents);
const videoUrl = finalJob?.outputs?.video_url;
if ( typeof videoUrl === 'string' && videoUrl.trim() ) {
@@ -198,41 +196,9 @@ class TogetherVideoGenerationService extends BaseService {
throw new Error('Together AI response did not include a video URL');
}
async models () {
if ( models.length > 0 && models[0].costs_currency ) {
return models;
}
const { TOGETHER_VIDEO_GENERATION_MODELS } = await import('./models.js');
const costMapModule = await import('../../../MeteringService/costMaps/togetherCostMap.ts');
const TOGETHER_COST_MAP = costMapModule.TOGETHER_COST_MAP;
// Convert microcents to cents (divide by 1,000,000)
const microCentsToCents = (microCents) => microCents / 1_000_000;
models = TOGETHER_VIDEO_GENERATION_MODELS.map(model => {
const result = { ...model };
// Convert model ID from 'togetherai:google/veo-3.0' to cost key 'together-video:google/veo-3.0'
const costKey = model.id.replace('togetherai:', 'together-video:');
const costMicroCents = TOGETHER_COST_MAP[costKey];
if ( costMicroCents !== undefined && costMicroCents > 0 ) {
result.costs_currency = 'usd-cents';
result.costs = {
'per-video': microCentsToCents(costMicroCents),
};
result.output_cost_key = 'per-video';
}
return result;
});
return models;
}
async #pollUntilComplete (jobId) {
let job = await this.client.videos.retrieve(jobId);
async #pollUntilComplete (jobId: string): Promise<any> {
// any here because sdk types are wrong https://docs.together.ai/docs/videos-overview -> "Job Status Reference"
let job = await (this.#client as any).videos.retrieve(jobId);
const start = Date.now();
while ( job.status === 'queued' || job.status === 'in_progress' ) {
@@ -241,31 +207,30 @@ class TogetherVideoGenerationService extends BaseService {
}
await this.#delay(POLL_INTERVAL_MS);
job = await this.client.videos.retrieve(jobId);
job = await (this.#client as any).videos.retrieve(jobId);
}
return job;
}
async #delay (ms) {
/**
 * Waits for the given number of milliseconds.
 *
 * @param ms Delay passed straight to `setTimeout`.
 * @returns A promise that resolves once the timer fires.
 */
async #delay (ms: number): Promise<void> {
return await new Promise(resolve => setTimeout(resolve, ms));
}
#determineUsageKey (model) {
if ( typeof model === 'string' && model.trim() ) {
return `together-video:${model}`;
}
return DEFAULT_USAGE_KEY;
/**
 * Looks up the catalogue entry for a requested model.
 *
 * @param requestedModel Model name, with or without the `togetherai:` prefix.
 *   When null or undefined, DEFAULT_MODEL is looked up instead.
 * @returns The first entry from `models()` whose `model` field equals the
 *   bare name, compared case-insensitively, or `undefined` when none matches.
 */
async #getModel (requestedModel?: string): Promise<IVideoModel | undefined> {
const bareModel = this.#stripTogetherPrefix(requestedModel ?? DEFAULT_MODEL);
const allModels = await this.models();
// Entries without a `model` field never match: `m.model?.toLowerCase()` is undefined.
return allModels.find(m => m.model?.toLowerCase() === bareModel.toLowerCase());
}
#stripTogetherPrefix (model) {
/**
 * Removes a leading `togetherai:` prefix from a model name.
 *
 * @param model Model name as received.
 * @returns The name without the prefix; the input is returned unchanged when
 *   it is not a string or does not start with `togetherai:` (the check is
 *   case-sensitive).
 */
#stripTogetherPrefix (model: string): string {
// The runtime typeof check guards against non-string values despite the annotation.
if ( typeof model === 'string' && model.startsWith('togetherai:') ) {
return model.slice('togetherai:'.length);
}
return model;
}
#coercePositiveInteger (value) {
#coercePositiveInteger (value: unknown): number | undefined {
if ( typeof value === 'number' && Number.isFinite(value) ) {
const rounded = Math.round(value);
return rounded > 0 ? rounded : undefined;
@@ -277,7 +242,7 @@ class TogetherVideoGenerationService extends BaseService {
return undefined;
}
#isFiniteNumber (value) {
#isFiniteNumber (value: unknown): boolean {
if ( typeof value === 'number' ) {
return Number.isFinite(value);
}
@@ -288,7 +253,3 @@ class TogetherVideoGenerationService extends BaseService {
return false;
}
}
module.exports = {
TogetherVideoGenerationService,
};
@@ -1,10 +1,45 @@
export const TOGETHER_VIDEO_GENERATION_MODELS = [
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { IVideoModel } from '../types.js';
/**
 * Shape of one entry in the Together video model table.
 *
 * Tightens {@link IVideoModel}: the fields below are optional on the base
 * interface but must be present here. Capability fields use an explicit
 * `null` where the table has no value to list.
 */
interface ITogetherVideoModel extends IVideoModel {
    /** Name of the organization the model is attributed to, e.g. `MiniMax`. */
    organization: string;
    /** Bare model name without the `togetherai:` prefix. */
    model: string;

    /** Clip lengths listed for the model, or `null`. */
    durationSeconds: number[] | null;
    /** Frame sizes listed for the model as `WIDTHxHEIGHT` strings, or `null`. */
    dimensions: string[] | null;
    /** Frame rates listed for the model, or `null`. */
    fps: number[] | null;
    /** Keyframe positions listed for the model (e.g. `first`, `last`), or `null`. */
    keyframes: string[] | null;

    /** Bounds on prompt length, or `null`. NOTE(review): unit (presumably characters) is not stated here. */
    promptLength: { min: number; max: number } | null;
    /** Whether a text prompt is supported, or `null`. */
    promptSupported: boolean | null;
}
export const TOGETHER_VIDEO_GENERATION_MODELS: ITogetherVideoModel[] = [
{
id: 'togetherai:minimax/video-01-director',
organization: 'MiniMax',
name: 'MiniMax 01 Director',
model: 'minimax/video-01-director',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 28 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1366x768'],
fps: [25],
keyframes: ['first'],
@@ -16,7 +51,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'MiniMax',
name: 'MiniMax Hailuo 02',
model: 'minimax/hailuo-02',
durationSeconds: 10,
costs_currency: 'usd-cents',
costs: { 'per-video': 56 },
output_cost_key: 'per-video',
durationSeconds: [10],
dimensions: ['1366x768', '1920x1080'],
fps: [25],
keyframes: ['first'],
@@ -28,7 +66,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 2.0',
model: 'google/veo-2.0',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 250 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1280x720', '720x1280'],
fps: [24],
keyframes: ['first', 'last'],
@@ -40,7 +81,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0',
model: 'google/veo-3.0',
durationSeconds: 8,
costs_currency: 'usd-cents',
costs: { 'per-video': 160 },
output_cost_key: 'per-video',
durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -52,7 +96,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0 + Audio',
model: 'google/veo-3.0-audio',
durationSeconds: 8,
costs_currency: 'usd-cents',
costs: { 'per-video': 320 },
output_cost_key: 'per-video',
durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -64,7 +111,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0 Fast',
model: 'google/veo-3.0-fast',
durationSeconds: 8,
costs_currency: 'usd-cents',
costs: { 'per-video': 80 },
output_cost_key: 'per-video',
durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -76,7 +126,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Google',
name: 'Veo 3.0 Fast + Audio',
model: 'google/veo-3.0-fast-audio',
durationSeconds: 8,
costs_currency: 'usd-cents',
costs: { 'per-video': 120 },
output_cost_key: 'per-video',
durationSeconds: [8],
dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -88,7 +141,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'ByteDance',
name: 'Seedance 1.0 Lite',
model: 'ByteDance/Seedance-1.0-lite',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 14 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: [
'864x480',
'736x544',
@@ -111,7 +167,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'ByteDance',
name: 'Seedance 1.0 Pro',
model: 'ByteDance/Seedance-1.0-pro',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 57 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: [
'864x480',
'736x544',
@@ -134,7 +193,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'PixVerse',
name: 'PixVerse v5',
model: 'pixverse/pixverse-v5',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 30 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: [
'640x360',
'480x360',
@@ -167,7 +229,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.1 Master',
model: 'kwaivgI/kling-2.1-master',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 92 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -179,7 +244,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.1 Standard',
model: 'kwaivgI/kling-2.1-standard',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 18 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -191,7 +259,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.1 Pro',
model: 'kwaivgI/kling-2.1-pro',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 32 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first', 'last'],
@@ -203,7 +274,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 2.0 Master',
model: 'kwaivgI/kling-2.0-master',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 92 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1280x720', '720x720', '720x1280'],
fps: [24],
keyframes: ['first'],
@@ -215,7 +289,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 1.6 Standard',
model: 'kwaivgI/kling-1.6-standard',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 19 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [30, 24],
keyframes: ['first'],
@@ -227,7 +304,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Kuaishou',
name: 'Kling 1.6 Pro',
model: 'kwaivgI/kling-1.6-pro',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 32 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first'],
@@ -239,6 +319,9 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Wan-AI',
name: 'Wan 2.2 I2V',
model: 'Wan-AI/Wan2.2-I2V-A14B',
costs_currency: 'usd-cents',
costs: { 'per-video': 31 },
output_cost_key: 'per-video',
durationSeconds: null,
dimensions: null,
fps: null,
@@ -251,6 +334,9 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Wan-AI',
name: 'Wan 2.2 T2V',
model: 'Wan-AI/Wan2.2-T2V-A14B',
costs_currency: 'usd-cents',
costs: { 'per-video': 66 },
output_cost_key: 'per-video',
durationSeconds: null,
dimensions: null,
fps: null,
@@ -263,7 +349,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Vidu',
name: 'Vidu 2.0',
model: 'vidu/vidu-2.0',
durationSeconds: 8,
costs_currency: 'usd-cents',
costs: { 'per-video': 28 },
output_cost_key: 'per-video',
durationSeconds: [8],
dimensions: [
'1920x1080',
'1080x1080',
@@ -285,7 +374,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'Vidu',
name: 'Vidu Q1',
model: 'vidu/vidu-q1',
durationSeconds: 5,
costs_currency: 'usd-cents',
costs: { 'per-video': 22 },
output_cost_key: 'per-video',
durationSeconds: [5],
dimensions: ['1920x1080', '1080x1080', '1080x1920'],
fps: [24],
keyframes: ['first', 'last'],
@@ -297,7 +389,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'OpenAI',
name: 'Sora 2',
model: 'openai/sora-2',
durationSeconds: 8,
costs_currency: 'usd-cents',
costs: { 'per-video': 80 },
output_cost_key: 'per-video',
durationSeconds: [8],
dimensions: ['1280x720', '720x1280'],
fps: null,
keyframes: ['first'],
@@ -309,7 +404,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [
organization: 'OpenAI',
name: 'Sora 2 Pro',
model: 'openai/sora-2-pro',
durationSeconds: 8,
costs_currency: 'usd-cents',
costs: { 'per-video': 400 },
output_cost_key: 'per-video',
durationSeconds: [8],
dimensions: ['1280x720', '720x1280'],
fps: null,
keyframes: ['first'],
@@ -0,0 +1,72 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Catalogue entry describing one video generation model.
 *
 * Only `id` and `name` are required; every other field is optional so that
 * different providers can fill in whatever they know.
 */
export interface IVideoModel {
    // --- Identity ---------------------------------------------------------

    /** Model identifier. */
    id: string;
    /** Human-readable display name. */
    name: string;
    /** Provider-qualified identifier, e.g. `openai:openai/sora-2`. */
    puterId?: string;
    /** Alternative names for the same model. */
    aliases?: string[];
    /** Name of the provider serving the model. */
    provider?: string;
    /** Organization the model is attributed to. */
    organization?: string;
    /** Bare provider-side model name. */
    model?: string;
    /** Free-text description. */
    description?: string;
    /** Version label. */
    version?: string;

    // --- Pricing ----------------------------------------------------------

    /** Unit the `costs` values are expressed in, e.g. `usd-cents`. */
    costs_currency?: string;
    /** Price table keyed by cost name, e.g. `per-video` or `per-second`. */
    costs?: Record<string, number>;
    /** NOTE(review): presumably names a key of `costs`; not used in the code visible here. Confirm. */
    index_cost_key?: string;
    /** Name of a key in `costs`, e.g. `per-video`. */
    output_cost_key?: string;
    /** Usage key associated with the model, e.g. `openai:sora-2:default`. */
    defaultUsageKey?: string;

    // --- Capabilities (null or absent when nothing is listed) -------------

    /** Clip lengths listed for the model. */
    durationSeconds?: number[] | null;
    /** Frame sizes listed for the model, e.g. `1280x720`. */
    dimensions?: string[] | null;
    /** Frame rates listed for the model. */
    fps?: number[] | null;
    /** Keyframe positions listed for the model, e.g. `first`, `last`. */
    keyframes?: string[] | null;
    /** Bounds on prompt length. */
    promptLength?: { min: number; max: number } | null;
    /** Whether a text prompt is supported. */
    promptSupported?: boolean | null;
}
/**
 * Arguments accepted by {@link IVideoProvider.generate}.
 *
 * Only `prompt` is required. Which of the optional fields are honoured is up
 * to the individual provider.
 */
export interface IGenerateVideoParams {
    // --- Request routing --------------------------------------------------

    /** Text prompt describing the video. */
    prompt: string;
    /** Requested model name. */
    model?: string;
    /** Requested provider name. */
    provider?: string;
    /** Flag requesting test mode. */
    test_mode?: boolean;
    /** NOTE(review): presumably suppresses provider-side defaults for omitted parameters. Confirm with each provider. */
    no_extra_params?: boolean;

    // --- Clip length and frame size ----------------------------------------

    /** Clip length in seconds, as a number or a numeric string. */
    seconds?: number | string;
    /** Alternate key for the clip length; same accepted types as `seconds`. */
    duration?: number | string;
    /** Requested frame size. NOTE(review): presumably a `WIDTHxHEIGHT` string. Confirm. */
    size?: string;
    /** Requested resolution. NOTE(review): relationship to `size` is provider-defined. Confirm. */
    resolution?: string;
    /** Frame width. */
    width?: number;
    /** Frame height. */
    height?: number;
    /** Frames per second. */
    fps?: number;

    // --- Sampling and output -----------------------------------------------

    /** Number of generation steps. */
    steps?: number;
    /** Guidance scale. */
    guidance_scale?: number;
    /** Random seed. */
    seed?: number;
    /** Output container/format name. */
    output_format?: string;
    /** Output quality setting. */
    output_quality?: number;
    /** Text describing what the video should avoid. */
    negative_prompt?: string;

    // --- Conditioning inputs and passthrough ---------------------------------

    /** Reference images, given as strings. */
    reference_images?: string[];
    /** Frame image descriptors; each element is an object. */
    frame_images?: object[];
    /** Provider-specific input reference; the type is left open. */
    input_reference?: unknown;
    /** Arbitrary metadata object. */
    metadata?: object;
}
/**
 * Contract implemented by each video generation backend.
 */
export interface IVideoProvider {
    /**
     * Identifier of the model this provider treats as its default.
     */
    getDefaultModel (): string;

    /**
     * Lists the models this provider offers. May answer synchronously or
     * through a promise, so callers should `await` the result.
     */
    models (): Promise<IVideoModel[]> | IVideoModel[];

    /**
     * Generates a video for the given parameters.
     *
     * @param params Generation request.
     * @returns A promise for the provider-specific result; the type is left
     *   open (`unknown`) by this interface.
     */
    generate (params: IGenerateVideoParams): Promise<unknown>;
}
@@ -293,7 +293,7 @@ class DriverService extends BaseService {
'puter-speech2txt': 'openai-speech2txt',
'puter-chat-completion': 'openai-completion',
'puter-image-generation': 'openai-image-generation',
'puter-video-generation': 'openai-video-generation',
'puter-video-generation': 'ai-video',
'puter-apps': 'es:app',
'puter-subdomains': 'es:subdomain',
'puter-notifications': 'es:notification',
+6 -36
View File
@@ -11,16 +11,6 @@ const normalizeTTSProvider = (value) => {
return value;
};
const TOGETHER_VIDEO_MODEL_PREFIXES = [
'minimax/',
'google/',
'bytedance/',
'pixverse/',
'kwaivgi/',
'vidu/',
'wan-ai/',
];
class AI {
/**
* Creates a new instance with the given authentication token, API origin, and app ID,
@@ -908,39 +898,19 @@ class AI {
throw ({ message: 'Prompt parameter is required', code: 'prompt_required' });
}
if ( ! options.model ) {
options.model = 'sora-2';
}
if ( options.duration !== undefined && options.seconds === undefined ) {
options.seconds = options.duration;
}
// This sucks, should be backend's job like we do for chat models now
let videoService = 'openai-video-generation';
if ( options.test_mode === true ) {
testMode = true;
}
let videoService = 'ai-video';
const driverHint = typeof options.driver === 'string' ? options.driver : undefined;
const driverHintLower = driverHint ? driverHint.toLowerCase() : undefined;
const providerRaw = typeof options.provider === 'string'
? options.provider
: (typeof options.service === 'string' ? options.service : undefined);
const providerHint = typeof providerRaw === 'string' ? providerRaw.toLowerCase() : undefined;
const modelLower = typeof options.model === 'string' ? options.model.toLowerCase() : '';
const looksLikeTogetherVideoModel = typeof options.model === 'string' &&
(TOGETHER_VIDEO_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix)) || options.model.startsWith('togetherai:'));
if ( driverHintLower === 'together' || driverHintLower === 'together-ai' ) {
videoService = 'together-video-generation';
} else if ( driverHintLower === 'together-video-generation' ) {
videoService = 'together-video-generation';
} else if ( driverHintLower === 'openai' ) {
videoService = 'openai-video-generation';
} else if ( driverHint ) {
if ( driverHint ) {
videoService = driverHint;
} else if ( providerHint === 'together' || providerHint === 'together-ai' ) {
videoService = 'together-video-generation';
} else if ( looksLikeTogetherVideoModel ) {
videoService = 'together-video-generation';
}
return await utils.make_driver_method(['prompt'], 'puter-video-generation', videoService, 'generate', {
+2
View File
@@ -82,8 +82,10 @@ export interface Txt2ImgOptions {
export interface Txt2VidOptions {
prompt?: string;
provider?: string;
driver?: string;
model?: string;
seconds?: number;
duration?: number;
test_mode?: boolean;
// OpenAI options