diff --git a/src/backend/src/modules/ai/PuterAIChatModule.js b/src/backend/src/modules/ai/PuterAIChatModule.js index 99f93ea43..0ff818c82 100644 --- a/src/backend/src/modules/ai/PuterAIChatModule.js +++ b/src/backend/src/modules/ai/PuterAIChatModule.js @@ -28,9 +28,7 @@ import { OpenAISpeechToTextService } from '../../services/ai/stt/OpenAISpeechToT import { AWSPollyService } from '../../services/ai/tts/AWSPollyService.js'; import { ElevenLabsTTSService } from '../../services/ai/tts/ElevenLabsTTSService.js'; import { OpenAITTSService } from '../../services/ai/tts/OpenAITTSService.js'; -import { TogetherVideoGenerationService } from '../../services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js'; -import { OpenAIVideoGenerationService } from '../../services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js'; -// import { AIVideoGenerationService } from '../../services/ai/video/AIVideoGenerationService.js'; +import { AIVideoGenerationService } from '../../services/ai/video/AIVideoGenerationService.js'; /** * PuterAIModule class extends AdvancedBase to manage and register various AI services. @@ -59,7 +57,7 @@ export class PuterAIModule extends AdvancedBase { services.registerService('ai-image', AIImageGenerationService); // video generation ai service - // services.registerService('ai-video', AIVideoGenerationService); + services.registerService('ai-video', AIVideoGenerationService); // TODO DS: centralize other service types too // TODO: services should govern their own availability instead of the module deciding what to register @@ -83,14 +81,6 @@ export class PuterAIModule extends AdvancedBase { services.registerService('openai-tts', OpenAITTSService); services.registerService('openai-speech2txt', OpenAISpeechToTextService); - - // TODO DS: move to video service - services.registerService('openai-video-generation', OpenAIVideoGenerationService); - } - - if ( config?.services?.['together-ai'] ) { - // TODO DS: move to video service - services.registerService('together-video-generation', TogetherVideoGenerationService); } } } diff --git a/src/backend/src/services/ChatAPIService.js b/src/backend/src/services/ChatAPIService.js index 8e1079ead..e92968d6b 100644 --- a/src/backend/src/services/ChatAPIService.js +++ b/src/backend/src/services/ChatAPIService.js @@ -160,20 +160,8 @@ class ChatAPIService extends BaseService { try { const svc_su = this.services.get('su'); const models = await svc_su.sudo(async () => { - const items = []; - if ( this.services.has('openai-video-generation') ) { - const svc_video = this.services.get('openai-video-generation'); - if ( typeof svc_video.models === 'function' ) { - items.push(...await svc_video.models()); - } - } - if ( this.services.has('together-video-generation') ) { - const svc_video = this.services.get('together-video-generation'); - if ( typeof svc_video.models === 'function' ) { - items.push(...await svc_video.models()); - } - } - return items; + const svc_video = this.services.get('ai-video'); + return svc_video.models(); }); res.json({ models }); } catch ( error ) { @@ -190,20 +178,8 @@ class ChatAPIService extends BaseService { try { const svc_su = this.services.get('su'); const models = await svc_su.sudo(async () => { - const items = []; - if ( this.services.has('openai-video-generation') ) { - const svc_video = this.services.get('openai-video-generation'); - if ( typeof svc_video.models === 'function' ) { - items.push(...(await svc_video.models()).map(model => model.puterId || model.id)); - } - } - if ( this.services.has('together-video-generation') ) { - const svc_video = this.services.get('together-video-generation'); - if ( typeof svc_video.models === 'function' ) { - items.push(...(await svc_video.models()).map(model => model.id)); - } - } - return items; + const svc_video = this.services.get('ai-video'); + return svc_video.list(); }); res.json({ models }); } catch ( error ) { diff --git a/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts index f7ed45e6a..4d84e2617 100644 --- a/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts +++ b/src/backend/src/services/MeteringService/costMaps/openaiVideoCostMap.ts @@ -5,4 +5,5 @@ export const OPENAI_VIDEO_COST_MAP = { 'openai:sora-2:default': toMicroCents(0.10), 'openai:sora-2-pro:default': toMicroCents(0.30), 'openai:sora-2-pro:xl': toMicroCents(0.50), + 'openai:sora-2-pro:xxl': toMicroCents(0.70), }; diff --git a/src/backend/src/services/ai/video/.gitignore b/src/backend/src/services/ai/video/.gitignore new file mode 100644 index 000000000..aa4a6da26 --- /dev/null +++ b/src/backend/src/services/ai/video/.gitignore @@ -0,0 +1,2 @@ +*.js +*.js.map \ No newline at end of file diff --git a/src/backend/src/services/ai/video/AIVideoGenerationService.ts b/src/backend/src/services/ai/video/AIVideoGenerationService.ts new file mode 100644 index 000000000..8c9a8283b --- /dev/null +++ b/src/backend/src/services/ai/video/AIVideoGenerationService.ts @@ -0,0 +1,288 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import { APIError } from '../../../api/APIError.js'; +import { Context } from '../../../util/context.js'; +import BaseService from '../../BaseService.js'; +import { DriverService } from '../../drivers/DriverService.js'; +import { EventService } from '../../EventService.js'; +import { MeteringService } from '../../MeteringService/MeteringService.js'; +import { OpenAIVideoGenerationProvider } from './providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.js'; +import { TogetherVideoGenerationProvider } from './providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.js'; +import { IGenerateVideoParams, IVideoModel, IVideoProvider } from './providers/types.js'; + +export class AIVideoGenerationService extends BaseService { + + static SERVICE_NAME = 'ai-video'; + + static DEFAULT_PROVIDER = 'openai-video-generation'; + + get meteringService (): MeteringService { + return this.services.get('meteringService').meteringService; + } + + get eventService (): EventService { + return this.services.get('event'); + } + + get driverService (): DriverService { + return this.services.get('driver'); + } + + getProvider (name: string): IVideoProvider | undefined { + return this.#providers[name]; + } + + #providers: Record = {}; + #modelIdMap: Record = {}; + + static IMPLEMENTS = { + 'driver-capabilities': { + supports_test_mode (iface: string, method_name: string) { + return iface === 'puter-video-generation' && + method_name === 'generate'; + }, + }, + 'puter-video-generation': { + async generate (...parameters: Parameters) { + return (this as unknown as AIVideoGenerationService).generate(...parameters); + }, + }, + }; + + getModel ({ modelId, provider }: { modelId: string, provider?: string }) { + const models = this.#modelIdMap[modelId]; + if ( ! models ) { + return undefined; + } + + if ( provider ) { + const model = models.find(m => m.provider === provider); + return model ?? models[0]; + } + + // Prefer exact primary ID match over alias matches + const exactIdMatch = models.find(m => m.id === modelId); + if ( exactIdMatch ) { + return exactIdMatch; + } + + const exactPuterIdMatch = models.find(m => m.puterId === modelId); + if ( exactPuterIdMatch ) { + return exactPuterIdMatch; + } + + return models[0]; + } + + private async registerProviders () { + const openAiConfig = this.config.providers?.['openai-video-generation'] || this.global_config?.services?.['openai'] || this.global_config?.openai; + if ( openAiConfig && (openAiConfig.apiKey || openAiConfig.secret_key) ) { + this.#providers['openai-video-generation'] = new OpenAIVideoGenerationProvider( + { apiKey: openAiConfig.apiKey || openAiConfig.secret_key }, + this.meteringService, + ); + } + + const togetherConfig = this.config.providers?.['together-video-generation'] || this.global_config?.services?.['together-ai']; + if ( togetherConfig && (togetherConfig.apiKey || togetherConfig.secret_key) ) { + this.#providers['together-video-generation'] = new TogetherVideoGenerationProvider( + { apiKey: togetherConfig.apiKey || togetherConfig.secret_key }, + this.meteringService, + ); + } + + // emit event for extensions to add providers + const extensionProviders = {} as Record; + await this.eventService.emit('ai.video.registerProviders', extensionProviders); + for ( const providerName in extensionProviders ) { + if ( this.#providers[providerName] ) { + console.warn('AIVideoGenerationService: provider name conflict for ', providerName, ' registering with -extension suffix'); + this.#providers[`${providerName}-extension`] = extensionProviders[providerName]; + continue; + } + this.#providers[providerName] = extensionProviders[providerName]; + } + } + + protected async '__on_boot.consolidation' () { + await this.registerProviders(); + + for ( const providerName in this.#providers ) { + const provider = this.#providers[providerName]; + + // alias all driver requests to go here to support legacy routing + this.driverService.register_service_alias( + AIVideoGenerationService.SERVICE_NAME, + providerName, + { iface: 'puter-video-generation' }, + ); + + // build model id map + for ( const model of await provider.models() ) { + model.id = model.id.trim().toLowerCase(); + if ( model.puterId ) { + model.puterId = model.puterId.trim().toLowerCase(); + } + if ( model.aliases ) { + model.aliases = model.aliases.map(alias => alias.trim().toLowerCase()); + } + if ( ! this.#modelIdMap[model.id] ) { + this.#modelIdMap[model.id] = []; + } + this.#modelIdMap[model.id].push({ ...model, provider: providerName }); + + if ( model.puterId ) { + if ( model.aliases ) { + model.aliases.push(model.puterId); + } else { + model.aliases = [model.puterId]; + } + } + + if ( model.aliases ) { + for ( let alias of model.aliases ) { + alias = alias.trim().toLowerCase(); + if ( ! this.#modelIdMap[alias] ) { + this.#modelIdMap[alias] = this.#modelIdMap[model.id]; + continue; + } + if ( this.#modelIdMap[alias] !== this.#modelIdMap[model.id] ) { + this.#modelIdMap[alias].push({ ...model, provider: providerName }); + this.#modelIdMap[model.id] = this.#modelIdMap[alias]; + continue; + } + } + } + this.#modelIdMap[model.id].sort((a, b) => { + const aCostKey = a.index_cost_key || a.output_cost_key || Object.keys(a.costs || {})[0]; + const bCostKey = b.index_cost_key || b.output_cost_key || Object.keys(b.costs || {})[0]; + const aCost = a.costs?.[aCostKey] ?? Infinity; + const bCost = b.costs?.[bCostKey] ?? Infinity; + return aCost - bCost; + }); + } + } + } + + models () { + const seen = new Set(); + return Object.entries(this.#modelIdMap) + .map(([_, models]) => models) + .flat() + .filter(model => { + const identity = `${model.provider}:${model.puterId || model.id}`; + if ( seen.has(identity) ) { + return false; + } + seen.add(identity); + return true; + }) + .sort((a, b) => { + if ( a.provider === b.provider ) { + return a.id.localeCompare(b.id); + } + return a.provider!.localeCompare(b.provider!); + }); + } + + list () { + return this.models().map(m => (m.puterId || m.id)).sort(); + } + + async generate (parameters: IGenerateVideoParams) { + const clientDriverCall = Context.get('client_driver_call'); + let { test_mode: testMode, intended_service: legacyProviderName } = clientDriverCall as { test_mode?: boolean; response_metadata: Record; intended_service?: string }; + + if ( parameters.model ) { + parameters.model = parameters.model.trim().toLowerCase(); + } + + const configuredProviders = Object.keys(this.#providers); + if ( configuredProviders.length === 0 ) { + throw new Error('no video generation providers configured'); + } + + let intendedProvider = (parameters.provider || (legacyProviderName === AIVideoGenerationService.SERVICE_NAME ? '' : legacyProviderName)) ?? ''; + + if ( !parameters.model && !intendedProvider ) { + intendedProvider = configuredProviders.includes(AIVideoGenerationService.DEFAULT_PROVIDER) + ? AIVideoGenerationService.DEFAULT_PROVIDER + : configuredProviders[0]; + } + + if ( intendedProvider && !this.#providers[intendedProvider] ) { + intendedProvider = configuredProviders[0]; + } + + if ( !parameters.model && intendedProvider ) { + parameters.model = this.#providers[intendedProvider].getDefaultModel(); + } + + const model = parameters.model ? this.getModel({ modelId: parameters.model, provider: intendedProvider }) : undefined; + + if ( ! model ) { + const availableModelsUrl = `${this.global_config.origin}/puterai/video/models`; + + throw APIError.create('field_invalid', undefined, { + key: 'model', + expected: `a valid model name from ${availableModelsUrl}`, + got: parameters.model, + }); + } + + const provider = this.#providers[model.provider!]; + if ( ! provider ) { + throw new Error(`no provider found for model ${model.id}`); + } + + if ( model.durationSeconds?.length ) { + const requestedSeconds = parameters.seconds ?? parameters.duration; + const normalizedSeconds = typeof requestedSeconds === 'string' + ? Number.parseInt(requestedSeconds, 10) + : requestedSeconds; + const validSeconds = model.durationSeconds.includes(Number(normalizedSeconds)) + ? normalizedSeconds + : model.durationSeconds[0]; + parameters.seconds = validSeconds; + parameters.duration = validSeconds; + } + + if ( model.dimensions?.length ) { + const requestedResolution = typeof parameters.size === 'string' && parameters.size.trim() + ? parameters.size + : typeof parameters.resolution === 'string' && parameters.resolution.trim() + ? parameters.resolution + : undefined; + + const normalizedResolution = requestedResolution && model.dimensions.includes(requestedResolution) + ? requestedResolution + : model.dimensions[0]; + parameters.size = normalizedResolution; + parameters.resolution = normalizedResolution; + } + + return await provider.generate({ + ...parameters, + model: model.id, + provider: model.provider, + test_mode: testMode, + }); + } +} diff --git a/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js b/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js deleted file mode 100644 index 7318ab5c7..000000000 --- a/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright (C) 2024-present Puter Technologies Inc. - * - * This file is part of Puter. - * - * Puter is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -const APIError = require('../../../../api/APIError'); -const BaseService = require('../../../BaseService'); -const { TypedValue } = require('../../../drivers/meta/Runtime'); -const { Context } = require('../../../../util/context'); -const { Readable } = require('stream'); - -const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4'; -const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes -const POLL_INTERVAL_MS = 5_000; -const DEFAULT_DURATION_SECONDS = 4; -const DEFAULT_SIZE = '720x1280'; -const ALLOWED_SIZES = new Set(['720x1280', '1280x720', '1024x1792', '1792x1024']); -const ALLOWED_SECONDS = new Set(['4', '8', '12']); -const OPENAI_VIDEO_MODELS = [ - { - puterId: 'openai:openai/sora-2', - id: 'sora-2', - aliases: ['openai/sora-2'], - defaultUsageKey: 'openai:sora-2:default', - }, - { - puterId: 'openai:openai/sora-2-pro', - id: 'sora-2-pro', - aliases: ['openai/sora-2-pro'], - defaultUsageKey: 'openai:sora-2-pro:default', - }, -]; - -class OpenAIVideoGenerationService extends BaseService { - /** @type {import('../../../MeteringService/MeteringService').MeteringService} */ - get meteringService () { - return this.services.get('meteringService').meteringService; - } - - static MODULES = { - openai: require('openai'), - }; - - _construct () { - this.models_ = Object.fromEntries(OPENAI_VIDEO_MODELS.map(model => [ - model.id, - { defaultUsageKey: model.defaultUsageKey }, - ])); - } - - async _init () { - let apiKey = - this.config?.services?.openai?.apiKey ?? - this.global_config?.services?.openai?.apiKey; - - if ( ! apiKey ) { - apiKey = - this.config?.openai?.secret_key ?? - this.global_config.openai?.secret_key; - - console.warn('The `openai.secret_key` configuration format is deprecated. ' + - 'Please use `services.openai.apiKey` instead.'); - } - - this.openai = new this.modules.openai.OpenAI({ - apiKey, - }); - } - - static IMPLEMENTS = { - 'driver-capabilities': { - supports_test_mode (iface, method_name) { - return iface === 'puter-video-generation' && - method_name === 'generate'; - }, - }, - 'puter-video-generation': { - async generate (params) { - return await this.generateVideo(params); - }, - }, - }; - - async models () { - // Import cost map dynamically - const costMapModule = await import('../../../MeteringService/costMaps/openaiVideoCostMap.ts'); - const OPENAI_VIDEO_COST_MAP = costMapModule.OPENAI_VIDEO_COST_MAP; - - // Convert microcents to cents (divide by 1,000,000) - const microCentsToCents = (microCents) => microCents / 1_000_000; - - return OPENAI_VIDEO_MODELS.map(model => { - const result = { ...model }; - - // Get cost for default usage key - const defaultCostMicroCents = OPENAI_VIDEO_COST_MAP[model.defaultUsageKey]; - if ( defaultCostMicroCents !== undefined ) { - const perSecondCost = microCentsToCents(defaultCostMicroCents); - result.costs_currency = 'usd-cents'; - result.costs = { - 'per-second': perSecondCost, - 'default-duration-per-video': perSecondCost * DEFAULT_DURATION_SECONDS, - }; - result.output_cost_key = 'default-duration-per-video'; - } - - // Add cost for xl variant if it exists (sora-2-pro only) - if ( model.id === 'sora-2-pro' ) { - const xlCostMicroCents = OPENAI_VIDEO_COST_MAP['openai:sora-2-pro:xl']; - if ( xlCostMicroCents !== undefined ) { - if ( ! result.costs ) { - result.costs = {}; - result.costs_currency = 'usd-cents'; - } - const perSecondXlCost = microCentsToCents(xlCostMicroCents); - result.costs['per-second-xl'] = perSecondXlCost; - result.costs['default-duration-per-video-xl'] = perSecondXlCost * DEFAULT_DURATION_SECONDS; - } - } - - return result; - }); - } - - async generateVideo (params) { - const { - prompt, - model: requestedModel, - duration, - seconds, - size, - resolution, - input_reference: inputReference, - test_mode: testMode, - } = params ?? {}; - - if ( typeof prompt !== 'string' || !prompt.trim() ) { - throw APIError.create('field_invalid', null, { - key: 'prompt', - expected: 'a non-empty string', - got: prompt, - }); - } - - const resolvedModel = OPENAI_VIDEO_MODELS.find(entry => - entry.id === requestedModel || - entry.puterId === requestedModel || - (entry.aliases || []).includes(requestedModel))?.id; - const model = resolvedModel ?? requestedModel ?? 'sora-2'; - const modelConfig = this.models_[model]; - if ( ! modelConfig ) { - throw APIError.create('field_invalid', null, { - key: 'model', - expected: `one of: ${ Object.keys(this.models_).join(', ')}`, - got: model, - }); - } - - if ( testMode ) { - return new TypedValue({ - $: 'string:url:web', - content_type: 'video', - }, DEFAULT_TEST_VIDEO_URL); - } - - const normalizedSize = this.#normalizeSize(size ?? resolution) ?? DEFAULT_SIZE; - const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? '4'; - - const usageKey = this.#determineUsageKey(model, normalizedSize); - if ( ! usageKey ) { - throw new Error(`Unsupported pricing tier for model ${model}`); - } - - const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? DEFAULT_DURATION_SECONDS; - const actor = Context.get('actor'); - const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUnits); - if ( ! usageAllowed ) { - throw APIError.create('insufficient_funds'); - } - - const createParams = { - model, - prompt, - seconds: normalizedSeconds, - size: normalizedSize, - }; - - if ( inputReference ) { - createParams.input_reference = inputReference; - } - - const createResponse = await this.openai.videos.create(createParams); - const finalJob = await this.#pollUntilComplete(createResponse); - - if ( finalJob.status === 'failed' ) { - const errorMessage = finalJob.error?.message ?? 'Video generation failed'; - throw new Error(errorMessage); - } - - const finalResolution = this.#normalizeSize(finalJob.size) ?? normalizedSize; - const finalUsageKey = this.#determineUsageKey(model, finalResolution); - if ( ! finalUsageKey ) { - throw new Error(`Unsupported pricing tier for model ${model}`); - } - - const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits; - - const downloadResponse = await this.openai.videos.downloadContent(finalJob.id); - const contentType = downloadResponse.headers.get('content-type') ?? 'video/mp4'; - - let stream = downloadResponse.body; - if ( stream && typeof stream.getReader === 'function' ) { - stream = Readable.fromWeb(stream); - } - - if ( ! stream ) { - const arrayBuffer = await downloadResponse.arrayBuffer(); - stream = Readable.from(Buffer.from(arrayBuffer)); - } - - this.meteringService.incrementUsage(actor, finalUsageKey, actualSeconds); - - return new TypedValue({ - $: 'stream', - content_type: contentType, - }, stream); - } - - async #pollUntilComplete (initialJob) { - let job = initialJob; - const start = Date.now(); - - while ( job.status === 'queued' || job.status === 'in_progress' ) { - if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) { - throw new Error('Timed out waiting for Sora video generation to complete'); - } - - await this.#delay(POLL_INTERVAL_MS); - job = await this.openai.videos.retrieve(job.id); - } - - return job; - } - - async #delay (ms) { - return await new Promise(resolve => setTimeout(resolve, ms)); - } - - #normalizeSize (candidate) { - if ( ! candidate ) return undefined; - const normalized = this.#normalizeResolution(candidate); - if ( normalized && ALLOWED_SIZES.has(normalized) ) { - return normalized; - } - return undefined; - } - - #normalizeSeconds (value) { - if ( value === null || value === undefined ) { - return undefined; - } - - if ( typeof value === 'number' && Number.isFinite(value) ) { - const rounded = String(Math.round(value)); - return ALLOWED_SECONDS.has(rounded) ? rounded : undefined; - } - - if ( typeof value === 'string' ) { - const trimmed = value.trim(); - if ( ALLOWED_SECONDS.has(trimmed) ) { - return trimmed; - } - const numeric = Number.parseInt(trimmed, 10); - if ( Number.isFinite(numeric) ) { - const normalized = String(numeric); - return ALLOWED_SECONDS.has(normalized) ? normalized : undefined; - } - } - - return undefined; - } - - #determineUsageKey (model, normalizedSize) { - const config = this.models_[model]; - if ( ! config ) return null; - - if ( model === 'sora-2-pro' && normalizedSize === '1792x1024' ) { - return 'openai:sora-2-pro:xl'; - } - - return config.defaultUsageKey; - } - - #normalizeResolution (value) { - if ( ! value ) return undefined; - if ( typeof value === 'string' ) { - const match = value.match(/(\\d+)\\s*x\\s*(\\d+)/i); - if ( match ) { - const width = Number.parseInt(match[1], 10); - const height = Number.parseInt(match[2], 10); - if ( Number.isFinite(width) && Number.isFinite(height) ) { - const larger = Math.max(width, height); - const smaller = Math.min(width, height); - return `${larger}x${smaller}`; - } - } - } - return undefined; - } - - #parseSeconds (value) { - if ( value === null || value === undefined ) return undefined; - if ( typeof value === 'number' && Number.isFinite(value) ) { - return value; - } - if ( typeof value === 'string' ) { - const numeric = Number.parseInt(value, 10); - if ( Number.isFinite(numeric) ) { - return numeric; - } - } - return undefined; - } -} - -module.exports = { - OpenAIVideoGenerationService, -}; diff --git a/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.ts b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.ts new file mode 100644 index 000000000..94d0fe64e --- /dev/null +++ b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.ts @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import OpenAI from 'openai'; +import APIError from '../../../../../api/APIError.js'; +import { Context } from '../../../../../util/context.js'; +import { MeteringService } from '../../../../MeteringService/MeteringService.js'; +import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js'; +import { TypedValue } from '../../../../drivers/meta/Runtime.js'; +import { Readable } from 'stream'; +import { OPENAI_VIDEO_MODELS, OPENAI_VIDEO_ALLOWED_SECONDS } from './models.js'; + +const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4'; +const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; +const POLL_INTERVAL_MS = 5_000; +const DEFAULT_DURATION_SECONDS = 4; + +export class OpenAIVideoGenerationProvider implements IVideoProvider { + #openai: OpenAI; + #meteringService: MeteringService; + + constructor (config: { apiKey: string }, meteringService: MeteringService) { + if ( ! config.apiKey ) { + throw new Error('OpenAI video generation requires an API key'); + } + this.#openai = new OpenAI({ apiKey: config.apiKey }); + this.#meteringService = meteringService; + } + + getDefaultModel (): string { + return OPENAI_VIDEO_MODELS[0].id; + } + + async models (): Promise { + return OPENAI_VIDEO_MODELS; + } + + async generate (params: IGenerateVideoParams): Promise { + const { + prompt, + model: requestedModel, + duration, + seconds, + size, + resolution, + input_reference: inputReference, + test_mode: testMode, + } = params ?? {}; + + if ( typeof prompt !== 'string' || !prompt.trim() ) { + throw APIError.create('field_invalid', null, { + key: 'prompt', + expected: 'a non-empty string', + got: prompt, + }); + } + + const selectedModel = await this.#selectModel(requestedModel); + + if ( ! selectedModel ) { + throw new Error(`Unknown video model: ${requestedModel}`); + } + + if ( testMode ) { + return new TypedValue({ + $: 'string:url:web', + content_type: 'video', + }, DEFAULT_TEST_VIDEO_URL); + } + + const defaultSize = selectedModel.dimensions?.[0] ?? '720x1280'; + const normalizedSize = this.#normalizeSize(size ?? resolution, selectedModel) ?? defaultSize; + const normalizedSeconds = this.#normalizeSeconds(seconds ?? duration) ?? String(DEFAULT_DURATION_SECONDS); + + const sizeTier = this.#determineSizeTier(selectedModel, normalizedSize); + const costPerSecondCents = this.#getCostPerSecond(selectedModel, sizeTier); + + if ( ! costPerSecondCents ) { + throw new Error(`No pricing configured for model ${selectedModel.id} at size ${normalizedSize}`); + } + + const estimatedUnits = this.#parseSeconds(normalizedSeconds) ?? DEFAULT_DURATION_SECONDS; + const actor = Context.get('actor'); + const costInMicroCents = costPerSecondCents * 1_000_000; + const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents * estimatedUnits); + if ( ! usageAllowed ) { + throw APIError.create('insufficient_funds'); + } + + const createParams: OpenAI.VideoCreateParams = { + prompt, + model: selectedModel.id, + seconds: normalizedSeconds as OpenAI.VideoSeconds, + size: normalizedSize as OpenAI.VideoSize, + }; + + if ( inputReference ) { + createParams.input_reference = inputReference as OpenAI.VideoCreateParams['input_reference']; + } + + const createResponse = await this.#openai.videos.create(createParams); + const finalJob = await this.#pollUntilComplete(createResponse); + + if ( finalJob.status === 'failed' ) { + const errorMessage = finalJob.error?.message ?? 'Video generation failed'; + throw new Error(errorMessage); + } + + const finalResolution = this.#normalizeSize(finalJob.size, selectedModel) ?? normalizedSize; + const finalTier = this.#determineSizeTier(selectedModel, finalResolution); + const finalCostPerSecondCents = this.#getCostPerSecond(selectedModel, finalTier); + + if ( ! finalCostPerSecondCents ) { + throw new Error(`No pricing configured for model ${selectedModel.id} at size ${finalResolution}`); + } + + const finalCostInMicroCents = finalCostPerSecondCents * 1_000_000; + const actualSeconds = this.#parseSeconds(finalJob.seconds) ?? estimatedUnits; + + const downloadResponse = await this.#openai.videos.downloadContent(finalJob.id); + const contentType = downloadResponse.headers.get('content-type') ?? 'video/mp4'; + + let stream: any = downloadResponse.body; + if ( stream && typeof stream.getReader === 'function' ) { + stream = Readable.fromWeb(stream as any); + } + + if ( ! stream ) { + const arrayBuffer = await downloadResponse.arrayBuffer(); + stream = Readable.from(Buffer.from(arrayBuffer)); + } + + const finalUsageKey = this.#getUsageKey(selectedModel, finalTier); + await this.#meteringService.incrementUsage(actor, finalUsageKey, actualSeconds, finalCostInMicroCents * actualSeconds); + + return new TypedValue({ + $: 'stream', + content_type: contentType, + }, stream); + } + + async #selectModel (requestedModel?: string): Promise { + const allModels = await this.models(); + return allModels.find(m => m.id.toLowerCase() === requestedModel?.toLowerCase()); + } + + async #pollUntilComplete (initialJob: OpenAI.Video): Promise { + let job = initialJob; + const start = Date.now(); + + while ( job.status === 'queued' || job.status === 'in_progress' ) { + if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) { + throw new Error('Timed out waiting for Sora video generation to complete'); + } + + await this.#delay(POLL_INTERVAL_MS); + job = await this.#openai.videos.retrieve(job.id); + } + + return job; + } + + async #delay (ms: number): Promise { + return await new Promise(resolve => setTimeout(resolve, ms)); + } + + #normalizeSize (candidate: unknown, model: IVideoModel): string | undefined { + if ( ! candidate ) return undefined; + const normalized = this.#normalizeResolution(candidate); + if ( normalized && model.dimensions?.includes(normalized) ) { + return normalized; + } + return undefined; + } + + #normalizeSeconds (value: unknown): string | undefined { + if ( value === null || value === undefined ) { + return undefined; + } + const parsed = typeof value === 'number' ? String(Math.round(value)) : typeof value === 'string' ? value.trim() : undefined; + if ( parsed && OPENAI_VIDEO_ALLOWED_SECONDS.includes(Number(parsed) as typeof OPENAI_VIDEO_ALLOWED_SECONDS[number]) ) { + return parsed; + } + return undefined; + } + + #determineSizeTier (model: IVideoModel, size: string): string { + if ( model.id === 'sora-2-pro' ) { + if ( size === '1080x1920' || size === '1920x1080' ) return 'xxl'; + if ( size === '1024x1792' || size === '1792x1024' ) return 'xl'; + } + return 'default'; + } + + #getCostPerSecond (model: IVideoModel, tier: string): number | undefined { + const key = tier === 'default' ? 'per-second' : `per-second-${tier}`; + return model.costs?.[key]; + } + + #getUsageKey (model: IVideoModel, tier: string): string { + return `openai:${model.id}:${tier}`; + } + + #normalizeResolution (value: unknown): string | undefined { + if ( ! value ) return undefined; + if ( typeof value === 'string' ) { + const match = value.match(/(\d+)\s*x\s*(\d+)/i); + if ( match ) { + const w = Number.parseInt(match[1], 10); + const h = Number.parseInt(match[2], 10); + if ( Number.isFinite(w) && Number.isFinite(h) ) { + return `${w}x${h}`; + } + } + } + return undefined; + } + + #parseSeconds (value: unknown): number | undefined { + if ( value === null || value === undefined ) return undefined; + if ( typeof value === 'number' && Number.isFinite(value) ) { + return Math.round(value); + } + if ( typeof value === 'string' ) { + const numeric = Number.parseInt(value, 10); + return Number.isFinite(numeric) ? numeric : undefined; + } + return undefined; + } +} diff --git a/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/models.ts b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/models.ts new file mode 100644 index 000000000..061cd341d --- /dev/null +++ b/src/backend/src/services/ai/video/providers/OpenAIVideoGenerationProvider/models.ts @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import { IVideoModel } from '../types.js'; + +export const OPENAI_VIDEO_ALLOWED_SECONDS = [4, 8, 12] as const; + +export const OPENAI_VIDEO_MODELS: IVideoModel[] = [ + { + id: 'sora-2', + puterId: 'openai:openai/sora-2', + aliases: ['openai/sora-2'], + name: 'Sora 2', + costs_currency: 'usd-cents', + costs: { + 'per-second': 10, + 'default-duration-per-video': 40, + }, + output_cost_key: 'default-duration-per-video', + durationSeconds: OPENAI_VIDEO_ALLOWED_SECONDS.slice(), + dimensions: ['720x1280', '1280x720'], + defaultUsageKey: 'openai:sora-2:default', + }, + { + id: 'sora-2-pro', + puterId: 'openai:openai/sora-2-pro', + aliases: ['openai/sora-2-pro'], + name: 'Sora 2 Pro', + costs_currency: 'usd-cents', + costs: { + 'per-second': 30, + 'default-duration-per-video': 120, + 'per-second-xl': 50, + 'default-duration-per-video-xl': 200, + 'per-second-xxl': 70, + 'default-duration-per-video-xxl': 280, + }, + output_cost_key: 'default-duration-per-video', + durationSeconds: OPENAI_VIDEO_ALLOWED_SECONDS.slice(), + dimensions: ['720x1280', '1280x720', '1024x1792', '1792x1024', '1080x1920', '1920x1080'], + defaultUsageKey: 'openai:sora-2-pro:default', + }, +]; diff --git a/src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.ts similarity index 61% rename from src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js rename to src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.ts index a31b7ec56..10bf86fbc 100644 --- a/src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js +++ b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.ts @@ -17,56 +17,50 @@ * along with this program. If not, see . */ -const APIError = require('../../../../api/APIError'); -const BaseService = require('../../../BaseService'); -const { TypedValue } = require('../../../drivers/meta/Runtime'); -const { Context } = require('../../../../util/context'); -const { Together } = require('together-ai'); +import { Together } from 'together-ai'; +import APIError from '../../../../../api/APIError.js'; +import { Context } from '../../../../../util/context.js'; +import { MeteringService } from '../../../../MeteringService/MeteringService.js'; +import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js'; +import { TypedValue } from '../../../../drivers/meta/Runtime.js'; +import { TOGETHER_VIDEO_GENERATION_MODELS } from './models.js'; const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4'; const POLL_INTERVAL_MS = 5_000; -const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes +const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; const DEFAULT_MODEL = 'minimax/video-01-director'; const DEFAULT_DURATION_SECONDS = 6; -const DEFAULT_USAGE_KEY = 'together-video:default'; -let models = []; +export class TogetherVideoGenerationProvider implements IVideoProvider { + #client: Together; + #meteringService: MeteringService; -class TogetherVideoGenerationService extends BaseService { - /** @type {import('../../../MeteringService/MeteringService').MeteringService} */ - get meteringService () { - return this.services.get('meteringService').meteringService; - } - - static MODULES = {}; - - async _init () { - const apiKey = - this.config?.apiKey ?? - this.global_config?.services?.['together-ai']?.apiKey; - - if ( ! apiKey ) { + constructor (config: { apiKey: string }, meteringService: MeteringService) { + if ( ! config.apiKey ) { throw new Error('Together AI video generation requires an API key'); } - - this.client = new Together({ apiKey }); + this.#client = new Together({ apiKey: config.apiKey }); + this.#meteringService = meteringService; } - static IMPLEMENTS = { - 'driver-capabilities': { - supports_test_mode (iface, method_name) { - return iface === 'puter-video-generation' && - method_name === 'generate'; - }, - }, - 'puter-video-generation': { - async generate (params) { - return await this.generateVideo(params); - }, - }, - }; + getDefaultModel (): string { + return 'togetherai:minimax/video-01-director'; + } - async generateVideo (params) { + async models (): Promise { + return TOGETHER_VIDEO_GENERATION_MODELS.map((model) => ({ + ...model, + aliases: [model.model], + durationSeconds: model.durationSeconds ?? undefined, + dimensions: model.dimensions ?? undefined, + fps: model.fps ?? undefined, + keyframes: model.keyframes ?? undefined, + promptLength: model.promptLength ?? undefined, + promptSupported: model.promptSupported ?? undefined, + })); + } + + async generate (params: IGenerateVideoParams): Promise { const { prompt, model: requestedModel, @@ -97,6 +91,7 @@ class TogetherVideoGenerationService extends BaseService { } const model = this.#stripTogetherPrefix(requestedModel ?? DEFAULT_MODEL); + const selectedModel = await this.#getModel(requestedModel); if ( testMode ) { return new TypedValue({ @@ -105,10 +100,15 @@ class TogetherVideoGenerationService extends BaseService { }, DEFAULT_TEST_VIDEO_URL); } + const costPerVideoCents = selectedModel?.costs?.['per-video']; + if ( ! costPerVideoCents ) { + throw new Error(`No pricing configured for video model ${model}`); + } + const costInMicroCents = costPerVideoCents * 1_000_000; + let normalizedSeconds = this.#coercePositiveInteger(seconds ?? duration); - if ( ! no_extra_params ) - { + if ( ! no_extra_params ) { normalizedSeconds ??= DEFAULT_DURATION_SECONDS; } @@ -117,21 +117,18 @@ class TogetherVideoGenerationService extends BaseService { throw new Error('actor not found in context'); } - const estimatedUsageUnits = 1; // Together video billing is per generated video - const usageKey = this.#determineUsageKey(model); - - const usageAllowed = await this.meteringService.hasEnoughCreditsFor(actor, usageKey, estimatedUsageUnits); + const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents); if ( ! usageAllowed ) { throw APIError.create('insufficient_funds'); } - const createPayload = { + const createPayload: Together.VideoCreateParams & { metadata?: object } = { prompt, model, }; if ( normalizedSeconds ) { - createPayload.seconds = normalizedSeconds; + createPayload.seconds = String(normalizedSeconds); } if ( this.#isFiniteNumber(width) ) { createPayload.width = Number(width); @@ -152,7 +149,7 @@ class TogetherVideoGenerationService extends BaseService { createPayload.seed = Number(seed); } if ( typeof outputFormat === 'string' && outputFormat.trim() ) { - createPayload.output_format = outputFormat.trim(); + createPayload.output_format = outputFormat.trim() as Together.VideoCreateParams['output_format']; } if ( this.#isFiniteNumber(outputQuality) ) { createPayload.output_quality = Number(outputQuality); @@ -161,16 +158,16 @@ class TogetherVideoGenerationService extends BaseService { createPayload.negative_prompt = negativePrompt; } if ( Array.isArray(referenceImages) && referenceImages.length > 0 ) { - createPayload.reference_images = referenceImages.filter(item => typeof item === 'string' && item.trim().length > 0); + createPayload.reference_images = referenceImages.filter((item: string) => typeof item === 'string' && item.trim().length > 0); } if ( Array.isArray(frameImages) && frameImages.length > 0 ) { - createPayload.frame_images = frameImages.filter(frame => frame && typeof frame === 'object'); + createPayload.frame_images = frameImages.filter((frame: any) => frame && typeof frame === 'object' && typeof frame.input_image === 'string') as Together.VideoCreateParams['frame_images']; } if ( metadata && typeof metadata === 'object' ) { createPayload.metadata = metadata; } - const job = await this.client.videos.create(createPayload); + const job = await this.#client.videos.create(createPayload); const finalJob = await this.#pollUntilComplete(job.id); if ( finalJob.status === 'failed' ) { @@ -185,7 +182,8 @@ class TogetherVideoGenerationService extends BaseService { throw new Error('Video generation was cancelled'); } - this.meteringService.incrementUsage(actor, usageKey, 1); + const usageKey = `together-video:${model}`; + await this.#meteringService.incrementUsage(actor, usageKey, 1, costInMicroCents); const videoUrl = finalJob?.outputs?.video_url; if ( typeof videoUrl === 'string' && videoUrl.trim() ) { @@ -198,41 +196,9 @@ class TogetherVideoGenerationService extends BaseService { throw new Error('Together AI response did not include a video URL'); } - async models () { - if ( models.length > 0 && models[0].costs_currency ) { - return models; - } - - const { TOGETHER_VIDEO_GENERATION_MODELS } = await import('./models.js'); - const costMapModule = await import('../../../MeteringService/costMaps/togetherCostMap.ts'); - const TOGETHER_COST_MAP = costMapModule.TOGETHER_COST_MAP; - - // Convert microcents to cents (divide by 1,000,000) - const microCentsToCents = (microCents) => microCents / 1_000_000; - - models = TOGETHER_VIDEO_GENERATION_MODELS.map(model => { - const result = { ...model }; - - // Convert model ID from 'togetherai:google/veo-3.0' to cost key 'together-video:google/veo-3.0' - const costKey = model.id.replace('togetherai:', 'together-video:'); - const costMicroCents = TOGETHER_COST_MAP[costKey]; - - if ( costMicroCents !== undefined && costMicroCents > 0 ) { - result.costs_currency = 'usd-cents'; - result.costs = { - 'per-video': microCentsToCents(costMicroCents), - }; - result.output_cost_key = 'per-video'; - } - - return result; - }); - - return models; - } - - async #pollUntilComplete (jobId) { - let job = await this.client.videos.retrieve(jobId); + async #pollUntilComplete (jobId: string): Promise { + // any here because sdk types are wrong https://docs.together.ai/docs/videos-overview -> "Job Status Reference" + let job = await (this.#client as any).videos.retrieve(jobId); const start = Date.now(); while ( job.status === 'queued' || job.status === 'in_progress' ) { @@ -241,31 +207,30 @@ class TogetherVideoGenerationService extends BaseService { } await this.#delay(POLL_INTERVAL_MS); - job = await this.client.videos.retrieve(jobId); + job = await (this.#client as any).videos.retrieve(jobId); } return job; } - async #delay (ms) { + async #delay (ms: number): Promise { return await new Promise(resolve => setTimeout(resolve, ms)); } - #determineUsageKey (model) { - if ( typeof model === 'string' && model.trim() ) { - return `together-video:${model}`; - } - return DEFAULT_USAGE_KEY; + async #getModel (requestedModel?: string): Promise { + const bareModel = this.#stripTogetherPrefix(requestedModel ?? DEFAULT_MODEL); + const allModels = await this.models(); + return allModels.find(m => m.model?.toLowerCase() === bareModel.toLowerCase()); } - #stripTogetherPrefix (model) { + #stripTogetherPrefix (model: string): string { if ( typeof model === 'string' && model.startsWith('togetherai:') ) { return model.slice('togetherai:'.length); } return model; } - #coercePositiveInteger (value) { + #coercePositiveInteger (value: unknown): number | undefined { if ( typeof value === 'number' && Number.isFinite(value) ) { const rounded = Math.round(value); return rounded > 0 ? rounded : undefined; @@ -277,7 +242,7 @@ class TogetherVideoGenerationService extends BaseService { return undefined; } - #isFiniteNumber (value) { + #isFiniteNumber (value: unknown): boolean { if ( typeof value === 'number' ) { return Number.isFinite(value); } @@ -288,7 +253,3 @@ class TogetherVideoGenerationService extends BaseService { return false; } } - -module.exports = { - TogetherVideoGenerationService, -}; diff --git a/src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/models.ts similarity index 67% rename from src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js rename to src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/models.ts index 1ea54fe97..4ea7e4417 100644 --- a/src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js +++ b/src/backend/src/services/ai/video/providers/TogetherVideoGenerationProvider/models.ts @@ -1,10 +1,45 @@ -export const TOGETHER_VIDEO_GENERATION_MODELS = [ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import { IVideoModel } from '../types.js'; + +interface ITogetherVideoModel extends IVideoModel { + model: string; + organization: string; + durationSeconds: number[] | null; + dimensions: string[] | null; + fps: number[] | null; + keyframes: string[] | null; + promptLength: { min: number; max: number } | null; + promptSupported: boolean | null; +} + +export const TOGETHER_VIDEO_GENERATION_MODELS: ITogetherVideoModel[] = [ { id: 'togetherai:minimax/video-01-director', organization: 'MiniMax', name: 'MiniMax 01 Director', model: 'minimax/video-01-director', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 28 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1366x768'], fps: [25], keyframes: ['first'], @@ -16,7 +51,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'MiniMax', name: 'MiniMax Hailuo 02', model: 'minimax/hailuo-02', - durationSeconds: 10, + costs_currency: 'usd-cents', + costs: { 'per-video': 56 }, + output_cost_key: 'per-video', + durationSeconds: [10], dimensions: ['1366x768', '1920x1080'], fps: [25], keyframes: ['first'], @@ -28,7 +66,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Google', name: 'Veo 2.0', model: 'google/veo-2.0', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 250 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1280x720', '720x1280'], fps: [24], keyframes: ['first', 'last'], @@ -40,7 +81,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Google', name: 'Veo 3.0', model: 'google/veo-3.0', - durationSeconds: 8, + costs_currency: 'usd-cents', + costs: { 'per-video': 160 }, + output_cost_key: 'per-video', + durationSeconds: [8], dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], fps: [24], keyframes: ['first'], @@ -52,7 +96,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Google', name: 'Veo 3.0 + Audio', model: 'google/veo-3.0-audio', - durationSeconds: 8, + costs_currency: 'usd-cents', + costs: { 'per-video': 320 }, + output_cost_key: 'per-video', + durationSeconds: [8], dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], fps: [24], keyframes: ['first'], @@ -64,7 +111,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Google', name: 'Veo 3.0 Fast', model: 'google/veo-3.0-fast', - durationSeconds: 8, + costs_currency: 'usd-cents', + costs: { 'per-video': 80 }, + output_cost_key: 'per-video', + durationSeconds: [8], dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], fps: [24], keyframes: ['first'], @@ -76,7 +126,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Google', name: 'Veo 3.0 Fast + Audio', model: 'google/veo-3.0-fast-audio', - durationSeconds: 8, + costs_currency: 'usd-cents', + costs: { 'per-video': 120 }, + output_cost_key: 'per-video', + durationSeconds: [8], dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], fps: [24], keyframes: ['first'], @@ -88,7 +141,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'ByteDance', name: 'Seedance 1.0 Lite', model: 'ByteDance/Seedance-1.0-lite', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 14 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: [ '864x480', '736x544', @@ -111,7 +167,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'ByteDance', name: 'Seedance 1.0 Pro', model: 'ByteDance/Seedance-1.0-pro', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 57 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: [ '864x480', '736x544', @@ -134,7 +193,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'PixVerse', name: 'PixVerse v5', model: 'pixverse/pixverse-v5', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 30 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: [ '640x360', '480x360', @@ -167,7 +229,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Kuaishou', name: 'Kling 2.1 Master', model: 'kwaivgI/kling-2.1-master', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 92 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1920x1080', '1080x1080', '1080x1920'], fps: [24], keyframes: ['first'], @@ -179,7 +244,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Kuaishou', name: 'Kling 2.1 Standard', model: 'kwaivgI/kling-2.1-standard', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 18 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1920x1080', '1080x1080', '1080x1920'], fps: [24], keyframes: ['first'], @@ -191,7 +259,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Kuaishou', name: 'Kling 2.1 Pro', model: 'kwaivgI/kling-2.1-pro', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 32 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1920x1080', '1080x1080', '1080x1920'], fps: [24], keyframes: ['first', 'last'], @@ -203,7 +274,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Kuaishou', name: 'Kling 2.0 Master', model: 'kwaivgI/kling-2.0-master', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 92 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1280x720', '720x720', '720x1280'], fps: [24], keyframes: ['first'], @@ -215,7 +289,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Kuaishou', name: 'Kling 1.6 Standard', model: 'kwaivgI/kling-1.6-standard', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 19 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1920x1080', '1080x1080', '1080x1920'], fps: [30, 24], keyframes: ['first'], @@ -227,7 +304,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Kuaishou', name: 'Kling 1.6 Pro', model: 'kwaivgI/kling-1.6-pro', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 32 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1920x1080', '1080x1080', '1080x1920'], fps: [24], keyframes: ['first'], @@ -239,6 +319,9 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Wan-AI', name: 'Wan 2.2 I2V', model: 'Wan-AI/Wan2.2-I2V-A14B', + costs_currency: 'usd-cents', + costs: { 'per-video': 31 }, + output_cost_key: 'per-video', durationSeconds: null, dimensions: null, fps: null, @@ -251,6 +334,9 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Wan-AI', name: 'Wan 2.2 T2V', model: 'Wan-AI/Wan2.2-T2V-A14B', + costs_currency: 'usd-cents', + costs: { 'per-video': 66 }, + output_cost_key: 'per-video', durationSeconds: null, dimensions: null, fps: null, @@ -263,7 +349,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Vidu', name: 'Vidu 2.0', model: 'vidu/vidu-2.0', - durationSeconds: 8, + costs_currency: 'usd-cents', + costs: { 'per-video': 28 }, + output_cost_key: 'per-video', + durationSeconds: [8], dimensions: [ '1920x1080', '1080x1080', @@ -285,7 +374,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'Vidu', name: 'Vidu Q1', model: 'vidu/vidu-q1', - durationSeconds: 5, + costs_currency: 'usd-cents', + costs: { 'per-video': 22 }, + output_cost_key: 'per-video', + durationSeconds: [5], dimensions: ['1920x1080', '1080x1080', '1080x1920'], fps: [24], keyframes: ['first', 'last'], @@ -297,7 +389,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'OpenAI', name: 'Sora 2', model: 'openai/sora-2', - durationSeconds: 8, + costs_currency: 'usd-cents', + costs: { 'per-video': 80 }, + output_cost_key: 'per-video', + durationSeconds: [8], dimensions: ['1280x720', '720x1280'], fps: null, keyframes: ['first'], @@ -309,7 +404,10 @@ export const TOGETHER_VIDEO_GENERATION_MODELS = [ organization: 'OpenAI', name: 'Sora 2 Pro', model: 'openai/sora-2-pro', - durationSeconds: 8, + costs_currency: 'usd-cents', + costs: { 'per-video': 400 }, + output_cost_key: 'per-video', + durationSeconds: [8], dimensions: ['1280x720', '720x1280'], fps: null, keyframes: ['first'], diff --git a/src/backend/src/services/ai/video/providers/types.ts b/src/backend/src/services/ai/video/providers/types.ts new file mode 100644 index 000000000..2b378d48e --- /dev/null +++ b/src/backend/src/services/ai/video/providers/types.ts @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +export interface IVideoModel { + id: string; + name: string; + puterId?: string; + provider?: string; + aliases?: string[]; + description?: string; + version?: string; + costs_currency?: string; + index_cost_key?: string; + output_cost_key?: string; + costs?: Record; + durationSeconds?: number[] | null; + dimensions?: string[] | null; + defaultUsageKey?: string; + organization?: string; + model?: string; + fps?: number[] | null; + keyframes?: string[] | null; + promptLength?: { min: number; max: number } | null; + promptSupported?: boolean | null; +} + +export interface IGenerateVideoParams { + prompt: string; + model?: string; + provider?: string; + test_mode?: boolean; + seconds?: number | string; + duration?: number | string; + size?: string; + resolution?: string; + width?: number; + height?: number; + fps?: number; + steps?: number; + guidance_scale?: number; + seed?: number; + output_format?: string; + output_quality?: number; + negative_prompt?: string; + reference_images?: string[]; + frame_images?: object[]; + metadata?: object; + input_reference?: unknown; + no_extra_params?: boolean; +} + +export interface IVideoProvider { + generate (params: IGenerateVideoParams): Promise; + models (): Promise | IVideoModel[]; + getDefaultModel (): string; +} diff --git a/src/backend/src/services/drivers/DriverService.js b/src/backend/src/services/drivers/DriverService.js index 775f4b9a9..47c3b09b6 100644 --- a/src/backend/src/services/drivers/DriverService.js +++ b/src/backend/src/services/drivers/DriverService.js @@ -293,7 +293,7 @@ class DriverService extends BaseService { 'puter-speech2txt': 'openai-speech2txt', 'puter-chat-completion': 'openai-completion', 'puter-image-generation': 'openai-image-generation', - 'puter-video-generation': 'openai-video-generation', + 'puter-video-generation': 'ai-video', 'puter-apps': 'es:app', 'puter-subdomains': 'es:subdomain', 'puter-notifications': 'es:notification', diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js index a8712fdd7..f5eadcf06 100644 --- a/src/puter-js/src/modules/AI.js +++ b/src/puter-js/src/modules/AI.js @@ -11,16 +11,6 @@ const normalizeTTSProvider = (value) => { return value; }; -const TOGETHER_VIDEO_MODEL_PREFIXES = [ - 'minimax/', - 'google/', - 'bytedance/', - 'pixverse/', - 'kwaivgi/', - 'vidu/', - 'wan-ai/', -]; - class AI { /** * Creates a new instance with the given authentication token, API origin, and app ID, @@ -908,39 +898,19 @@ class AI { throw ({ message: 'Prompt parameter is required', code: 'prompt_required' }); } - if ( ! options.model ) { - options.model = 'sora-2'; - } - if ( options.duration !== undefined && options.seconds === undefined ) { options.seconds = options.duration; } - // This sucks, should be backend's job like we do for chat models now - let videoService = 'openai-video-generation'; + if ( options.test_mode === true ) { + testMode = true; + } + + let videoService = 'ai-video'; const driverHint = typeof options.driver === 'string' ? options.driver : undefined; - const driverHintLower = driverHint ? driverHint.toLowerCase() : undefined; - const providerRaw = typeof options.provider === 'string' - ? options.provider - : (typeof options.service === 'string' ? options.service : undefined); - const providerHint = typeof providerRaw === 'string' ? providerRaw.toLowerCase() : undefined; - const modelLower = typeof options.model === 'string' ? options.model.toLowerCase() : ''; - const looksLikeTogetherVideoModel = typeof options.model === 'string' && - (TOGETHER_VIDEO_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix)) || options.model.startsWith('togetherai:')); - - if ( driverHintLower === 'together' || driverHintLower === 'together-ai' ) { - videoService = 'together-video-generation'; - } else if ( driverHintLower === 'together-video-generation' ) { - videoService = 'together-video-generation'; - } else if ( driverHintLower === 'openai' ) { - videoService = 'openai-video-generation'; - } else if ( driverHint ) { + if ( driverHint ) { videoService = driverHint; - } else if ( providerHint === 'together' || providerHint === 'together-ai' ) { - videoService = 'together-video-generation'; - } else if ( looksLikeTogetherVideoModel ) { - videoService = 'together-video-generation'; } return await utils.make_driver_method(['prompt'], 'puter-video-generation', videoService, 'generate', { diff --git a/src/puter-js/types/modules/ai.d.ts b/src/puter-js/types/modules/ai.d.ts index b997afd07..e97d0995a 100644 --- a/src/puter-js/types/modules/ai.d.ts +++ b/src/puter-js/types/modules/ai.d.ts @@ -82,8 +82,10 @@ export interface Txt2ImgOptions { export interface Txt2VidOptions { prompt?: string; provider?: string; + driver?: string; model?: string; seconds?: number; + duration?: number; test_mode?: boolean; // OpenAI options