diff --git a/src/backend/src/modules/ai/PuterAIChatModule.js b/src/backend/src/modules/ai/PuterAIChatModule.js index 64cc4e710..99f93ea43 100644 --- a/src/backend/src/modules/ai/PuterAIChatModule.js +++ b/src/backend/src/modules/ai/PuterAIChatModule.js @@ -28,8 +28,8 @@ import { OpenAISpeechToTextService } from '../../services/ai/stt/OpenAISpeechToT import { AWSPollyService } from '../../services/ai/tts/AWSPollyService.js'; import { ElevenLabsTTSService } from '../../services/ai/tts/ElevenLabsTTSService.js'; import { OpenAITTSService } from '../../services/ai/tts/OpenAITTSService.js'; -import { TogetherVideoGenerationService } from '../../services/ai/video/TogetherVideoGenerationService.js'; -import { OpenAIVideoGenerationService } from '../../services/ai/video/OpenAIVideoGenerationService.js'; +import { TogetherVideoGenerationService } from '../../services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js'; +import { OpenAIVideoGenerationService } from '../../services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js'; // import { AIVideoGenerationService } from '../../services/ai/video/AIVideoGenerationService.js'; /** diff --git a/src/backend/src/services/ChatAPIService.js b/src/backend/src/services/ChatAPIService.js index a9d7c0622..6e8395656 100644 --- a/src/backend/src/services/ChatAPIService.js +++ b/src/backend/src/services/ChatAPIService.js @@ -78,7 +78,7 @@ class ChatAPIService extends BaseService { }); // Return the list of models - res.json({ models: models.filter(e => !["costly", "fake", "abuse", "usage-limited", "model-fallback-test-1"].includes(e)) }); + res.json({ models: models.filter(e => !['costly', 'fake', 'abuse', 'usage-limited', 'model-fallback-test-1'].includes(e)) }); } catch ( error ) { this.log.error('Error fetching models:', error); throw APIError.create('internal_server_error'); @@ -101,7 +101,7 @@ class ChatAPIService extends BaseService { }); // Return the detailed list of models - res.json({ models: models.filter((e) => !["costly", "fake", "abuse", "usage-limited", "model-fallback-test-1"].includes(e.id)) }); + res.json({ models: models.filter((e) => !['costly', 'fake', 'abuse', 'usage-limited', 'model-fallback-test-1'].includes(e.id)) }); } catch ( error ) { this.log.error('Error fetching model details:', error); throw APIError.create('internal_server_error'); @@ -150,9 +150,69 @@ class ChatAPIService extends BaseService { } }, }).attach(router); + + Endpoint({ + route: '/video/models/details', + methods: ['GET'], + handler: async (req, res) => { + try { + const svc_su = this.services.get('su'); + const models = await svc_su.sudo(async () => { + const items = []; + if ( this.services.has('openai-video-generation') ) { + const svc_video = this.services.get('openai-video-generation'); + if ( typeof svc_video.models === 'function' ) { + items.push(...await svc_video.models()); + } + } + if ( this.services.has('together-video-generation') ) { + const svc_video = this.services.get('together-video-generation'); + if ( typeof svc_video.models === 'function' ) { + items.push(...await svc_video.models()); + } + } + return items; + }); + res.json({ models }); + } catch ( error ) { + this.log.error('Error fetching video model details:', error); + throw APIError.create('internal_server_error'); + } + }, + }).attach(router); + + Endpoint({ + route: '/video/models', + methods: ['GET'], + handler: async (req, res) => { + try { + const svc_su = this.services.get('su'); + const models = await svc_su.sudo(async () => { + const items = []; + if ( this.services.has('openai-video-generation') ) { + const svc_video = this.services.get('openai-video-generation'); + if ( typeof svc_video.models === 'function' ) { + items.push(...(await svc_video.models()).map(model => model.puterId || model.id)); + } + } + if ( this.services.has('together-video-generation') ) { + const svc_video = this.services.get('together-video-generation'); + if ( typeof svc_video.models === 'function' ) { + items.push(...(await svc_video.models()).map(model => model.id)); + } + } + return items; + }); + res.json({ models }); + } catch ( error ) { + this.log.error('Error fetching video models:', error); + throw APIError.create('internal_server_error'); + } + }, + }).attach(router); } } module.exports = { ChatAPIService, -}; \ No newline at end of file +}; diff --git a/src/backend/src/services/ai/video/OpenAIVideoGenerationService.js b/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js similarity index 88% rename from src/backend/src/services/ai/video/OpenAIVideoGenerationService.js rename to src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js index c810ec410..b4d4644dc 100644 --- a/src/backend/src/services/ai/video/OpenAIVideoGenerationService.js +++ b/src/backend/src/services/ai/video/OpenAIVideoGenerationService/OpenAIVideoGenerationService.js @@ -17,10 +17,10 @@ * along with this program. If not, see . */ -const APIError = require('../../../api/APIError'); -const BaseService = require('../../BaseService'); -const { TypedValue } = require('../../drivers/meta/Runtime'); -const { Context } = require('../../../util/context'); +const APIError = require('../../../../api/APIError'); +const BaseService = require('../../../BaseService'); +const { TypedValue } = require('../../../drivers/meta/Runtime'); +const { Context } = require('../../../../util/context'); const { Readable } = require('stream'); const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4'; @@ -30,9 +30,23 @@ const DEFAULT_DURATION_SECONDS = 4; const DEFAULT_SIZE = '720x1280'; const ALLOWED_SIZES = new Set(['720x1280', '1280x720', '1024x1792', '1792x1024']); const ALLOWED_SECONDS = new Set(['4', '8', '12']); +const OPENAI_VIDEO_MODELS = [ + { + puterId: 'openai:openai/sora-2', + id: 'sora-2', + aliases: ['openai/sora-2'], + defaultUsageKey: 'openai:sora-2:default', + }, + { + puterId: 'openai:openai/sora-2-pro', + id: 'sora-2-pro', + aliases: ['openai/sora-2-pro'], + defaultUsageKey: 'openai:sora-2-pro:default', + }, +]; class OpenAIVideoGenerationService extends BaseService { - /** @type {import('../../MeteringService/MeteringService').MeteringService} */ + /** @type {import('../../../MeteringService/MeteringService').MeteringService} */ get meteringService () { return this.services.get('meteringService').meteringService; } @@ -42,14 +56,10 @@ class OpenAIVideoGenerationService extends BaseService { }; _construct () { - this.models_ = { - 'sora-2': { - defaultUsageKey: 'openai:sora-2:default', - }, - 'sora-2-pro': { - defaultUsageKey: 'openai:sora-2-pro:default', - }, - }; + this.models_ = Object.fromEntries(OPENAI_VIDEO_MODELS.map(model => [ + model.id, + { defaultUsageKey: model.defaultUsageKey }, + ])); } async _init () { @@ -85,6 +95,10 @@ class OpenAIVideoGenerationService extends BaseService { }, }; + models () { + return OPENAI_VIDEO_MODELS; + } + async generateVideo (params) { const { prompt, @@ -105,7 +119,11 @@ class OpenAIVideoGenerationService extends BaseService { }); } - const model = requestedModel ?? 'sora-2'; + const resolvedModel = OPENAI_VIDEO_MODELS.find(entry => + entry.id === requestedModel || + entry.puterId === requestedModel || + (entry.aliases || []).includes(requestedModel))?.id; + const model = resolvedModel ?? requestedModel ?? 'sora-2'; const modelConfig = this.models_[model]; if ( ! modelConfig ) { throw APIError.create('field_invalid', null, { diff --git a/src/backend/src/services/ai/video/TogetherVideoGenerationService.js b/src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js similarity index 89% rename from src/backend/src/services/ai/video/TogetherVideoGenerationService.js rename to src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js index 3476dcd0c..965d44926 100644 --- a/src/backend/src/services/ai/video/TogetherVideoGenerationService.js +++ b/src/backend/src/services/ai/video/TogetherVideoGenerationService/TogetherVideoGenerationService.js @@ -17,10 +17,10 @@ * along with this program. If not, see . */ -const APIError = require('../../../api/APIError'); -const BaseService = require('../../BaseService'); -const { TypedValue } = require('../../drivers/meta/Runtime'); -const { Context } = require('../../../util/context'); +const APIError = require('../../../../api/APIError'); +const BaseService = require('../../../BaseService'); +const { TypedValue } = require('../../../drivers/meta/Runtime'); +const { Context } = require('../../../../util/context'); const { Together } = require('together-ai'); const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4'; @@ -28,10 +28,12 @@ const POLL_INTERVAL_MS = 5_000; const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes const DEFAULT_MODEL = 'minimax/video-01-director'; const DEFAULT_DURATION_SECONDS = 6; -const DEFAULT_USAGE_KEY = 'together-video:default'; +const DEFAULT_USAGE_KEY = 'togetherai:default'; + +let models = []; class TogetherVideoGenerationService extends BaseService { - /** @type {import('../../MeteringService/MeteringService').MeteringService} */ + /** @type {import('../../../MeteringService/MeteringService').MeteringService} */ get meteringService () { return this.services.get('meteringService').meteringService; } @@ -94,7 +96,7 @@ class TogetherVideoGenerationService extends BaseService { }); } - const model = requestedModel ?? DEFAULT_MODEL; + const model = this.#stripTogetherPrefix(requestedModel ?? DEFAULT_MODEL); if ( testMode ) { return new TypedValue({ @@ -196,6 +198,16 @@ class TogetherVideoGenerationService extends BaseService { throw new Error('Together AI response did not include a video URL'); } + async models () { + if ( models.length > 0 ) { + return models; + } + + const { TOGETHER_VIDEO_GENERATION_MODELS } = await import('./models.js'); + models = TOGETHER_VIDEO_GENERATION_MODELS; + return models; + } + async #pollUntilComplete (jobId) { let job = await this.client.videos.retrieve(jobId); const start = Date.now(); @@ -218,11 +230,18 @@ class TogetherVideoGenerationService extends BaseService { #determineUsageKey (model) { if ( typeof model === 'string' && model.trim() ) { - return `together-video:${model}`; + return `togetherai:${model}`; } return DEFAULT_USAGE_KEY; } + #stripTogetherPrefix (model) { + if ( typeof model === 'string' && model.startsWith('togetherai:') ) { + return model.slice('togetherai:'.length); + } + return model; + } + #coercePositiveInteger (value) { if ( typeof value === 'number' && Number.isFinite(value) ) { const rounded = Math.round(value); diff --git a/src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js b/src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js new file mode 100644 index 000000000..1ea54fe97 --- /dev/null +++ b/src/backend/src/services/ai/video/TogetherVideoGenerationService/models.js @@ -0,0 +1,319 @@ +export const TOGETHER_VIDEO_GENERATION_MODELS = [ + { + id: 'togetherai:minimax/video-01-director', + organization: 'MiniMax', + name: 'MiniMax 01 Director', + model: 'minimax/video-01-director', + durationSeconds: 5, + dimensions: ['1366x768'], + fps: [25], + keyframes: ['first'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:minimax/hailuo-02', + organization: 'MiniMax', + name: 'MiniMax Hailuo 02', + model: 'minimax/hailuo-02', + durationSeconds: 10, + dimensions: ['1366x768', '1920x1080'], + fps: [25], + keyframes: ['first'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:google/veo-2.0', + organization: 'Google', + name: 'Veo 2.0', + model: 'google/veo-2.0', + durationSeconds: 5, + dimensions: ['1280x720', '720x1280'], + fps: [24], + keyframes: ['first', 'last'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:google/veo-3.0', + organization: 'Google', + name: 'Veo 3.0', + model: 'google/veo-3.0', + durationSeconds: 8, + dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], + fps: [24], + keyframes: ['first'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:google/veo-3.0-audio', + organization: 'Google', + name: 'Veo 3.0 + Audio', + model: 'google/veo-3.0-audio', + durationSeconds: 8, + dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], + fps: [24], + keyframes: ['first'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:google/veo-3.0-fast', + organization: 'Google', + name: 'Veo 3.0 Fast', + model: 'google/veo-3.0-fast', + durationSeconds: 8, + dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], + fps: [24], + keyframes: ['first'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:google/veo-3.0-fast-audio', + organization: 'Google', + name: 'Veo 3.0 Fast + Audio', + model: 'google/veo-3.0-fast-audio', + durationSeconds: 8, + dimensions: ['1280x720', '720x1280', '1920x1080', '1080x1920'], + fps: [24], + keyframes: ['first'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:ByteDance/Seedance-1.0-lite', + organization: 'ByteDance', + name: 'Seedance 1.0 Lite', + model: 'ByteDance/Seedance-1.0-lite', + durationSeconds: 5, + dimensions: [ + '864x480', + '736x544', + '640x640', + '960x416', + '416x960', + '1248x704', + '1120x832', + '960x960', + '1504x640', + '640x1504', + ], + fps: [24], + keyframes: ['first', 'last'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:ByteDance/Seedance-1.0-pro', + organization: 'ByteDance', + name: 'Seedance 1.0 Pro', + model: 'ByteDance/Seedance-1.0-pro', + durationSeconds: 5, + dimensions: [ + '864x480', + '736x544', + '640x640', + '960x416', + '416x960', + '1248x704', + '1120x832', + '960x960', + '1504x640', + '640x1504', + ], + fps: [24], + keyframes: ['first', 'last'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:pixverse/pixverse-v5', + organization: 'PixVerse', + name: 'PixVerse v5', + model: 'pixverse/pixverse-v5', + durationSeconds: 5, + dimensions: [ + '640x360', + '480x360', + '360x360', + '270x360', + '360x640', + '960x540', + '720x540', + '540x540', + '405x540', + '540x960', + '1280x720', + '960x720', + '720x720', + '540x720', + '720x1280', + '1920x1080', + '1440x1080', + '1080x1080', + '810x1080', + '1080x1920', + ], + fps: [16, 24], + keyframes: ['first', 'last'], + promptLength: { min: 2, max: 2048 }, + promptSupported: true, + }, + { + id: 'togetherai:kwaivgI/kling-2.1-master', + organization: 'Kuaishou', + name: 'Kling 2.1 Master', + model: 'kwaivgI/kling-2.1-master', + durationSeconds: 5, + dimensions: ['1920x1080', '1080x1080', '1080x1920'], + fps: [24], + keyframes: ['first'], + promptLength: { min: 2, max: 2500 }, + promptSupported: true, + }, + { + id: 'togetherai:kwaivgI/kling-2.1-standard', + organization: 'Kuaishou', + name: 'Kling 2.1 Standard', + model: 'kwaivgI/kling-2.1-standard', + durationSeconds: 5, + dimensions: ['1920x1080', '1080x1080', '1080x1920'], + fps: [24], + keyframes: ['first'], + promptLength: null, + promptSupported: false, + }, + { + id: 'togetherai:kwaivgI/kling-2.1-pro', + organization: 'Kuaishou', + name: 'Kling 2.1 Pro', + model: 'kwaivgI/kling-2.1-pro', + durationSeconds: 5, + dimensions: ['1920x1080', '1080x1080', '1080x1920'], + fps: [24], + keyframes: ['first', 'last'], + promptLength: null, + promptSupported: false, + }, + { + id: 'togetherai:kwaivgI/kling-2.0-master', + organization: 'Kuaishou', + name: 'Kling 2.0 Master', + model: 'kwaivgI/kling-2.0-master', + durationSeconds: 5, + dimensions: ['1280x720', '720x720', '720x1280'], + fps: [24], + keyframes: ['first'], + promptLength: { min: 2, max: 2500 }, + promptSupported: true, + }, + { + id: 'togetherai:kwaivgI/kling-1.6-standard', + organization: 'Kuaishou', + name: 'Kling 1.6 Standard', + model: 'kwaivgI/kling-1.6-standard', + durationSeconds: 5, + dimensions: ['1920x1080', '1080x1080', '1080x1920'], + fps: [30, 24], + keyframes: ['first'], + promptLength: { min: 2, max: 2500 }, + promptSupported: true, + }, + { + id: 'togetherai:kwaivgI/kling-1.6-pro', + organization: 'Kuaishou', + name: 'Kling 1.6 Pro', + model: 'kwaivgI/kling-1.6-pro', + durationSeconds: 5, + dimensions: ['1920x1080', '1080x1080', '1080x1920'], + fps: [24], + keyframes: ['first'], + promptLength: null, + promptSupported: false, + }, + { + id: 'togetherai:Wan-AI/Wan2.2-I2V-A14B', + organization: 'Wan-AI', + name: 'Wan 2.2 I2V', + model: 'Wan-AI/Wan2.2-I2V-A14B', + durationSeconds: null, + dimensions: null, + fps: null, + keyframes: null, + promptLength: null, + promptSupported: null, + }, + { + id: 'togetherai:Wan-AI/Wan2.2-T2V-A14B', + organization: 'Wan-AI', + name: 'Wan 2.2 T2V', + model: 'Wan-AI/Wan2.2-T2V-A14B', + durationSeconds: null, + dimensions: null, + fps: null, + keyframes: null, + promptLength: null, + promptSupported: null, + }, + { + id: 'togetherai:vidu/vidu-2.0', + organization: 'Vidu', + name: 'Vidu 2.0', + model: 'vidu/vidu-2.0', + durationSeconds: 8, + dimensions: [ + '1920x1080', + '1080x1080', + '1080x1920', + '1280x720', + '720x720', + '720x1280', + '640x360', + '360x360', + '360x640', + ], + fps: [24], + keyframes: ['first', 'last'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:vidu/vidu-q1', + organization: 'Vidu', + name: 'Vidu Q1', + model: 'vidu/vidu-q1', + durationSeconds: 5, + dimensions: ['1920x1080', '1080x1080', '1080x1920'], + fps: [24], + keyframes: ['first', 'last'], + promptLength: { min: 2, max: 3000 }, + promptSupported: true, + }, + { + id: 'togetherai:openai/sora-2', + organization: 'OpenAI', + name: 'Sora 2', + model: 'openai/sora-2', + durationSeconds: 8, + dimensions: ['1280x720', '720x1280'], + fps: null, + keyframes: ['first'], + promptLength: { min: 1, max: 4000 }, + promptSupported: true, + }, + { + id: 'togetherai:openai/sora-2-pro', + organization: 'OpenAI', + name: 'Sora 2 Pro', + model: 'openai/sora-2-pro', + durationSeconds: 8, + dimensions: ['1280x720', '720x1280'], + fps: null, + keyframes: ['first'], + promptLength: { min: 1, max: 4000 }, + promptSupported: true, + }, +]; diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js index 7c62e17d7..9cba00ad3 100644 --- a/src/puter-js/src/modules/AI.js +++ b/src/puter-js/src/modules/AI.js @@ -70,7 +70,7 @@ class AI { const tryFetchModels = async () => { const resp = await fetch(`${this.APIOrigin }/puterai/chat/models/details`, { headers }); - if ( !resp.ok ) return null; + if ( ! resp.ok ) return null; const data = await resp.json(); const models = Array.isArray(data?.models) ? data.models : []; return provider ? models.filter(model => model.provider === provider) : models; @@ -298,13 +298,13 @@ class AI { if ( ! options.voice ) { options.voice = '21m00Tcm4TlvDq8ikWAM'; } - if ( ! options.model && typeof options.engine === 'string' ) { + if ( !options.model && typeof options.engine === 'string' ) { options.model = options.engine; } if ( ! options.model ) { options.model = 'eleven_multilingual_v2'; } - if ( ! options.output_format && !options.response_format ) { + if ( !options.output_format && !options.response_format ) { options.output_format = 'mp3_44100_128'; } if ( options.response_format && !options.output_format ) { @@ -835,8 +835,8 @@ class AI { options.model = 'gemini-2.5-flash-image-preview'; } - if (options.model === "nano-banana-pro") { - options.model = "gemini-3-pro-image-preview"; + if ( options.model === 'nano-banana-pro' ) { + options.model = 'gemini-3-pro-image-preview'; } const driverHint = typeof options.driver === 'string' ? options.driver : undefined; @@ -912,6 +912,7 @@ class AI { options.seconds = options.duration; } + // This sucks, should be backend's job like we do for chat models now let videoService = 'openai-video-generation'; const driverHint = typeof options.driver === 'string' ? options.driver : undefined; const driverHintLower = driverHint ? driverHint.toLowerCase() : undefined; @@ -922,7 +923,7 @@ class AI { const modelLower = typeof options.model === 'string' ? options.model.toLowerCase() : ''; const looksLikeTogetherVideoModel = typeof options.model === 'string' && - TOGETHER_VIDEO_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix)); + (TOGETHER_VIDEO_MODEL_PREFIXES.some(prefix => modelLower.startsWith(prefix)) || options.model.startsWith('togetherai:')); if ( driverHintLower === 'together' || driverHintLower === 'together-ai' ) { videoService = 'together-video-generation'; @@ -958,7 +959,8 @@ class AI { return result; } - const video = (globalThis.document?.createElement('video') || {setAttribute: ()=>{}}); + const video = (globalThis.document?.createElement('video') || { setAttribute: () => { + } }); video.src = sourceUrl; video.controls = true; video.preload = 'metadata';