diff --git a/src/backend/src/services/ai/chat/providers/GeminiProvider/models.ts b/src/backend/src/services/ai/chat/providers/GeminiProvider/models.ts index 5b24279b4..89fb04af9 100644 --- a/src/backend/src/services/ai/chat/providers/GeminiProvider/models.ts +++ b/src/backend/src/services/ai/chat/providers/GeminiProvider/models.ts @@ -112,28 +112,6 @@ export const GEMINI_MODELS: IChatModel[] = [ }, max_tokens: 200_000, }, - { - puterId: 'google:google/gemini-3-pro-preview', - id: 'gemini-3-pro-preview', - modalities: { 'input': ['text', 'image', 'video', 'audio', 'pdf'], 'output': ['text'] }, - open_weights: false, - tool_call: true, - knowledge: '2025-01', - release_date: '2025-11-18', - name: 'Gemini 3 Pro', - aliases: ['google/gemini-3-pro-preview'], - context: 1_048_576, - costs_currency: 'usd-cents', - input_cost_key: 'prompt_tokens', - output_cost_key: 'completion_tokens', - costs: { - tokens: 1_000_000, - prompt_tokens: 200, - completion_tokens: 1200, - cached_tokens: 20, - }, - max_tokens: 200_000, - }, { puterId: 'google:google/gemini-3.1-pro-preview', id: 'gemini-3.1-pro-preview', @@ -178,4 +156,26 @@ export const GEMINI_MODELS: IChatModel[] = [ }, max_tokens: 65536, }, + { + puterId: 'google:google/gemini-3.1-flash-lite-preview', + id: 'gemini-3.1-flash-lite-preview', + modalities: { 'input': ['text', 'image', 'video', 'audio', 'pdf'], 'output': ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2025-01', + release_date: '2026-03-18', + name: 'Gemini 3.1 Flash-Lite', + aliases: ['google/gemini-3.1-flash-lite-preview'], + context: 1_048_576, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 25, + completion_tokens: 150, + cached_tokens: 2.5, + }, + max_tokens: 65536, + }, ]; diff --git a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts 
b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts index 656e23d08..a9ca08fe0 100644 --- a/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts +++ b/src/backend/src/services/ai/image/providers/GeminiImageGenerationProvider/GeminiImageGenerationProvider.ts @@ -22,7 +22,7 @@ import APIError from '../../../../../api/APIError.js'; import { ErrorService } from '../../../../../modules/core/ErrorService.js'; import { Context } from '../../../../../util/context.js'; import { MeteringService } from '../../../../MeteringService/MeteringService.js'; -import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS } from './models.js'; +import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS, IGeminiImageModel } from './models.js'; import { IGenerateParams, IImageModel, IImageProvider } from '../types.js'; const MIME_SIGNATURES: Record = { @@ -65,7 +65,8 @@ export class GeminiImageGenerationProvider implements IImageProvider { const { prompt, test_mode, input_image, input_image_mime_type, model, quality } = params; let { ratio, input_images } = params; - const selectedModel = this.models().find(m => m.id === model) || this.models().find(m => m.id === this.getDefaultModel())!; + const selectedModel = (this.models() as IGeminiImageModel[]).find(m => m.id === model) + || (this.models() as IGeminiImageModel[]).find(m => m.id === this.getDefaultModel())!; if ( test_mode ) { return 'https://puter-sample-data.puter.site/image_example.png'; @@ -75,6 +76,10 @@ export class GeminiImageGenerationProvider implements IImageProvider { throw new Error('`prompt` must be a non-empty string'); } + if ( selectedModel.apiType === 'generateImages' ) { + return this.#generateWithImagen(prompt, selectedModel, params); + } + const allowedRatios = selectedModel.allowedRatios ?? 
/**
 * Generates one image through the `generateImages` endpoint (used for models
 * whose `apiType` is `'generateImages'`) and returns it as a base64 data URL.
 *
 * Steps: resolve the flat per-image price, verify the actor can afford it,
 * resolve the aspect ratio / optional size tier, call the API, validate the
 * response, and only then record usage.
 *
 * @param prompt - Text prompt; already validated as non-empty by the caller.
 * @param selectedModel - Model entry; its `costs['per-image']` is the price in
 *   the model's cost currency units and is converted to micro-units here.
 * @param params - Original request; only `ratio` and `quality` are read.
 * @returns A `data:<mime>;base64,<bytes>` URL for the generated image.
 * @throws Error if no actor is in context, the model has no per-image cost,
 *   the response has no image/bytes, or the image was filtered.
 * @throws APIError `insufficient_funds` when the actor lacks credits.
 */
async #generateWithImagen (prompt: string, selectedModel: IGeminiImageModel, params: IGenerateParams): Promise<string> {
    const actor = Context.get('actor');
    if ( ! actor ) {
        throw new Error('actor not found in context');
    }

    const costCents = selectedModel.costs?.['per-image'];
    if ( costCents === undefined ) {
        throw new Error(`No per-image cost configured for model '${selectedModel.id}'`);
    }
    const costInMicroCents = Math.ceil(costCents * 1_000_000);

    // Pre-flight credit check so no upstream call is made for an actor
    // that cannot pay for the result.
    const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
    if ( ! usageAllowed ) {
        throw APIError.create('insufficient_funds');
    }

    const allowedRatios = selectedModel.allowedRatios ?? [GEMINI_DEFAULT_RATIO];
    const ratio = params.ratio && this.#isValidRatio(params.ratio, allowedRatios)
        ? params.ratio
        : allowedRatios[0];

    // GEMINI_DEFAULT_RATIO is expressed in pixels (1024x1024) while the
    // Imagen model entries list reduced ratios (1:1, 16:9, ...). Reduce by
    // the greatest common divisor so the fallback is sent as "1:1" rather
    // than "1024:1024"; already-reduced ratios pass through unchanged.
    let divisor = 1;
    if ( Number.isInteger(ratio.w) && Number.isInteger(ratio.h) && ratio.w > 0 && ratio.h > 0 ) {
        let a = ratio.w;
        let b = ratio.h;
        while ( b !== 0 ) {
            [a, b] = [b, a % b];
        }
        divisor = a;
    }
    const aspectRatio = `${ratio.w / divisor}:${ratio.h / divisor}`;

    const config: Record<string, unknown> = {
        numberOfImages: 1,
        aspectRatio,
    };

    // Only forward a size tier the model explicitly lists as supported.
    if ( params.quality && selectedModel.allowedQualityLevels?.includes(params.quality) ) {
        config.imageSize = params.quality;
    }

    const response = await this.#client.models.generateImages({
        model: selectedModel.id,
        prompt,
        config,
    });

    const generated = response?.generatedImages;
    if ( !generated || generated.length === 0 ) {
        throw new Error('Imagen response did not include an image');
    }

    const entry = generated[0];
    if ( entry.raiFilteredReason ) {
        throw new Error(`Image was filtered: ${entry.raiFilteredReason}`);
    }

    const image = entry.image;
    if ( ! image?.imageBytes ) {
        throw new Error('Imagen response did not include image bytes');
    }

    // Usage is recorded only after a usable image came back, so failed or
    // filtered generations are not billed.
    const usageKey = `gemini:${selectedModel.id}`;
    await this.#meteringService.incrementUsage(actor, usageKey, 1, costInMicroCents);

    const mimeType = image.mimeType ?? 'image/png';
    return `data:${mimeType};base64,${image.imageBytes}`;
}
@@ -37,7 +41,7 @@ export const GEMINI_ESTIMATED_IMAGE_TOKENS: Record = { 'gemini-3.1-flash-image-preview:4K': 2520, }; -export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ +export const GEMINI_IMAGE_GENERATION_MODELS: IGeminiImageModel[] = [ { puterId: 'google:google/gemini-2.5-flash-image', id: 'gemini-2.5-flash-image', @@ -148,4 +152,80 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [ '1K:1x1': 6.7, }, }, + + // ── Imagen models (use generateImages API) ───────────────────── + { + puterId: 'google:google/imagen-4.0-fast', + id: 'imagen-4.0-fast-generate-001', + apiType: 'generateImages', + name: 'Imagen 4.0 Fast', + version: '1.0', + costs_currency: 'usd-cents', + index_cost_key: 'per-image', + aliases: [ + 'imagen-4.0-fast', + 'google/imagen-4.0-fast', + 'google:google/imagen-4.0-fast', + ], + allowedRatios: [ + { w: 1, h: 1 }, + { w: 3, h: 4 }, + { w: 4, h: 3 }, + { w: 9, h: 16 }, + { w: 16, h: 9 }, + ], + costs: { + 'per-image': 2, // $0.02 per image + }, + }, + { + puterId: 'google:google/imagen-4.0', + id: 'imagen-4.0-generate-001', + apiType: 'generateImages', + name: 'Imagen 4.0', + version: '1.0', + costs_currency: 'usd-cents', + index_cost_key: 'per-image', + aliases: [ + 'imagen-4.0', + 'google/imagen-4.0', + 'google:google/imagen-4.0', + ], + allowedQualityLevels: ['1K', '2K'], + allowedRatios: [ + { w: 1, h: 1 }, + { w: 3, h: 4 }, + { w: 4, h: 3 }, + { w: 9, h: 16 }, + { w: 16, h: 9 }, + ], + costs: { + 'per-image': 4, // $0.04 per image + }, + }, + { + puterId: 'google:google/imagen-4.0-ultra', + id: 'imagen-4.0-ultra-generate-001', + apiType: 'generateImages', + name: 'Imagen 4.0 Ultra', + version: '1.0', + costs_currency: 'usd-cents', + index_cost_key: 'per-image', + aliases: [ + 'imagen-4.0-ultra', + 'google/imagen-4.0-ultra', + 'google:google/imagen-4.0-ultra', + ], + allowedQualityLevels: ['1K', '2K'], + allowedRatios: [ + { w: 1, h: 1 }, + { w: 3, h: 4 }, + { w: 4, h: 3 }, + { w: 9, h: 16 }, + { w: 16, h: 9 }, + ], + 
costs: { + 'per-image': 6, // $0.06 per image + }, + }, ]; diff --git a/src/backend/src/services/ai/video/AIVideoGenerationService.ts b/src/backend/src/services/ai/video/AIVideoGenerationService.ts index 8c9a8283b..9710bd8a9 100644 --- a/src/backend/src/services/ai/video/AIVideoGenerationService.ts +++ b/src/backend/src/services/ai/video/AIVideoGenerationService.ts @@ -23,6 +23,7 @@ import BaseService from '../../BaseService.js'; import { DriverService } from '../../drivers/DriverService.js'; import { EventService } from '../../EventService.js'; import { MeteringService } from '../../MeteringService/MeteringService.js'; +import { GeminiVideoGenerationProvider } from './providers/GeminiVideoGenerationProvider/GeminiVideoGenerationProvider.js'; import { OpenAIVideoGenerationProvider } from './providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.js'; import { TogetherVideoGenerationProvider } from './providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.js'; import { IGenerateVideoParams, IVideoModel, IVideoProvider } from './providers/types.js'; @@ -108,6 +109,14 @@ export class AIVideoGenerationService extends BaseService { ); } + const geminiVideoConfig = this.config.providers?.['gemini-video-generation'] || this.global_config?.services?.gemini; + if ( geminiVideoConfig && (geminiVideoConfig.apiKey || geminiVideoConfig.secret_key) ) { + this.#providers['gemini-video-generation'] = new GeminiVideoGenerationProvider( + { apiKey: geminiVideoConfig.apiKey || geminiVideoConfig.secret_key }, + this.meteringService, + ); + } + // emit event for extensions to add providers const extensionProviders = {} as Record; await this.eventService.emit('ai.video.registerProviders', extensionProviders); diff --git a/src/backend/src/services/ai/video/providers/GeminiVideoGenerationProvider/GeminiVideoGenerationProvider.ts b/src/backend/src/services/ai/video/providers/GeminiVideoGenerationProvider/GeminiVideoGenerationProvider.ts new file mode 100644 
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import { GoogleGenAI, GenerateVideosOperation, GenerateVideosParameters } from '@google/genai';
import APIError from '../../../../../api/APIError.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js';
import { TypedValue } from '../../../../drivers/meta/Runtime.js';
import { GEMINI_VIDEO_GENERATION_MODELS, IGeminiVideoModel } from './models.js';

/** Sample clip returned in `test_mode` so no upstream call (or charge) happens. */
const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
/** Delay between two status polls of a running generation operation. */
const POLL_INTERVAL_MS = 10_000;
/** Upper bound on how long a single generation is polled before giving up. */
const DEFAULT_TIMEOUT_MS = 10 * 60 * 1000;
/** Duration enforced for 1080p/4K output and for reference-image requests. */
const FIXED_DURATION_SECONDS = 8;
/** Maximum number of reference images forwarded per request. */
const MAX_REFERENCE_IMAGES = 3;

/** Maps a `WIDTHxHEIGHT` size string to the aspect ratio / resolution pair sent upstream. */
const DIMENSION_MAP: Record<string, { aspectRatio: string; resolution: string }> = {
    '1280x720': { aspectRatio: '16:9', resolution: '720p' },
    '720x1280': { aspectRatio: '9:16', resolution: '720p' },
    '1920x1080': { aspectRatio: '16:9', resolution: '1080p' },
    '1080x1920': { aspectRatio: '9:16', resolution: '1080p' },
    '3840x2160': { aspectRatio: '16:9', resolution: '4k' },
    '2160x3840': { aspectRatio: '9:16', resolution: '4k' },
};

/**
 * Video provider backed by the `@google/genai` client: starts a
 * `generateVideos` operation, polls it to completion, and meters usage per
 * generated second.
 */
export class GeminiVideoGenerationProvider implements IVideoProvider {
    #client: GoogleGenAI;
    #meteringService: MeteringService;

    /**
     * @param config - Must carry a non-empty `apiKey`.
     * @param meteringService - Used for the credit pre-check and usage recording.
     * @throws Error when `apiKey` is missing.
     */
    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        if ( ! config.apiKey ) {
            throw new Error('Gemini video generation requires an API key');
        }
        this.#client = new GoogleGenAI({ apiKey: config.apiKey });
        this.#meteringService = meteringService;
    }

    /** @returns The id of the first entry of the model table. */
    getDefaultModel (): string {
        return GEMINI_VIDEO_GENERATION_MODELS[0].id;
    }

    /** @returns Every model entry, with `aliases` set to `[id, 'google/<id>']`. */
    async models (): Promise<IVideoModel[]> {
        return GEMINI_VIDEO_GENERATION_MODELS.map(model => ({
            ...model,
            aliases: [model.id, `google/${model.id}`],
        }));
    }

    /**
     * Generates a video for `params.prompt`.
     *
     * `size` (or its alias `resolution`) selects aspect ratio and resolution
     * tier via {@link DIMENSION_MAP}. 1080p/4K output and reference-image
     * requests always run at 8 seconds. Usage is recorded only after a video
     * was actually returned.
     *
     * @returns A `TypedValue` holding either the video URI or a base64 data URL.
     * @throws APIError `field_invalid` for an empty prompt or a size whose
     *   resolution the selected model does not offer; `insufficient_funds`
     *   when the actor lacks credits.
     * @throws Error for missing actor/cost configuration, upstream failures,
     *   filtered output, empty responses, or polling timeout.
     */
    async generate (params: IGenerateVideoParams): Promise<unknown> {
        const {
            prompt,
            model: requestedModel,
            seconds,
            duration,
            size,
            resolution,
            negative_prompt: negativePrompt,
            reference_images: referenceImages,
            input_reference: inputReference,
            last_frame: lastFrame,
            test_mode: testMode,
        } = params ?? {};

        if ( typeof prompt !== 'string' || !prompt.trim() ) {
            throw APIError.create('field_invalid', null, {
                key: 'prompt',
                expected: 'a non-empty string',
                got: prompt,
            });
        }

        const selectedModel = this.#getModel(requestedModel);

        if ( testMode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'video',
            }, DEFAULT_TEST_VIDEO_URL);
        }

        const firstFrame = selectedModel.supportsImageInput
            && typeof inputReference === 'string' && inputReference.trim().length > 0
            ? inputReference
            : undefined;

        // Filter BEFORE deciding whether reference images are in play: an
        // array of blank entries must not force the 8s duration or suppress
        // the first/last frame inputs.
        const validReferenceImages = selectedModel.supportsReferenceImages && Array.isArray(referenceImages)
            ? referenceImages
                .filter((img): img is string => typeof img === 'string' && img.trim().length > 0)
                .slice(0, MAX_REFERENCE_IMAGES)
            : [];
        const hasRefImages = validReferenceImages.length > 0;

        // `resolution` is accepted as an alias of `size`; `size` wins when both are set.
        const requestedSize = [size, resolution].find((value): value is string => typeof value === 'string' && value.trim().length > 0);
        const { aspectRatio, videoResolution } = this.#resolveAspectAndResolution(requestedSize?.trim(), selectedModel);

        const is4K = videoResolution === '4k';
        const is1080p = videoResolution === '1080p';

        // NOTE(review): the requested duration is not checked against
        // `selectedModel.durationSeconds`; presumably the service layer
        // validates it before calling the provider - confirm.
        let durationSeconds = this.#coercePositiveInteger(seconds ?? duration)
            ?? selectedModel.durationSeconds?.[0]
            ?? FIXED_DURATION_SECONDS;
        // 1080p and 4K require duration=8; the same duration is applied to
        // reference-image requests.
        if ( is4K || is1080p || hasRefImages ) {
            durationSeconds = FIXED_DURATION_SECONDS;
        }

        // Tiered pricing: use the tier-specific rate when the model defines
        // one, otherwise fall back to the base per-second rate.
        const baseRate = selectedModel.costs?.['per-second'];
        let perSecondCents = baseRate;
        if ( is4K ) {
            perSecondCents = selectedModel.costs?.['per-second-4k'] ?? baseRate;
        } else if ( is1080p ) {
            perSecondCents = selectedModel.costs?.['per-second-1080p'] ?? baseRate;
        }
        if ( perSecondCents === undefined ) {
            throw new Error(`No per-second cost configured for video model '${selectedModel.id}'`);
        }
        const costCents = perSecondCents * durationSeconds;
        const costInMicroCents = Math.ceil(costCents * 1_000_000);

        const actor = Context.get('actor');
        if ( ! actor ) {
            throw new Error('actor not found in context');
        }

        const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

        const config: Record<string, unknown> = {
            numberOfVideos: 1,
            durationSeconds,
        };

        if ( aspectRatio ) config.aspectRatio = aspectRatio;
        // `videoResolution` is undefined for models without resolution options.
        if ( videoResolution ) {
            config.resolution = videoResolution;
        }
        if ( typeof negativePrompt === 'string' && negativePrompt.trim() ) {
            config.negativePrompt = negativePrompt;
        }

        // Reference images (Veo 3.1 supports up to 3).
        // When referenceImages is set, image (first frame), video, and lastFrame are not supported.
        if ( hasRefImages ) {
            config.referenceImages = validReferenceImages.map(img => ({
                image: this.#parseImageInput(img),
                referenceType: 'asset',
            }));
        } else if ( typeof lastFrame === 'string' && lastFrame.trim() ) {
            config.lastFrame = this.#parseImageInput(lastFrame);
        }

        const generateParams: GenerateVideosParameters = {
            model: selectedModel.id,
            prompt,
            config,
        };

        // First frame (image-to-video)
        if ( firstFrame !== undefined && !hasRefImages ) {
            generateParams.image = this.#parseImageInput(firstFrame);
        }

        let operation: GenerateVideosOperation;
        try {
            operation = await this.#client.models.generateVideos(generateParams);
        } catch (e) {
            console.error('Gemini video generation error:', e);
            throw e;
        }

        const completed = await this.#pollUntilComplete(operation);

        const generatedVideos = completed.response?.generatedVideos;
        if ( !generatedVideos || generatedVideos.length === 0 ) {
            const filtered = completed.response?.raiMediaFilteredCount ?? 0;
            if ( filtered > 0 ) {
                const reasons = completed.response?.raiMediaFilteredReasons?.join(', ') || 'content policy';
                throw new Error(`Video was filtered due to ${reasons}`);
            }
            throw new Error('Gemini response did not include a video');
        }

        const video = generatedVideos[0].video;
        if ( ! video ) {
            throw new Error('Gemini response video entry was empty');
        }

        // Bill only once a video entry exists. The usage key carries the
        // resolution tier so tiered prices are tracked separately.
        const resTier = is4K ? ':4k' : is1080p && selectedModel.costs?.['per-second-1080p'] ? ':1080p' : '';
        const usageKey = `gemini:${selectedModel.id}${resTier}`;
        await this.#meteringService.incrementUsage(actor, usageKey, durationSeconds, costInMicroCents);

        if ( video.uri ) {
            // NOTE(review): Gemini API file URIs appear to require the API
            // key to download - confirm the consumer of this URL can fetch it.
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'video',
            }, video.uri);
        }

        if ( video.videoBytes ) {
            const mimeType = video.mimeType ?? 'video/mp4';
            const dataUri = `data:${mimeType};base64,${video.videoBytes}`;
            return new TypedValue({
                $: 'string:url:data',
                content_type: 'video',
            }, dataUri);
        }

        throw new Error('Gemini video response contained neither uri nor videoBytes');
    }

    /**
     * Polls the operation every {@link POLL_INTERVAL_MS} until it reports
     * `done`, then surfaces any operation-level error.
     *
     * @throws Error on timeout ({@link DEFAULT_TIMEOUT_MS}) or when the
     *   finished operation carries an `error`.
     */
    async #pollUntilComplete (operation: GenerateVideosOperation): Promise<GenerateVideosOperation> {
        let op = operation;
        const start = Date.now();

        while ( !op.done ) {
            if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) {
                throw new Error('Timed out waiting for Gemini video generation to complete');
            }

            await this.#delay(POLL_INTERVAL_MS);
            op = await this.#client.operations.getVideosOperation({ operation: op });
        }

        if ( op.error ) {
            const msg = (op.error as Record<string, unknown>).message ?? JSON.stringify(op.error);
            throw new Error(`Gemini video generation failed: ${msg}`);
        }

        return op;
    }

    /**
     * Splits a `data:<mime>;base64,<payload>` URL into payload and MIME type.
     * Any other input is passed through unchanged as the payload and labelled
     * `image/png`.
     */
    #parseImageInput (input: string): { imageBytes: string; mimeType: string } {
        if ( input.startsWith('data:') ) {
            const commaIdx = input.indexOf(',');
            if ( commaIdx !== -1 ) {
                // Text between "data:" and the comma, e.g. "image/png;base64".
                const header = input.substring(5, commaIdx);
                if ( header.endsWith(';base64') ) {
                    const mimeType = header.substring(0, header.length - ';base64'.length);
                    if ( mimeType.length > 0 ) {
                        return { imageBytes: input.substring(commaIdx + 1), mimeType };
                    }
                }
            }
        }
        return { imageBytes: input, mimeType: 'image/png' };
    }

    /**
     * Looks a model up by id, by the `google/<id>` alias advertised by
     * `models()`, or by `puterId`. Unknown or missing names fall back to the
     * first (default) model.
     */
    #getModel (requestedModel?: string): IGeminiVideoModel {
        const wanted = typeof requestedModel === 'string' ? requestedModel.trim() : '';
        const match = wanted.length > 0
            ? GEMINI_VIDEO_GENERATION_MODELS.find(m => m.id === wanted
                || `google/${m.id}` === wanted
                || m.puterId === wanted)
            : undefined;
        return match ?? GEMINI_VIDEO_GENERATION_MODELS[0];
    }

    /**
     * Translates a `WIDTHxHEIGHT` size into the aspect ratio and resolution
     * tier for `model`.
     *
     * - Unknown/missing size: the model's first aspect ratio and resolution.
     * - Model without resolution options: only the aspect ratio is taken from
     *   the size; no resolution tier applies (so no tier pricing or forced
     *   duration is triggered by it).
     * - Size whose resolution the model does not list: rejected up front.
     *
     * @throws APIError `field_invalid` for a resolution the model does not offer.
     */
    #resolveAspectAndResolution (
        size: string | undefined,
        model: IGeminiVideoModel,
    ): { aspectRatio: string; videoResolution: string | undefined } {
        // Own-property check so names like "constructor" do not match
        // members inherited from Object.prototype.
        const mapped = size !== undefined && Object.prototype.hasOwnProperty.call(DIMENSION_MAP, size)
            ? DIMENSION_MAP[size]
            : undefined;

        if ( ! mapped ) {
            return {
                aspectRatio: model.aspectRatios[0],
                videoResolution: model.resolutions[0],
            };
        }

        if ( model.resolutions.length === 0 ) {
            return { aspectRatio: mapped.aspectRatio, videoResolution: undefined };
        }

        if ( ! model.resolutions.includes(mapped.resolution) ) {
            const supportedSizes = Object.keys(DIMENSION_MAP)
                .filter(key => model.resolutions.includes(DIMENSION_MAP[key].resolution));
            throw APIError.create('field_invalid', null, {
                key: 'size',
                expected: `one of: ${supportedSizes.join(', ')}`,
                got: size,
            });
        }

        return {
            aspectRatio: mapped.aspectRatio,
            videoResolution: mapped.resolution,
        };
    }

    /**
     * Converts a number (rounded) or numeric string (parsed base 10) into a
     * positive integer; returns `undefined` for anything else.
     */
    #coercePositiveInteger (value: unknown): number | undefined {
        if ( typeof value === 'number' && Number.isFinite(value) ) {
            const rounded = Math.round(value);
            return rounded > 0 ? rounded : undefined;
        }
        if ( typeof value === 'string' ) {
            const numeric = Number.parseInt(value, 10);
            return Number.isFinite(numeric) && numeric > 0 ? numeric : undefined;
        }
        return undefined;
    }

    /** Resolves after `ms` milliseconds. */
    async #delay (ms: number): Promise<void> {
        return await new Promise<void>(resolve => setTimeout(resolve, ms));
    }
}
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import { IVideoModel } from '../types.js';

/** Generic video model record extended with the capability fields the Veo provider reads. */
export interface IGeminiVideoModel extends IVideoModel {
    /** Aspect ratios offered by the model; the first entry is the default. */
    aspectRatios: string[];
    /** Resolution tiers offered by the model; empty when none can be selected. */
    resolutions: string[];
    /** Whether a first-frame image may be supplied. */
    supportsImageInput: boolean;
    /** Whether reference images may be supplied. */
    supportsReferenceImages: boolean;
}

// Dimension strings used by the service layer for validation.
const HD_DIMENSIONS = ['1280x720', '720x1280', '1920x1080', '1080x1920'];
const HD_AND_4K_DIMENSIONS = HD_DIMENSIONS.concat(['3840x2160', '2160x3840']);

// Small factories so every model entry owns its own array instance.
const bothOrientations = (): string[] => ['16:9', '9:16'];
const veo3Durations = (): number[] => [4, 6, 8];
const hdResolutions = (): string[] => ['720p', '1080p'];
const hdAnd4kResolutions = (): string[] => ['720p', '1080p', '4k'];

// Model table; the first entry is the provider's default model.
// `costs` values are in `costs_currency` units per generated second, with
// optional tier-specific keys (`per-second-1080p`, `per-second-4k`).
// https://ai.google.dev/gemini-api/docs/video
// https://ai.google.dev/gemini-api/docs/pricing
export const GEMINI_VIDEO_GENERATION_MODELS: IGeminiVideoModel[] = [
    {
        puterId: 'google:google/veo-2.0',
        id: 'veo-2.0-generate-001',
        name: 'Veo 2.0',
        costs_currency: 'usd-cents',
        costs: { 'per-second': 35 },
        output_cost_key: 'per-second',
        durationSeconds: [5, 6, 8],
        dimensions: ['1280x720', '720x1280'],
        aspectRatios: bothOrientations(),
        resolutions: [],
        supportsImageInput: true,
        supportsReferenceImages: false,
    },
    {
        puterId: 'google:google/veo-3.0',
        id: 'veo-3.0-generate-001',
        name: 'Veo 3.0',
        costs_currency: 'usd-cents',
        costs: { 'per-second': 40 },
        output_cost_key: 'per-second',
        durationSeconds: veo3Durations(),
        dimensions: HD_DIMENSIONS,
        aspectRatios: bothOrientations(),
        resolutions: hdResolutions(),
        supportsImageInput: true,
        supportsReferenceImages: false,
    },
    {
        puterId: 'google:google/veo-3.0-fast',
        id: 'veo-3.0-fast-generate-001',
        name: 'Veo 3.0 Fast',
        costs_currency: 'usd-cents',
        costs: { 'per-second': 15 },
        output_cost_key: 'per-second',
        durationSeconds: veo3Durations(),
        dimensions: HD_DIMENSIONS,
        aspectRatios: bothOrientations(),
        resolutions: hdResolutions(),
        supportsImageInput: true,
        supportsReferenceImages: false,
    },
    {
        puterId: 'google:google/veo-3.1',
        id: 'veo-3.1-generate-preview',
        name: 'Veo 3.1',
        costs_currency: 'usd-cents',
        costs: { 'per-second': 40, 'per-second-4k': 60 },
        output_cost_key: 'per-second',
        durationSeconds: veo3Durations(),
        dimensions: HD_AND_4K_DIMENSIONS,
        aspectRatios: bothOrientations(),
        resolutions: hdAnd4kResolutions(),
        supportsImageInput: true,
        supportsReferenceImages: true,
    },
    {
        puterId: 'google:google/veo-3.1-fast',
        id: 'veo-3.1-fast-generate-preview',
        name: 'Veo 3.1 Fast',
        costs_currency: 'usd-cents',
        costs: { 'per-second': 15, 'per-second-4k': 35 },
        output_cost_key: 'per-second',
        durationSeconds: veo3Durations(),
        dimensions: HD_AND_4K_DIMENSIONS,
        aspectRatios: bothOrientations(),
        resolutions: hdAnd4kResolutions(),
        supportsImageInput: true,
        supportsReferenceImages: true,
    },
    {
        puterId: 'google:google/veo-3.1-lite',
        id: 'veo-3.1-lite-generate-preview',
        name: 'Veo 3.1 Lite',
        costs_currency: 'usd-cents',
        costs: { 'per-second': 5, 'per-second-1080p': 8 },
        output_cost_key: 'per-second',
        durationSeconds: veo3Durations(),
        dimensions: HD_DIMENSIONS,
        aspectRatios: bothOrientations(),
        resolutions: hdResolutions(),
        supportsImageInput: true,
        supportsReferenceImages: false,
    },
];
`provider: 'gemini'` or inferred from model (`gemini-2.5-flash-image-preview`, `gemini-3-pro-image-preview`): +Available when `provider: 'gemini'` or inferred from model: | Option | Type | Description | |--------|------|-------------| | `model` | `String` | Image model to use. | -| `ratio` | `Object` | Currently only `{ w: 1024, h: 1024 }` is supported | -| `input_image` | `String` | Base64 encoded input image for image-to-image generation | -| `input_image_mime_type` | `String` | MIME type of the input image. Options: `'image/png'`, `'image/jpeg'`, `'image/jpg'`, `'image/webp'` | +| `ratio` | `Object` | Aspect ratio as `{ w, h }` (e.g., `{ w: 16, h: 9 }`). | +| `quality` | `String` | Output size tier: `'512'`, `'1K'`, `'2K'`, `'4K'` (availability varies by model) | +| `input_images` | `Array` | Base64 input images for image-to-image (Gemini models only) | #### xAI (Grok) Options diff --git a/src/docs/src/AI/txt2vid.md b/src/docs/src/AI/txt2vid.md index 5ed575ec2..f5db05943 100644 --- a/src/docs/src/AI/txt2vid.md +++ b/src/docs/src/AI/txt2vid.md @@ -31,14 +31,13 @@ Additional settings for the generation request. Available options depend on the | Option | Type | Description | |--------|------|-------------| | `prompt` | `String` | Text description for the video generation | -| `provider` | `String` | The AI provider to use. `'openai' (default) \| 'together'` | | `model` | `String` | Video model to use (provider-specific). 
Defaults to `'sora-2'` | | `seconds` | `Number` | Target clip length in seconds | | `test_mode` | `Boolean` | When `true`, returns a sample video without using credits | #### OpenAI Options -Available when `provider: 'openai'` or inferred from model (`sora-2`, `sora-2-pro`): +Available when using model `sora-2` or `sora-2-pro`: | Option | Type | Description | |--------|------|-------------| @@ -49,9 +48,25 @@ Available when `provider: 'openai'` or inferred from model (`sora-2`, `sora-2-pr For more details about each option, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/videos/create). +#### Google (Veo) Options + +Available when using a Veo model (`veo-2.0-generate-001`, `veo-3.0-generate-001`, `veo-3.1-generate-preview`, etc.): + +| Option | Type | Description | +|--------|------|-------------| +| `model` | `String` | Video model to use. Available: `'veo-2.0-generate-001'`, `'veo-3.0-generate-001'`, `'veo-3.0-fast-generate-001'`, `'veo-3.1-generate-preview'`, `'veo-3.1-fast-generate-preview'`, `'veo-3.1-lite-generate-preview'` | +| `seconds` | `Number` | Target clip length in seconds. Veo 2.0: `5`, `6`, `8`. Veo 3.x: `4`, `6`, `8`. Note: 1080p and 4K output require `seconds: 8` | +| `size` | `String` | Output dimensions (e.g., `'1280x720'`, `'1920x1080'`, `'3840x2160'`). `resolution` is an alias. 4K sizes only available on Veo 3.1 models | +| `negative_prompt` | `String` | Text describing what to avoid in the video | +| `input_reference` | `String` | Base64 image used as the first frame (image-to-video). | +| `reference_images` | `Array` | Up to 3 base64 images used as style/asset references. Supported on Veo 3.1 models only | +| `last_frame` | `String` | Base64 image used as the last frame | + +For more details, see the [Google Veo API reference](https://ai.google.dev/gemini-api/docs/video). 
+ #### TogetherAI Options -Available when `provider: 'together'` or inferred from model: +Available when using a TogetherAI model: | Option | Type | Description | |--------|------|-------------| @@ -76,7 +91,7 @@ Any properties not set fall back to provider defaults. A `Promise` that resolves to an `HTMLVideoElement`. The element is preloaded, has `controls` enabled, and exposes metadata via `data-mime-type` and `data-source` attributes. Append it to the DOM to display the generated clip immediately. -> **Note:** Real Sora renders can take a couple of minutes to complete. The returned promise resolves only when the MP4 is ready, so keep your UI responsive (for example, by showing a spinner) while you wait. Each successful generation consumes the user’s AI credits in accordance with the model, duration, and resolution you request. +> **Note:** Video generation can take several minutes to complete. The returned promise resolves only when the video is ready, so keep your UI responsive (for example, by showing a spinner) while you wait. Each successful generation consumes the user’s AI credits in accordance with the model, duration, and resolution you request. ## Examples