add google video native provider, imagen models (#2759)
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
Notify HeyPuter / notify (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test-backend (24.x) (push) Has been cancelled
test / API tests (node env, api-test) (24.x) (push) Has been cancelled
test / puterjs (node env, vitest) (24.x) (push) Has been cancelled

* update gemini .chat models, add imagen, add gemini veo

* add models

* update documentation

* add veo 3.1 lite, 1080p pricing
This commit is contained in:
Shruc
2026-04-02 23:03:38 +03:00
committed by GitHub
parent 0505a5df1f
commit 5e6aff63c4
9 changed files with 617 additions and 33 deletions
@@ -112,28 +112,6 @@ export const GEMINI_MODELS: IChatModel[] = [
},
max_tokens: 200_000,
},
{
puterId: 'google:google/gemini-3-pro-preview',
id: 'gemini-3-pro-preview',
modalities: { 'input': ['text', 'image', 'video', 'audio', 'pdf'], 'output': ['text'] },
open_weights: false,
tool_call: true,
knowledge: '2025-01',
release_date: '2025-11-18',
name: 'Gemini 3 Pro',
aliases: ['google/gemini-3-pro-preview'],
context: 1_048_576,
costs_currency: 'usd-cents',
input_cost_key: 'prompt_tokens',
output_cost_key: 'completion_tokens',
costs: {
tokens: 1_000_000,
prompt_tokens: 200,
completion_tokens: 1200,
cached_tokens: 20,
},
max_tokens: 200_000,
},
{
puterId: 'google:google/gemini-3.1-pro-preview',
id: 'gemini-3.1-pro-preview',
@@ -178,4 +156,26 @@ export const GEMINI_MODELS: IChatModel[] = [
},
max_tokens: 65536,
},
{
puterId: 'google:google/gemini-3.1-flash-lite-preview',
id: 'gemini-3.1-flash-lite-preview',
modalities: { 'input': ['text', 'image', 'video', 'audio', 'pdf'], 'output': ['text'] },
open_weights: false,
tool_call: true,
knowledge: '2025-01',
release_date: '2026-03-18',
name: 'Gemini 3.1 Flash-Lite',
aliases: ['google/gemini-3.1-flash-lite-preview'],
context: 1_048_576,
costs_currency: 'usd-cents',
input_cost_key: 'prompt_tokens',
output_cost_key: 'completion_tokens',
costs: {
tokens: 1_000_000,
prompt_tokens: 25,
completion_tokens: 150,
cached_tokens: 2.5,
},
max_tokens: 65536,
},
];
@@ -22,7 +22,7 @@ import APIError from '../../../../../api/APIError.js';
import { ErrorService } from '../../../../../modules/core/ErrorService.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS } from './models.js';
import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS, IGeminiImageModel } from './models.js';
import { IGenerateParams, IImageModel, IImageProvider } from '../types.js';
const MIME_SIGNATURES: Record<string, string> = {
@@ -65,7 +65,8 @@ export class GeminiImageGenerationProvider implements IImageProvider {
const { prompt, test_mode, input_image, input_image_mime_type, model, quality } = params;
let { ratio, input_images } = params;
const selectedModel = this.models().find(m => m.id === model) || this.models().find(m => m.id === this.getDefaultModel())!;
const selectedModel = (this.models() as IGeminiImageModel[]).find(m => m.id === model)
|| (this.models() as IGeminiImageModel[]).find(m => m.id === this.getDefaultModel())!;
if ( test_mode ) {
return 'https://puter-sample-data.puter.site/image_example.png';
@@ -75,6 +76,10 @@ export class GeminiImageGenerationProvider implements IImageProvider {
throw new Error('`prompt` must be a non-empty string');
}
if ( selectedModel.apiType === 'generateImages' ) {
return this.#generateWithImagen(prompt, selectedModel, params);
}
const allowedRatios = selectedModel.allowedRatios ?? [GEMINI_DEFAULT_RATIO];
ratio = ratio && this.#isValidRatio(ratio, allowedRatios) ? ratio : allowedRatios[0];
@@ -197,6 +202,64 @@ export class GeminiImageGenerationProvider implements IImageProvider {
return url;
}
/**
 * Generates a single image with an Imagen model through the SDK's
 * `generateImages` call and returns it as a base64 data URI.
 *
 * The flat per-image price is checked against the actor's credits before
 * the request is sent; usage is only recorded once image bytes have been
 * received.
 *
 * @param prompt - Text prompt forwarded to the model.
 * @param selectedModel - Model entry; must define `costs['per-image']`.
 * @param params - Original request; only `ratio` and `quality` are read here.
 * @returns `data:<mime>;base64,<bytes>` for the first generated image.
 * @throws APIError `insufficient_funds` when the credit check fails.
 * @throws Error when no actor is in context, the model has no per-image
 * cost, the response has no image, or the image was filtered.
 */
async #generateWithImagen (prompt: string, selectedModel: IGeminiImageModel, params: IGenerateParams): Promise<string> {
    const actor = Context.get('actor');
    if ( ! actor ) {
        throw new Error('actor not found in context');
    }

    const costCents = selectedModel.costs?.['per-image'];
    if ( costCents === undefined ) {
        throw new Error(`No per-image cost configured for model '${selectedModel.id}'`);
    }

    // Metering works in micro-cents; round up so a fractional price is never under-charged.
    const costInMicroCents = Math.ceil(costCents * 1_000_000);
    const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
    if ( ! usageAllowed ) {
        throw APIError.create('insufficient_funds');
    }

    const allowedRatios = selectedModel.allowedRatios ?? [GEMINI_DEFAULT_RATIO];
    const ratio = params.ratio && this.#isValidRatio(params.ratio, allowedRatios)
        ? params.ratio : allowedRatios[0];

    // Reduce the ratio to lowest terms before serializing it. The Imagen
    // entries in the model list only use reduced ratios (1:1, 16:9, ...), but
    // the GEMINI_DEFAULT_RATIO fallback is 1024x1024 and would otherwise be
    // sent as "1024:1024". Ratios that are already reduced are unchanged.
    let divisor = 1;
    if ( Number.isInteger(ratio.w) && Number.isInteger(ratio.h) && ratio.w > 0 && ratio.h > 0 ) {
        let a = ratio.w;
        let b = ratio.h;
        while ( b !== 0 ) {
            [a, b] = [b, a % b];
        }
        divisor = a;
    }
    const aspectRatio = `${ratio.w / divisor}:${ratio.h / divisor}`;

    const config: Record<string, unknown> = {
        numberOfImages: 1,
        aspectRatio,
    };

    // Only forward a quality level the model explicitly lists.
    if ( params.quality && selectedModel.allowedQualityLevels?.includes(params.quality) ) {
        config.imageSize = params.quality;
    }

    const response = await this.#client.models.generateImages({
        model: selectedModel.id,
        prompt,
        config,
    });

    const generated = response?.generatedImages;
    if ( !generated || generated.length === 0 ) {
        throw new Error('Imagen response did not include an image');
    }

    const entry = generated[0];
    if ( entry.raiFilteredReason ) {
        throw new Error(`Image was filtered: ${entry.raiFilteredReason}`);
    }

    const image = entry.image;
    if ( ! image?.imageBytes ) {
        throw new Error('Imagen response did not include image bytes');
    }

    // Charge only after a usable image has been received.
    const usageKey = `gemini:${selectedModel.id}`;
    await this.#meteringService.incrementUsage(actor, usageKey, 1, costInMicroCents);

    const mimeType = image.mimeType ?? 'image/png';
    return `data:${mimeType};base64,${image.imageBytes}`;
}
#buildContents (prompt: string, input_images?: string[], input_image_mime_type?: string) {
const parts: Record<string, unknown>[] = [{ text: prompt }];
@@ -19,6 +19,10 @@
import { IImageModel } from '../types';
/**
 * Image model entry extended with an optional selector for the SDK call
 * that serves the model.
 */
export interface IGeminiImageModel extends IImageModel {
// 'generateImages' routes the request through the provider's Imagen path;
// any other value (or omitting the field) stays on the provider's regular path.
apiType?: 'generateContent' | 'generateImages';
}
export const GEMINI_DEFAULT_RATIO = { w: 1024, h: 1024 };
// Estimated image output token counts for pre-flight cost checks.
@@ -37,7 +41,7 @@ export const GEMINI_ESTIMATED_IMAGE_TOKENS: Record<string, number> = {
'gemini-3.1-flash-image-preview:4K': 2520,
};
export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
export const GEMINI_IMAGE_GENERATION_MODELS: IGeminiImageModel[] = [
{
puterId: 'google:google/gemini-2.5-flash-image',
id: 'gemini-2.5-flash-image',
@@ -148,4 +152,80 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
'1K:1x1': 6.7,
},
},
// ── Imagen models (use generateImages API) ─────────────────────
{
puterId: 'google:google/imagen-4.0-fast',
id: 'imagen-4.0-fast-generate-001',
apiType: 'generateImages',
name: 'Imagen 4.0 Fast',
version: '1.0',
costs_currency: 'usd-cents',
index_cost_key: 'per-image',
aliases: [
'imagen-4.0-fast',
'google/imagen-4.0-fast',
'google:google/imagen-4.0-fast',
],
allowedRatios: [
{ w: 1, h: 1 },
{ w: 3, h: 4 },
{ w: 4, h: 3 },
{ w: 9, h: 16 },
{ w: 16, h: 9 },
],
costs: {
'per-image': 2, // $0.02 per image
},
},
{
puterId: 'google:google/imagen-4.0',
id: 'imagen-4.0-generate-001',
apiType: 'generateImages',
name: 'Imagen 4.0',
version: '1.0',
costs_currency: 'usd-cents',
index_cost_key: 'per-image',
aliases: [
'imagen-4.0',
'google/imagen-4.0',
'google:google/imagen-4.0',
],
allowedQualityLevels: ['1K', '2K'],
allowedRatios: [
{ w: 1, h: 1 },
{ w: 3, h: 4 },
{ w: 4, h: 3 },
{ w: 9, h: 16 },
{ w: 16, h: 9 },
],
costs: {
'per-image': 4, // $0.04 per image
},
},
{
puterId: 'google:google/imagen-4.0-ultra',
id: 'imagen-4.0-ultra-generate-001',
apiType: 'generateImages',
name: 'Imagen 4.0 Ultra',
version: '1.0',
costs_currency: 'usd-cents',
index_cost_key: 'per-image',
aliases: [
'imagen-4.0-ultra',
'google/imagen-4.0-ultra',
'google:google/imagen-4.0-ultra',
],
allowedQualityLevels: ['1K', '2K'],
allowedRatios: [
{ w: 1, h: 1 },
{ w: 3, h: 4 },
{ w: 4, h: 3 },
{ w: 9, h: 16 },
{ w: 16, h: 9 },
],
costs: {
'per-image': 6, // $0.06 per image
},
},
];
@@ -23,6 +23,7 @@ import BaseService from '../../BaseService.js';
import { DriverService } from '../../drivers/DriverService.js';
import { EventService } from '../../EventService.js';
import { MeteringService } from '../../MeteringService/MeteringService.js';
import { GeminiVideoGenerationProvider } from './providers/GeminiVideoGenerationProvider/GeminiVideoGenerationProvider.js';
import { OpenAIVideoGenerationProvider } from './providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.js';
import { TogetherVideoGenerationProvider } from './providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.js';
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from './providers/types.js';
@@ -108,6 +109,14 @@ export class AIVideoGenerationService extends BaseService {
);
}
const geminiVideoConfig = this.config.providers?.['gemini-video-generation'] || this.global_config?.services?.gemini;
if ( geminiVideoConfig && (geminiVideoConfig.apiKey || geminiVideoConfig.secret_key) ) {
this.#providers['gemini-video-generation'] = new GeminiVideoGenerationProvider(
{ apiKey: geminiVideoConfig.apiKey || geminiVideoConfig.secret_key },
this.meteringService,
);
}
// emit event for extensions to add providers
const extensionProviders = {} as Record<string, IVideoProvider>;
await this.eventService.emit('ai.video.registerProviders', extensionProviders);
@@ -0,0 +1,296 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { GoogleGenAI, GenerateVideosOperation, GenerateVideosParameters } from '@google/genai';
import APIError from '../../../../../api/APIError.js';
import { Context } from '../../../../../util/context.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js';
import { TypedValue } from '../../../../drivers/meta/Runtime.js';
import { GEMINI_VIDEO_GENERATION_MODELS, IGeminiVideoModel } from './models.js';
// Sample clip returned when the request sets `test_mode`; no generation call is made.
const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';
// Pause between two status checks of a pending operation (10 seconds).
const POLL_INTERVAL_MS = 10_000;
// Upper bound on how long a pending operation is polled before giving up (10 minutes).
const DEFAULT_TIMEOUT_MS = 10 * 60 * 1000;
// Maps a `WIDTHxHEIGHT` size string to the aspect ratio and resolution
// values that are placed in the video generation config.
const DIMENSION_MAP: Record<string, { aspectRatio: string; resolution: string }> = {
'1280x720': { aspectRatio: '16:9', resolution: '720p' },
'720x1280': { aspectRatio: '9:16', resolution: '720p' },
'1920x1080': { aspectRatio: '16:9', resolution: '1080p' },
'1080x1920': { aspectRatio: '9:16', resolution: '1080p' },
'3840x2160': { aspectRatio: '16:9', resolution: '4k' },
'2160x3840': { aspectRatio: '9:16', resolution: '4k' },
};
/**
 * Video provider that generates clips with Google's Veo models through the
 * `@google/genai` SDK.
 *
 * A request is priced per second of output, checked against the actor's
 * credits before the SDK call is made, and metered only after a video has
 * actually been returned.
 */
export class GeminiVideoGenerationProvider implements IVideoProvider {
    #client: GoogleGenAI;
    #meteringService: MeteringService;

    /**
     * @param config - Must carry a non-empty `apiKey`.
     * @param meteringService - Used for the pre-flight credit check and for recording usage.
     * @throws Error when `config.apiKey` is empty.
     */
    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        if ( ! config.apiKey ) {
            throw new Error('Gemini video generation requires an API key');
        }
        this.#client = new GoogleGenAI({ apiKey: config.apiKey });
        this.#meteringService = meteringService;
    }

    /** Returns the id of the first entry in the model list. */
    getDefaultModel (): string {
        return GEMINI_VIDEO_GENERATION_MODELS[0].id;
    }

    /** Lists every model, each with the aliases `<id>` and `google/<id>`. */
    async models (): Promise<IVideoModel[]> {
        return GEMINI_VIDEO_GENERATION_MODELS.map(model => ({
            ...model,
            aliases: [model.id, `google/${model.id}`],
        }));
    }

    /**
     * Generates a single video and returns it as a `TypedValue`.
     *
     * @param params - Generation request. `prompt` is required. `size`, or
     * its alias `resolution`, selects one of the `DIMENSION_MAP` entries; any
     * other value falls back to the model's first aspect ratio and resolution.
     * @returns A `TypedValue` wrapping the video URI, or a base64 data URI
     * when the response carries raw bytes instead.
     * @throws APIError `field_invalid` for a blank prompt and
     * `insufficient_funds` when the credit check fails.
     * @throws Error when no actor is in context, the model has no per-second
     * price, the operation fails or times out, or no video is returned.
     */
    async generate (params: IGenerateVideoParams): Promise<unknown> {
        const {
            prompt,
            model: requestedModel,
            seconds,
            duration,
            size,
            resolution,
            negative_prompt: negativePrompt,
            reference_images: referenceImages,
            input_reference: inputReference,
            last_frame: lastFrame,
            test_mode: testMode,
        } = params ?? {};

        if ( typeof prompt !== 'string' || !prompt.trim() ) {
            throw APIError.create('field_invalid', null, {
                key: 'prompt',
                expected: 'a non-empty string',
                got: prompt,
            });
        }

        const selectedModel = this.#getModel(requestedModel);

        if ( testMode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'video',
            }, DEFAULT_TEST_VIDEO_URL);
        }

        const hasFirstFrame = selectedModel.supportsImageInput
            && typeof inputReference === 'string' && inputReference.trim().length > 0;

        // Filter before deciding whether reference images are in play, so a
        // list holding only blank entries does not force the 8 second
        // duration or suppress the first/last frame inputs.
        const validReferenceImages = selectedModel.supportsReferenceImages && Array.isArray(referenceImages)
            ? referenceImages
                .filter((img: string) => typeof img === 'string' && img.trim().length > 0)
                .slice(0, 3)
            : [];
        const hasRefImages = validReferenceImages.length > 0;

        // `resolution` is accepted as an alias of `size`; `size` wins when both are given.
        const requestedSize = [size, resolution]
            .find((value): value is string => typeof value === 'string' && value.length > 0);
        const { aspectRatio, videoResolution } = this.#resolveAspectAndResolution(requestedSize, selectedModel);

        // 1080p and 4K require duration=8
        const isHighRes = videoResolution === '1080p' || videoResolution === '4k';
        let durationSeconds = this.#coercePositiveInteger(seconds ?? duration)
            ?? selectedModel.durationSeconds?.[0] ?? 8;
        if ( isHighRes || hasRefImages ) {
            durationSeconds = 8;
        }

        const { perSecondCents, usageTier } = this.#resolvePricing(selectedModel, videoResolution);
        if ( perSecondCents === undefined ) {
            throw new Error(`No per-second cost configured for video model '${selectedModel.id}'`);
        }

        // Metering works in micro-cents; round up so a fractional price is never under-charged.
        const costCents = perSecondCents * durationSeconds;
        const costInMicroCents = Math.ceil(costCents * 1_000_000);

        const actor = Context.get('actor');
        if ( ! actor ) {
            throw new Error('actor not found in context');
        }

        const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

        const config: Record<string, unknown> = {
            numberOfVideos: 1,
            durationSeconds,
        };

        if ( aspectRatio ) config.aspectRatio = aspectRatio;
        // Models that list no resolutions get no explicit resolution in the config.
        if ( videoResolution && selectedModel.resolutions.length > 0 ) {
            config.resolution = videoResolution;
        }
        if ( typeof negativePrompt === 'string' && negativePrompt.trim() ) {
            config.negativePrompt = negativePrompt;
        }

        // Reference images (Veo 3.1 supports up to 3)
        // When referenceImages is set, image (first frame), video, and lastFrame are not supported.
        if ( hasRefImages ) {
            config.referenceImages = validReferenceImages.map((img: string) => ({
                image: this.#parseImageInput(img),
                referenceType: 'asset',
            }));
        }

        if ( !hasRefImages && typeof lastFrame === 'string' && lastFrame.trim() ) {
            config.lastFrame = this.#parseImageInput(lastFrame);
        }

        const generateParams: GenerateVideosParameters = {
            model: selectedModel.id,
            prompt,
            config,
        };

        // First frame (image-to-video)
        if ( hasFirstFrame && !hasRefImages ) {
            generateParams.image = this.#parseImageInput(inputReference as string);
        }

        let operation: GenerateVideosOperation;
        try {
            operation = await this.#client.models.generateVideos(generateParams);
        } catch (e) {
            console.error('Gemini video generation error:', e);
            throw e;
        }

        const completed = await this.#pollUntilComplete(operation);

        const generatedVideos = completed.response?.generatedVideos;
        if ( !generatedVideos || generatedVideos.length === 0 ) {
            const filtered = completed.response?.raiMediaFilteredCount ?? 0;
            if ( filtered > 0 ) {
                const reasons = completed.response?.raiMediaFilteredReasons?.join(', ') || 'content policy';
                throw new Error(`Video was filtered due to ${reasons}`);
            }
            throw new Error('Gemini response did not include a video');
        }

        const video = generatedVideos[0].video;
        if ( ! video ) {
            throw new Error('Gemini response video entry was empty');
        }

        // Charge only after a video entry has been received.
        const usageKey = `gemini:${selectedModel.id}${usageTier}`;
        await this.#meteringService.incrementUsage(actor, usageKey, durationSeconds, costInMicroCents);

        if ( video.uri ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'video',
            }, video.uri);
        }

        if ( video.videoBytes ) {
            const mimeType = video.mimeType ?? 'video/mp4';
            const dataUri = `data:${mimeType};base64,${video.videoBytes}`;
            return new TypedValue({
                $: 'string:url:data',
                content_type: 'video',
            }, dataUri);
        }

        throw new Error('Gemini video response contained neither uri nor videoBytes');
    }

    /**
     * Picks the per-second price for the resolved resolution, together with
     * the suffix appended to the usage key.
     *
     * A resolution-specific price (`per-second-4k`, `per-second-1080p`) is
     * used when the model defines one; otherwise the base `per-second` price
     * applies and the usage key carries no suffix. The suffix and the price
     * therefore always describe the same tier.
     */
    #resolvePricing (
        model: IGeminiVideoModel,
        videoResolution: string | undefined,
    ): { perSecondCents: number | undefined; usageTier: string } {
        const tierKey = videoResolution === '4k'
            ? 'per-second-4k'
            : videoResolution === '1080p' ? 'per-second-1080p' : undefined;
        const tierPrice = tierKey ? model.costs?.[tierKey] : undefined;
        if ( tierPrice !== undefined && tierPrice !== null ) {
            return { perSecondCents: tierPrice, usageTier: `:${videoResolution}` };
        }
        return { perSecondCents: model.costs?.['per-second'], usageTier: '' };
    }

    /**
     * Re-fetches the operation every `POLL_INTERVAL_MS` until it reports
     * `done`.
     *
     * @throws Error when `DEFAULT_TIMEOUT_MS` elapses first, or when the
     * finished operation carries an error.
     */
    async #pollUntilComplete (operation: GenerateVideosOperation): Promise<GenerateVideosOperation> {
        let op = operation;
        const start = Date.now();

        while ( !op.done ) {
            if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) {
                throw new Error('Timed out waiting for Gemini video generation to complete');
            }
            await this.#delay(POLL_INTERVAL_MS);
            op = await this.#client.operations.getVideosOperation({ operation: op });
        }

        if ( op.error ) {
            const msg = (op.error as Record<string, unknown>).message ?? JSON.stringify(op.error);
            throw new Error(`Gemini video generation failed: ${msg}`);
        }

        return op;
    }

    /**
     * Splits a `data:<mime>;base64,<payload>` URI into its payload and MIME
     * type. Any other input is passed through unchanged as the payload and
     * labelled `image/png`.
     */
    #parseImageInput (input: string): { imageBytes: string; mimeType: string } {
        if ( input.startsWith('data:') ) {
            const commaIdx = input.indexOf(',');
            if ( commaIdx !== -1 ) {
                // Text between "data:" and the comma, e.g. "image/jpeg;base64".
                const header = input.substring(5, commaIdx);
                if ( header.endsWith(';base64') ) {
                    const mimeType = header.substring(0, header.length - 7);
                    if ( mimeType.length > 0 ) {
                        return { imageBytes: input.substring(commaIdx + 1), mimeType };
                    }
                }
            }
        }
        return { imageBytes: input, mimeType: 'image/png' };
    }

    /**
     * Looks up a model by id, by the `google/<id>` alias advertised by
     * `models()`, or by `puterId`, so a request using one of those names is
     * not silently served by the default model. Unknown or missing names
     * fall back to the first entry in the model list.
     */
    #getModel (requestedModel?: string): IGeminiVideoModel {
        const match = requestedModel
            ? GEMINI_VIDEO_GENERATION_MODELS.find(m =>
                m.id === requestedModel
                || `google/${m.id}` === requestedModel
                || m.puterId === requestedModel)
            : undefined;
        return match ?? GEMINI_VIDEO_GENERATION_MODELS[0];
    }

    /**
     * Maps a size string to an aspect ratio and resolution via
     * `DIMENSION_MAP`; unknown or missing sizes use the model's first listed
     * aspect ratio and resolution.
     */
    #resolveAspectAndResolution (
        size: string | undefined,
        model: IGeminiVideoModel,
    ): { aspectRatio: string; videoResolution: string | undefined } {
        if ( size && DIMENSION_MAP[size] ) {
            return {
                aspectRatio: DIMENSION_MAP[size].aspectRatio,
                videoResolution: DIMENSION_MAP[size].resolution,
            };
        }
        return {
            aspectRatio: model.aspectRatios[0],
            videoResolution: model.resolutions[0],
        };
    }

    /**
     * Converts a number (rounded) or numeric string (parsed base 10) into a
     * positive integer; returns `undefined` for anything else.
     */
    #coercePositiveInteger (value: unknown): number | undefined {
        if ( typeof value === 'number' && Number.isFinite(value) ) {
            const rounded = Math.round(value);
            return rounded > 0 ? rounded : undefined;
        }
        if ( typeof value === 'string' ) {
            const numeric = Number.parseInt(value, 10);
            return Number.isFinite(numeric) && numeric > 0 ? numeric : undefined;
        }
        return undefined;
    }

    /** Resolves after `ms` milliseconds. */
    async #delay (ms: number): Promise<void> {
        return await new Promise(resolve => setTimeout(resolve, ms));
    }
}
@@ -0,0 +1,120 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { IVideoModel } from '../types.js';
/**
 * Video model entry extended with the capability fields the Gemini video
 * provider reads when building a request.
 */
export interface IGeminiVideoModel extends IVideoModel {
// Supported aspect ratios; the first entry is the default when no known size is requested.
aspectRatios: string[];
// Supported resolutions; the first entry is the default. When empty, the
// provider does not put a resolution into the request config.
resolutions: string[];
// Whether `input_reference` may be used as the first frame (image-to-video).
supportsImageInput: boolean;
// Whether `reference_images` are forwarded to the model.
supportsReferenceImages: boolean;
}
// Dimension strings used by the service layer for validation.
const STANDARD_DIMENSIONS = ['1280x720', '720x1280', '1920x1080', '1080x1920'];
const DIMENSIONS_WITH_4K = [...STANDARD_DIMENSIONS, '3840x2160', '2160x3840'];
// https://ai.google.dev/gemini-api/docs/video
// https://ai.google.dev/gemini-api/docs/pricing
/**
 * Veo models exposed by the Gemini video provider.
 *
 * `costs` are in US cents per second of generated video. `per-second` is the
 * base price; `per-second-1080p` and `per-second-4k`, where present, are the
 * prices the provider applies for those output resolutions. The first entry
 * is the provider's default model.
 */
export const GEMINI_VIDEO_GENERATION_MODELS: IGeminiVideoModel[] = [
{
puterId: 'google:google/veo-2.0',
id: 'veo-2.0-generate-001',
name: 'Veo 2.0',
costs_currency: 'usd-cents',
costs: { 'per-second': 35 },
output_cost_key: 'per-second',
durationSeconds: [5, 6, 8],
dimensions: ['1280x720', '720x1280'],
aspectRatios: ['16:9', '9:16'],
// Empty: no explicit resolution is sent for this model.
resolutions: [],
supportsImageInput: true,
supportsReferenceImages: false,
},
{
puterId: 'google:google/veo-3.0',
id: 'veo-3.0-generate-001',
name: 'Veo 3.0',
costs_currency: 'usd-cents',
costs: { 'per-second': 40 },
output_cost_key: 'per-second',
durationSeconds: [4, 6, 8],
dimensions: STANDARD_DIMENSIONS,
aspectRatios: ['16:9', '9:16'],
resolutions: ['720p', '1080p'],
supportsImageInput: true,
supportsReferenceImages: false,
},
{
puterId: 'google:google/veo-3.0-fast',
id: 'veo-3.0-fast-generate-001',
name: 'Veo 3.0 Fast',
costs_currency: 'usd-cents',
costs: { 'per-second': 15 },
output_cost_key: 'per-second',
durationSeconds: [4, 6, 8],
dimensions: STANDARD_DIMENSIONS,
aspectRatios: ['16:9', '9:16'],
resolutions: ['720p', '1080p'],
supportsImageInput: true,
supportsReferenceImages: false,
},
{
puterId: 'google:google/veo-3.1',
id: 'veo-3.1-generate-preview',
name: 'Veo 3.1',
costs_currency: 'usd-cents',
costs: { 'per-second': 40, 'per-second-4k': 60 },
output_cost_key: 'per-second',
durationSeconds: [4, 6, 8],
dimensions: DIMENSIONS_WITH_4K,
aspectRatios: ['16:9', '9:16'],
resolutions: ['720p', '1080p', '4k'],
supportsImageInput: true,
supportsReferenceImages: true,
},
{
puterId: 'google:google/veo-3.1-fast',
id: 'veo-3.1-fast-generate-preview',
name: 'Veo 3.1 Fast',
costs_currency: 'usd-cents',
costs: { 'per-second': 15, 'per-second-4k': 35 },
output_cost_key: 'per-second',
durationSeconds: [4, 6, 8],
dimensions: DIMENSIONS_WITH_4K,
aspectRatios: ['16:9', '9:16'],
resolutions: ['720p', '1080p', '4k'],
supportsImageInput: true,
supportsReferenceImages: true,
},
{
puterId: 'google:google/veo-3.1-lite',
id: 'veo-3.1-lite-generate-preview',
name: 'Veo 3.1 Lite',
costs_currency: 'usd-cents',
costs: { 'per-second': 5, 'per-second-1080p': 8 },
output_cost_key: 'per-second',
durationSeconds: [4, 6, 8],
dimensions: STANDARD_DIMENSIONS,
aspectRatios: ['16:9', '9:16'],
resolutions: ['720p', '1080p'],
supportsImageInput: true,
supportsReferenceImages: false,
},
];
@@ -60,6 +60,7 @@ export interface IGenerateVideoParams {
negative_prompt?: string;
reference_images?: string[];
frame_images?: object[];
last_frame?: string;
metadata?: object;
input_reference?: unknown;
no_extra_params?: boolean;
+4 -4
View File
@@ -49,14 +49,14 @@ For more details, see the [OpenAI API reference](https://platform.openai.com/doc
#### Gemini Options
Available when `provider: 'gemini'` or inferred from model (`gemini-2.5-flash-image-preview`, `gemini-3-pro-image-preview`):
Available when `provider: 'gemini'` or inferred from model:
| Option | Type | Description |
|--------|------|-------------|
| `model` | `String` | Image model to use. |
| `ratio` | `Object` | Currently only `{ w: 1024, h: 1024 }` is supported |
| `input_image` | `String` | Base64 encoded input image for image-to-image generation |
| `input_image_mime_type` | `String` | MIME type of the input image. Options: `'image/png'`, `'image/jpeg'`, `'image/jpg'`, `'image/webp'` |
| `ratio` | `Object` | Aspect ratio as `{ w, h }` (e.g., `{ w: 16, h: 9 }`). |
| `quality` | `String` | Output size tier: `'512'`, `'1K'`, `'2K'`, `'4K'` (availability varies by model) |
| `input_images` | `Array<String>` | Base64 input images for image-to-image (Gemini models only) |
#### xAI (Grok) Options
+19 -4
View File
@@ -31,14 +31,13 @@ Additional settings for the generation request. Available options depend on the
| Option | Type | Description |
|--------|------|-------------|
| `prompt` | `String` | Text description for the video generation |
| `provider` | `String` | The AI provider to use. `'openai' (default) \| 'together'` |
| `model` | `String` | Video model to use (provider-specific). Defaults to `'sora-2'` |
| `seconds` | `Number` | Target clip length in seconds |
| `test_mode` | `Boolean` | When `true`, returns a sample video without using credits |
#### OpenAI Options
Available when `provider: 'openai'` or inferred from model (`sora-2`, `sora-2-pro`):
Available when using model `sora-2` or `sora-2-pro`:
| Option | Type | Description |
|--------|------|-------------|
@@ -49,9 +48,25 @@ Available when `provider: 'openai'` or inferred from model (`sora-2`, `sora-2-pr
For more details about each option, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/videos/create).
#### Google (Veo) Options
Available when using a Veo model (`veo-2.0-generate-001`, `veo-3.0-generate-001`, `veo-3.1-generate-preview`, etc.):
| Option | Type | Description |
|--------|------|-------------|
| `model` | `String` | Video model to use. Available: `'veo-2.0-generate-001'`, `'veo-3.0-generate-001'`, `'veo-3.0-fast-generate-001'`, `'veo-3.1-generate-preview'`, `'veo-3.1-fast-generate-preview'`, `'veo-3.1-lite-generate-preview'` |
| `seconds` | `Number` | Target clip length in seconds. Veo 2.0: `5`, `6`, `8`. Veo 3.x: `4`, `6`, `8`. Note: 1080p and 4K output require `seconds: 8` |
| `size` | `String` | Output dimensions (e.g., `'1280x720'`, `'1920x1080'`, `'3840x2160'`). `resolution` is an alias. 4K sizes only available on Veo 3.1 models |
| `negative_prompt` | `String` | Text describing what to avoid in the video |
| `input_reference` | `String` | Base64 image used as the first frame (image-to-video). |
| `reference_images` | `Array<String>` | Up to 3 base64 images used as style/asset references. Supported on Veo 3.1 models only |
| `last_frame` | `String` | Base64 image used as the last frame |
For more details, see the [Google Veo API reference](https://ai.google.dev/gemini-api/docs/video).
#### TogetherAI Options
Available when `provider: 'together'` or inferred from model:
Available when using a TogetherAI model:
| Option | Type | Description |
|--------|------|-------------|
@@ -76,7 +91,7 @@ Any properties not set fall back to provider defaults.
A `Promise` that resolves to an `HTMLVideoElement`. The element is preloaded, has `controls` enabled, and exposes metadata via `data-mime-type` and `data-source` attributes. Append it to the DOM to display the generated clip immediately.
> **Note:** Real Sora renders can take a couple of minutes to complete. The returned promise resolves only when the MP4 is ready, so keep your UI responsive (for example, by showing a spinner) while you wait. Each successful generation consumes the users AI credits in accordance with the model, duration, and resolution you request.
> **Note:** Video generation can take several minutes to complete. The returned promise resolves only when the video is ready, so keep your UI responsive (for example, by showing a spinner) while you wait. Each successful generation consumes the user's AI credits in accordance with the model, duration, and resolution you request.
## Examples