mirror of
https://github.com/HeyPuter/puter.git
synced 2026-05-06 09:30:49 +00:00
add google video native provider, imagen models (#2759)
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
Notify HeyPuter / notify (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test-backend (24.x) (push) Has been cancelled
test / API tests (node env, api-test) (24.x) (push) Has been cancelled
test / puterjs (node env, vitest) (24.x) (push) Has been cancelled
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
Notify HeyPuter / notify (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test-backend (24.x) (push) Has been cancelled
test / API tests (node env, api-test) (24.x) (push) Has been cancelled
test / puterjs (node env, vitest) (24.x) (push) Has been cancelled
* update gemini .chat models, add imagen, add gemini veo * add models * update documentation * add veo 3.1 lite, 1080p pricing
This commit is contained in:
@@ -112,28 +112,6 @@ export const GEMINI_MODELS: IChatModel[] = [
|
||||
},
|
||||
max_tokens: 200_000,
|
||||
},
|
||||
{
|
||||
puterId: 'google:google/gemini-3-pro-preview',
|
||||
id: 'gemini-3-pro-preview',
|
||||
modalities: { 'input': ['text', 'image', 'video', 'audio', 'pdf'], 'output': ['text'] },
|
||||
open_weights: false,
|
||||
tool_call: true,
|
||||
knowledge: '2025-01',
|
||||
release_date: '2025-11-18',
|
||||
name: 'Gemini 3 Pro',
|
||||
aliases: ['google/gemini-3-pro-preview'],
|
||||
context: 1_048_576,
|
||||
costs_currency: 'usd-cents',
|
||||
input_cost_key: 'prompt_tokens',
|
||||
output_cost_key: 'completion_tokens',
|
||||
costs: {
|
||||
tokens: 1_000_000,
|
||||
prompt_tokens: 200,
|
||||
completion_tokens: 1200,
|
||||
cached_tokens: 20,
|
||||
},
|
||||
max_tokens: 200_000,
|
||||
},
|
||||
{
|
||||
puterId: 'google:google/gemini-3.1-pro-preview',
|
||||
id: 'gemini-3.1-pro-preview',
|
||||
@@ -178,4 +156,26 @@ export const GEMINI_MODELS: IChatModel[] = [
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
{
|
||||
puterId: 'google:google/gemini-3.1-flash-lite-preview',
|
||||
id: 'gemini-3.1-flash-lite-preview',
|
||||
modalities: { 'input': ['text', 'image', 'video', 'audio', 'pdf'], 'output': ['text'] },
|
||||
open_weights: false,
|
||||
tool_call: true,
|
||||
knowledge: '2025-01',
|
||||
release_date: '2026-03-18',
|
||||
name: 'Gemini 3.1 Flash-Lite',
|
||||
aliases: ['google/gemini-3.1-flash-lite-preview'],
|
||||
context: 1_048_576,
|
||||
costs_currency: 'usd-cents',
|
||||
input_cost_key: 'prompt_tokens',
|
||||
output_cost_key: 'completion_tokens',
|
||||
costs: {
|
||||
tokens: 1_000_000,
|
||||
prompt_tokens: 25,
|
||||
completion_tokens: 150,
|
||||
cached_tokens: 2.5,
|
||||
},
|
||||
max_tokens: 65536,
|
||||
},
|
||||
];
|
||||
|
||||
+65
-2
@@ -22,7 +22,7 @@ import APIError from '../../../../../api/APIError.js';
|
||||
import { ErrorService } from '../../../../../modules/core/ErrorService.js';
|
||||
import { Context } from '../../../../../util/context.js';
|
||||
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
|
||||
import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS } from './models.js';
|
||||
import { GEMINI_DEFAULT_RATIO, GEMINI_ESTIMATED_IMAGE_TOKENS, GEMINI_IMAGE_GENERATION_MODELS, IGeminiImageModel } from './models.js';
|
||||
import { IGenerateParams, IImageModel, IImageProvider } from '../types.js';
|
||||
|
||||
const MIME_SIGNATURES: Record<string, string> = {
|
||||
@@ -65,7 +65,8 @@ export class GeminiImageGenerationProvider implements IImageProvider {
|
||||
const { prompt, test_mode, input_image, input_image_mime_type, model, quality } = params;
|
||||
let { ratio, input_images } = params;
|
||||
|
||||
const selectedModel = this.models().find(m => m.id === model) || this.models().find(m => m.id === this.getDefaultModel())!;
|
||||
const selectedModel = (this.models() as IGeminiImageModel[]).find(m => m.id === model)
|
||||
|| (this.models() as IGeminiImageModel[]).find(m => m.id === this.getDefaultModel())!;
|
||||
|
||||
if ( test_mode ) {
|
||||
return 'https://puter-sample-data.puter.site/image_example.png';
|
||||
@@ -75,6 +76,10 @@ export class GeminiImageGenerationProvider implements IImageProvider {
|
||||
throw new Error('`prompt` must be a non-empty string');
|
||||
}
|
||||
|
||||
if ( selectedModel.apiType === 'generateImages' ) {
|
||||
return this.#generateWithImagen(prompt, selectedModel, params);
|
||||
}
|
||||
|
||||
const allowedRatios = selectedModel.allowedRatios ?? [GEMINI_DEFAULT_RATIO];
|
||||
ratio = ratio && this.#isValidRatio(ratio, allowedRatios) ? ratio : allowedRatios[0];
|
||||
|
||||
@@ -197,6 +202,64 @@ export class GeminiImageGenerationProvider implements IImageProvider {
|
||||
return url;
|
||||
}
|
||||
|
||||
/**
 * Generates one image through the Imagen `generateImages` endpoint.
 *
 * The per-image price is read from the model's `costs['per-image']`, checked
 * against the actor's credits before the request, and recorded as usage only
 * after image bytes have actually been received.
 *
 * @param prompt - Text prompt forwarded to the API unchanged.
 * @param selectedModel - Model entry; its `id` is sent as the API model name.
 * @param params - Original request; only `ratio` and `quality` are read here.
 * @returns A `data:` URI holding the base64 image returned by the API.
 * @throws Error when no actor is in context, the model has no per-image cost,
 *         the response has no image, or the image was filtered.
 * @throws APIError `insufficient_funds` when the credit check fails.
 */
async #generateWithImagen (prompt: string, selectedModel: IGeminiImageModel, params: IGenerateParams): Promise<string> {
    const actor = Context.get('actor');
    if ( ! actor ) {
        throw new Error('actor not found in context');
    }

    const costCents = selectedModel.costs?.['per-image'];
    if ( costCents === undefined ) {
        throw new Error(`No per-image cost configured for model '${selectedModel.id}'`);
    }
    const costInMicroCents = Math.ceil(costCents * 1_000_000);

    const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
    if ( ! usageAllowed ) {
        throw APIError.create('insufficient_funds');
    }

    const allowedRatios = selectedModel.allowedRatios ?? [GEMINI_DEFAULT_RATIO];
    const ratio = params.ratio && this.#isValidRatio(params.ratio, allowedRatios)
        ? params.ratio
        : allowedRatios[0];

    // Reduce the ratio to lowest terms before formatting it. Without this the
    // pixel-based GEMINI_DEFAULT_RATIO fallback would be sent as "1024:1024"
    // rather than "1:1". Ratios already in lowest terms are unaffected.
    const { w, h } = ratio;
    let aspectRatio = `${w}:${h}`;
    if ( Number.isInteger(w) && Number.isInteger(h) && w > 0 && h > 0 ) {
        let a = w;
        let b = h;
        while ( b !== 0 ) {
            [a, b] = [b, a % b];
        }
        aspectRatio = `${w / a}:${h / a}`;
    }

    const config: Record<string, unknown> = {
        numberOfImages: 1,
        aspectRatio,
    };

    // `quality` is only forwarded (as `imageSize`) when the model lists it.
    if ( params.quality && selectedModel.allowedQualityLevels?.includes(params.quality) ) {
        config.imageSize = params.quality;
    }

    const response = await this.#client.models.generateImages({
        model: selectedModel.id,
        prompt,
        config,
    });

    const generated = response?.generatedImages;
    if ( !generated || generated.length === 0 ) {
        throw new Error('Imagen response did not include an image');
    }

    const entry = generated[0];
    if ( entry.raiFilteredReason ) {
        throw new Error(`Image was filtered: ${entry.raiFilteredReason}`);
    }

    const image = entry.image;
    if ( ! image?.imageBytes ) {
        throw new Error('Imagen response did not include image bytes');
    }

    // Usage is recorded only once a usable image is in hand.
    const usageKey = `gemini:${selectedModel.id}`;
    await this.#meteringService.incrementUsage(actor, usageKey, 1, costInMicroCents);

    const mimeType = image.mimeType ?? 'image/png';
    return `data:${mimeType};base64,${image.imageBytes}`;
}
|
||||
|
||||
#buildContents (prompt: string, input_images?: string[], input_image_mime_type?: string) {
|
||||
const parts: Record<string, unknown>[] = [{ text: prompt }];
|
||||
|
||||
|
||||
+81
-1
@@ -19,6 +19,10 @@
|
||||
|
||||
import { IImageModel } from '../types';
|
||||
|
||||
/**
 * Image model entry for the Gemini provider, extending the shared
 * `IImageModel` shape with the Google API surface the model is served by.
 */
export interface IGeminiImageModel extends IImageModel {
    /**
     * Which SDK call generates images for this model. The provider routes
     * `'generateImages'` models to its Imagen path; entries that omit the
     * field take the provider's other generation path.
     */
    apiType?:
        | 'generateContent'
        | 'generateImages';
}
|
||||
|
||||
// Fallback ratio used when a model entry does not declare `allowedRatios`.
export const GEMINI_DEFAULT_RATIO = { w: 1024, h: 1024 };
|
||||
|
||||
// Estimated image output token counts for pre-flight cost checks.
|
||||
@@ -37,7 +41,7 @@ export const GEMINI_ESTIMATED_IMAGE_TOKENS: Record<string, number> = {
|
||||
'gemini-3.1-flash-image-preview:4K': 2520,
|
||||
};
|
||||
|
||||
export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
|
||||
export const GEMINI_IMAGE_GENERATION_MODELS: IGeminiImageModel[] = [
|
||||
{
|
||||
puterId: 'google:google/gemini-2.5-flash-image',
|
||||
id: 'gemini-2.5-flash-image',
|
||||
@@ -148,4 +152,80 @@ export const GEMINI_IMAGE_GENERATION_MODELS: IImageModel[] = [
|
||||
'1K:1x1': 6.7,
|
||||
},
|
||||
},
|
||||
|
||||
// ── Imagen models (use generateImages API) ─────────────────────
|
||||
{
|
||||
puterId: 'google:google/imagen-4.0-fast',
|
||||
id: 'imagen-4.0-fast-generate-001',
|
||||
apiType: 'generateImages',
|
||||
name: 'Imagen 4.0 Fast',
|
||||
version: '1.0',
|
||||
costs_currency: 'usd-cents',
|
||||
index_cost_key: 'per-image',
|
||||
aliases: [
|
||||
'imagen-4.0-fast',
|
||||
'google/imagen-4.0-fast',
|
||||
'google:google/imagen-4.0-fast',
|
||||
],
|
||||
allowedRatios: [
|
||||
{ w: 1, h: 1 },
|
||||
{ w: 3, h: 4 },
|
||||
{ w: 4, h: 3 },
|
||||
{ w: 9, h: 16 },
|
||||
{ w: 16, h: 9 },
|
||||
],
|
||||
costs: {
|
||||
'per-image': 2, // $0.02 per image
|
||||
},
|
||||
},
|
||||
{
|
||||
puterId: 'google:google/imagen-4.0',
|
||||
id: 'imagen-4.0-generate-001',
|
||||
apiType: 'generateImages',
|
||||
name: 'Imagen 4.0',
|
||||
version: '1.0',
|
||||
costs_currency: 'usd-cents',
|
||||
index_cost_key: 'per-image',
|
||||
aliases: [
|
||||
'imagen-4.0',
|
||||
'google/imagen-4.0',
|
||||
'google:google/imagen-4.0',
|
||||
],
|
||||
allowedQualityLevels: ['1K', '2K'],
|
||||
allowedRatios: [
|
||||
{ w: 1, h: 1 },
|
||||
{ w: 3, h: 4 },
|
||||
{ w: 4, h: 3 },
|
||||
{ w: 9, h: 16 },
|
||||
{ w: 16, h: 9 },
|
||||
],
|
||||
costs: {
|
||||
'per-image': 4, // $0.04 per image
|
||||
},
|
||||
},
|
||||
{
|
||||
puterId: 'google:google/imagen-4.0-ultra',
|
||||
id: 'imagen-4.0-ultra-generate-001',
|
||||
apiType: 'generateImages',
|
||||
name: 'Imagen 4.0 Ultra',
|
||||
version: '1.0',
|
||||
costs_currency: 'usd-cents',
|
||||
index_cost_key: 'per-image',
|
||||
aliases: [
|
||||
'imagen-4.0-ultra',
|
||||
'google/imagen-4.0-ultra',
|
||||
'google:google/imagen-4.0-ultra',
|
||||
],
|
||||
allowedQualityLevels: ['1K', '2K'],
|
||||
allowedRatios: [
|
||||
{ w: 1, h: 1 },
|
||||
{ w: 3, h: 4 },
|
||||
{ w: 4, h: 3 },
|
||||
{ w: 9, h: 16 },
|
||||
{ w: 16, h: 9 },
|
||||
],
|
||||
costs: {
|
||||
'per-image': 6, // $0.06 per image
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
@@ -23,6 +23,7 @@ import BaseService from '../../BaseService.js';
|
||||
import { DriverService } from '../../drivers/DriverService.js';
|
||||
import { EventService } from '../../EventService.js';
|
||||
import { MeteringService } from '../../MeteringService/MeteringService.js';
|
||||
import { GeminiVideoGenerationProvider } from './providers/GeminiVideoGenerationProvider/GeminiVideoGenerationProvider.js';
|
||||
import { OpenAIVideoGenerationProvider } from './providers/OpenAIVideoGenerationProvider/OpenAIVideoGenerationProvider.js';
|
||||
import { TogetherVideoGenerationProvider } from './providers/TogetherVideoGenerationProvider/TogetherVideoGenerationProvider.js';
|
||||
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from './providers/types.js';
|
||||
@@ -108,6 +109,14 @@ export class AIVideoGenerationService extends BaseService {
|
||||
);
|
||||
}
|
||||
|
||||
const geminiVideoConfig = this.config.providers?.['gemini-video-generation'] || this.global_config?.services?.gemini;
|
||||
if ( geminiVideoConfig && (geminiVideoConfig.apiKey || geminiVideoConfig.secret_key) ) {
|
||||
this.#providers['gemini-video-generation'] = new GeminiVideoGenerationProvider(
|
||||
{ apiKey: geminiVideoConfig.apiKey || geminiVideoConfig.secret_key },
|
||||
this.meteringService,
|
||||
);
|
||||
}
|
||||
|
||||
// emit event for extensions to add providers
|
||||
const extensionProviders = {} as Record<string, IVideoProvider>;
|
||||
await this.eventService.emit('ai.video.registerProviders', extensionProviders);
|
||||
|
||||
+296
@@ -0,0 +1,296 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { GoogleGenAI, GenerateVideosOperation, GenerateVideosParameters } from '@google/genai';
|
||||
import APIError from '../../../../../api/APIError.js';
|
||||
import { Context } from '../../../../../util/context.js';
|
||||
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
|
||||
import { IGenerateVideoParams, IVideoModel, IVideoProvider } from '../types.js';
|
||||
import { TypedValue } from '../../../../drivers/meta/Runtime.js';
|
||||
import { GEMINI_VIDEO_GENERATION_MODELS, IGeminiVideoModel } from './models.js';
|
||||
|
||||
/** Sample clip returned when a request sets `test_mode`. */
const DEFAULT_TEST_VIDEO_URL = 'https://assets.puter.site/txt2vid.mp4';

/** Pause between two polls of a pending video operation, in milliseconds. */
const POLL_INTERVAL_MS = 10 * 1000;

/** Upper bound on how long a video operation is polled, in milliseconds (10 minutes). */
const DEFAULT_TIMEOUT_MS = 600_000;

/**
 * Maps a `WIDTHxHEIGHT` size string to the aspect ratio and resolution tier
 * sent to the API. Each tier contributes a landscape and a portrait entry.
 */
const DIMENSION_MAP: Record<string, { aspectRatio: string; resolution: string }> = {};
for ( const [resolution, longEdge, shortEdge] of [
    ['720p', 1280, 720],
    ['1080p', 1920, 1080],
    ['4k', 3840, 2160],
] as const ) {
    DIMENSION_MAP[`${longEdge}x${shortEdge}`] = { aspectRatio: '16:9', resolution };
    DIMENSION_MAP[`${shortEdge}x${longEdge}`] = { aspectRatio: '9:16', resolution };
}
|
||||
|
||||
/**
 * Video generation provider for Google's Veo models, accessed through the
 * `@google/genai` SDK.
 *
 * A request is priced up front, checked against the actor's credits, submitted
 * as a long-running operation, polled until it reports `done`, and metered
 * only after a video has actually been returned.
 */
export class GeminiVideoGenerationProvider implements IVideoProvider {
    #client: GoogleGenAI;
    #meteringService: MeteringService;

    /**
     * @param config - Must carry a non-empty `apiKey` for the Google GenAI client.
     * @param meteringService - Used for the credit check and usage recording.
     * @throws Error when `config.apiKey` is empty.
     */
    constructor (config: { apiKey: string }, meteringService: MeteringService) {
        if ( ! config.apiKey ) {
            throw new Error('Gemini video generation requires an API key');
        }
        this.#client = new GoogleGenAI({ apiKey: config.apiKey });
        this.#meteringService = meteringService;
    }

    /** @returns The `id` of the first entry in `GEMINI_VIDEO_GENERATION_MODELS`. */
    getDefaultModel (): string {
        return GEMINI_VIDEO_GENERATION_MODELS[0].id;
    }

    /**
     * @returns Every configured Veo model, each advertised under its bare id
     *          and its `google/<id>` form as aliases.
     */
    async models (): Promise<IVideoModel[]> {
        return GEMINI_VIDEO_GENERATION_MODELS.map(model => ({
            ...model,
            aliases: [model.id, `google/${model.id}`],
        }));
    }

    /**
     * Generates a single video.
     *
     * @param params - Generation request. `seconds` takes precedence over
     *        `duration`; `size` takes precedence over its alias `resolution`.
     * @returns A `TypedValue` wrapping either the video URL or, when the API
     *          returns inline bytes, a `data:` URI.
     * @throws APIError `field_invalid` when `prompt` is not a non-empty string.
     * @throws APIError `insufficient_funds` when the credit check fails.
     * @throws Error when no actor is in context, the model has no per-second
     *         cost, the operation fails or times out, or no video is returned.
     */
    async generate (params: IGenerateVideoParams): Promise<unknown> {
        const {
            prompt,
            model: requestedModel,
            seconds,
            duration,
            size,
            resolution,
            negative_prompt: negativePrompt,
            reference_images: referenceImages,
            input_reference: inputReference,
            last_frame: lastFrame,
            test_mode: testMode,
        } = params ?? {};

        if ( typeof prompt !== 'string' || !prompt.trim() ) {
            throw APIError.create('field_invalid', null, {
                key: 'prompt',
                expected: 'a non-empty string',
                got: prompt,
            });
        }

        const selectedModel = this.#getModel(requestedModel);

        // Test mode short-circuits before any pricing, credit check or API call.
        if ( testMode ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'video',
            }, DEFAULT_TEST_VIDEO_URL);
        }

        // First frame (image-to-video), only when the model accepts image input.
        const firstFrame = selectedModel.supportsImageInput
            && typeof inputReference === 'string' && inputReference.trim().length > 0
            ? inputReference
            : undefined;

        // Reference images (Veo 3.1 supports up to 3). Filter BEFORE deciding
        // whether reference mode is active, so an array of blank entries does
        // not force an 8s clip or suppress the first/last frame inputs.
        const validRefImages = selectedModel.supportsReferenceImages && Array.isArray(referenceImages)
            ? referenceImages
                .filter((img: string) => typeof img === 'string' && img.trim().length > 0)
                .slice(0, 3)
            : [];
        const hasRefImages = validRefImages.length > 0;

        // `resolution` is accepted as an alias of `size`; `size` wins when both are set.
        const requestedSize = size || (typeof resolution === 'string' ? resolution : undefined);
        const { aspectRatio, videoResolution } = this.#resolveAspectAndResolution(requestedSize, selectedModel);

        // 1080p and 4K require duration=8, as does reference-image mode.
        const is4K = videoResolution === '4k';
        const is1080p = videoResolution === '1080p';
        let durationSeconds = this.#coercePositiveInteger(seconds ?? duration)
            ?? selectedModel.durationSeconds?.[0] ?? 8;
        if ( is1080p || is4K || hasRefImages ) {
            durationSeconds = 8;
        }

        // Resolution-specific price when configured, otherwise the base per-second price.
        const basePerSecond = selectedModel.costs?.['per-second'];
        const perSecondCents = is4K
            ? selectedModel.costs?.['per-second-4k'] ?? basePerSecond
            : is1080p
                ? selectedModel.costs?.['per-second-1080p'] ?? basePerSecond
                : basePerSecond;
        if ( perSecondCents === undefined ) {
            throw new Error(`No per-second cost configured for video model '${selectedModel.id}'`);
        }
        const costCents = perSecondCents * durationSeconds;
        const costInMicroCents = Math.ceil(costCents * 1_000_000);

        const actor = Context.get('actor');
        if ( ! actor ) {
            throw new Error('actor not found in context');
        }

        const usageAllowed = await this.#meteringService.hasEnoughCredits(actor, costInMicroCents);
        if ( ! usageAllowed ) {
            throw APIError.create('insufficient_funds');
        }

        const config: Record<string, unknown> = {
            numberOfVideos: 1,
            durationSeconds,
        };

        if ( aspectRatio ) config.aspectRatio = aspectRatio;
        // Models with an empty `resolutions` list never get a `resolution` field.
        if ( videoResolution && selectedModel.resolutions.length > 0 ) {
            config.resolution = videoResolution;
        }
        if ( typeof negativePrompt === 'string' && negativePrompt.trim() ) {
            config.negativePrompt = negativePrompt;
        }

        // When referenceImages is set, image (first frame), video, and lastFrame are not supported.
        if ( hasRefImages ) {
            config.referenceImages = validRefImages.map((img: string) => ({
                image: this.#parseImageInput(img),
                referenceType: 'asset',
            }));
        }

        if ( !hasRefImages && typeof lastFrame === 'string' && lastFrame.trim() ) {
            config.lastFrame = this.#parseImageInput(lastFrame);
        }

        const generateParams: GenerateVideosParameters = {
            model: selectedModel.id,
            prompt,
            config,
        };

        if ( firstFrame !== undefined && !hasRefImages ) {
            generateParams.image = this.#parseImageInput(firstFrame);
        }

        let operation: GenerateVideosOperation;
        try {
            operation = await this.#client.models.generateVideos(generateParams);
        } catch (e) {
            console.error('Gemini video generation error:', e);
            throw e;
        }

        const completed = await this.#pollUntilComplete(operation);

        const generatedVideos = completed.response?.generatedVideos;
        if ( !generatedVideos || generatedVideos.length === 0 ) {
            const filtered = completed.response?.raiMediaFilteredCount ?? 0;
            if ( filtered > 0 ) {
                const reasons = completed.response?.raiMediaFilteredReasons?.join(', ') || 'content policy';
                throw new Error(`Video was filtered due to ${reasons}`);
            }
            throw new Error('Gemini response did not include a video');
        }

        const video = generatedVideos[0].video;
        if ( ! video ) {
            throw new Error('Gemini response video entry was empty');
        }

        // Usage is recorded only after a video entry has been received.
        const resTier = is4K ? ':4k' : is1080p && selectedModel.costs?.['per-second-1080p'] ? ':1080p' : '';
        const usageKey = `gemini:${selectedModel.id}${resTier}`;
        await this.#meteringService.incrementUsage(actor, usageKey, durationSeconds, costInMicroCents);

        if ( video.uri ) {
            return new TypedValue({
                $: 'string:url:web',
                content_type: 'video',
            }, video.uri);
        }

        if ( video.videoBytes ) {
            const mimeType = video.mimeType ?? 'video/mp4';
            const dataUri = `data:${mimeType};base64,${video.videoBytes}`;
            return new TypedValue({
                $: 'string:url:data',
                content_type: 'video',
            }, dataUri);
        }

        throw new Error('Gemini video response contained neither uri nor videoBytes');
    }

    /**
     * Polls a video operation every `POLL_INTERVAL_MS` until it reports `done`.
     *
     * @throws Error when `DEFAULT_TIMEOUT_MS` elapses first, or when the
     *         finished operation carries an `error`.
     */
    async #pollUntilComplete (operation: GenerateVideosOperation): Promise<GenerateVideosOperation> {
        let op = operation;
        const start = Date.now();

        while ( !op.done ) {
            if ( Date.now() - start > DEFAULT_TIMEOUT_MS ) {
                throw new Error('Timed out waiting for Gemini video generation to complete');
            }

            await this.#delay(POLL_INTERVAL_MS);
            op = await this.#client.operations.getVideosOperation({ operation: op });
        }

        if ( op.error ) {
            const msg = (op.error as Record<string, unknown>).message ?? JSON.stringify(op.error);
            throw new Error(`Gemini video generation failed: ${msg}`);
        }

        return op;
    }

    /**
     * Converts an image string into the `{ imageBytes, mimeType }` shape used
     * for image inputs.
     *
     * A `data:<mime>[;params];base64,<payload>` URI yields its payload and bare
     * MIME type (media-type parameters such as `charset` are dropped). Any
     * other input is passed through unchanged as PNG bytes.
     */
    #parseImageInput (input: string): { imageBytes: string; mimeType: string } {
        const BASE64_MARKER = ';base64';
        if ( input.startsWith('data:') ) {
            const commaIdx = input.indexOf(',');
            if ( commaIdx !== -1 ) {
                const header = input.substring(5, commaIdx);
                if ( header.endsWith(BASE64_MARKER) ) {
                    const mimeType = header
                        .slice(0, -BASE64_MARKER.length)
                        .split(';')[0]
                        .trim();
                    if ( mimeType.length > 0 ) {
                        return { imageBytes: input.substring(commaIdx + 1), mimeType };
                    }
                }
            }
        }
        return { imageBytes: input, mimeType: 'image/png' };
    }

    /**
     * Resolves a requested model name to a configured model.
     *
     * Accepts the bare id, the `google/<id>` alias advertised by `models()`,
     * or the model's `puterId`. Anything else falls back to the first
     * configured model.
     */
    #getModel (requestedModel?: string): IGeminiVideoModel {
        if ( typeof requestedModel === 'string' && requestedModel.length > 0 ) {
            const match = GEMINI_VIDEO_GENERATION_MODELS.find(m =>
                m.id === requestedModel
                || `google/${m.id}` === requestedModel
                || m.puterId === requestedModel);
            if ( match ) return match;
        }
        return GEMINI_VIDEO_GENERATION_MODELS[0];
    }

    /**
     * Maps a `WIDTHxHEIGHT` size string to an aspect ratio and resolution tier
     * via `DIMENSION_MAP`. Missing or unrecognised sizes fall back to the
     * model's first aspect ratio and first resolution (which is `undefined`
     * when the model lists no resolutions).
     */
    #resolveAspectAndResolution (
        size: string | undefined,
        model: IGeminiVideoModel,
    ): { aspectRatio: string; videoResolution: string | undefined } {
        const mapped = size ? DIMENSION_MAP[size] : undefined;
        if ( mapped ) {
            return {
                aspectRatio: mapped.aspectRatio,
                videoResolution: mapped.resolution,
            };
        }

        return {
            aspectRatio: model.aspectRatios[0],
            videoResolution: model.resolutions[0],
        };
    }

    /**
     * Coerces a number (rounded) or numeric string (parsed base-10) into a
     * positive integer.
     *
     * @returns The integer, or `undefined` for any other type or a
     *          non-positive / non-finite result.
     */
    #coercePositiveInteger (value: unknown): number | undefined {
        if ( typeof value === 'number' && Number.isFinite(value) ) {
            const rounded = Math.round(value);
            return rounded > 0 ? rounded : undefined;
        }
        if ( typeof value === 'string' ) {
            const numeric = Number.parseInt(value, 10);
            return Number.isFinite(numeric) && numeric > 0 ? numeric : undefined;
        }
        return undefined;
    }

    /** Resolves after `ms` milliseconds. */
    async #delay (ms: number): Promise<void> {
        return await new Promise(resolve => setTimeout(resolve, ms));
    }
}
|
||||
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present Puter Technologies Inc.
|
||||
*
|
||||
* This file is part of Puter.
|
||||
*
|
||||
* Puter is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published
|
||||
* by the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import { IVideoModel } from '../types.js';
|
||||
|
||||
/**
 * Veo model entry: the shared `IVideoModel` fields plus the capability data
 * the Gemini video provider consults when building a request.
 */
export interface IGeminiVideoModel extends IVideoModel {
    /** Aspect ratios offered by the model, e.g. `'16:9'`; the provider uses the first as its fallback. */
    aspectRatios: string[];

    /** Resolution tiers offered by the model, e.g. `'720p'`; may be empty. */
    resolutions: string[];

    /** Whether a first-frame image (image-to-video) is accepted. */
    supportsImageInput: boolean;

    /** Whether reference images are accepted. */
    supportsReferenceImages: boolean;
}
|
||||
|
||||
// Dimension strings used by the service layer for validation.
const STANDARD_DIMENSIONS = ['1280x720', '720x1280', '1920x1080', '1080x1920'];
const DIMENSIONS_WITH_4K = STANDARD_DIMENSIONS.concat(['3840x2160', '2160x3840']);

/** The fields that actually differ from one Veo entry to the next. */
type VeoModelSpec = Pick<
    IGeminiVideoModel,
    'puterId' | 'id' | 'name' | 'costs' | 'durationSeconds' | 'dimensions' | 'resolutions' | 'supportsReferenceImages'
>;

/**
 * Expands a spec into a full model entry. Every Veo model shares the same
 * currency, cost key, pair of aspect ratios and image-input support; each
 * entry gets its own `aspectRatios` array.
 */
const defineVeoModel = (spec: VeoModelSpec): IGeminiVideoModel => ({
    puterId: spec.puterId,
    id: spec.id,
    name: spec.name,
    costs_currency: 'usd-cents',
    costs: spec.costs,
    output_cost_key: 'per-second',
    durationSeconds: spec.durationSeconds,
    dimensions: spec.dimensions,
    aspectRatios: ['16:9', '9:16'],
    resolutions: spec.resolutions,
    supportsImageInput: true,
    supportsReferenceImages: spec.supportsReferenceImages,
});

// Model capabilities: https://ai.google.dev/gemini-api/docs/video
// Prices (US cents per second of video): https://ai.google.dev/gemini-api/docs/pricing
export const GEMINI_VIDEO_GENERATION_MODELS: IGeminiVideoModel[] = [
    defineVeoModel({
        puterId: 'google:google/veo-2.0',
        id: 'veo-2.0-generate-001',
        name: 'Veo 2.0',
        costs: { 'per-second': 35 },
        durationSeconds: [5, 6, 8],
        dimensions: ['1280x720', '720x1280'],
        resolutions: [],
        supportsReferenceImages: false,
    }),
    defineVeoModel({
        puterId: 'google:google/veo-3.0',
        id: 'veo-3.0-generate-001',
        name: 'Veo 3.0',
        costs: { 'per-second': 40 },
        durationSeconds: [4, 6, 8],
        dimensions: STANDARD_DIMENSIONS,
        resolutions: ['720p', '1080p'],
        supportsReferenceImages: false,
    }),
    defineVeoModel({
        puterId: 'google:google/veo-3.0-fast',
        id: 'veo-3.0-fast-generate-001',
        name: 'Veo 3.0 Fast',
        costs: { 'per-second': 15 },
        durationSeconds: [4, 6, 8],
        dimensions: STANDARD_DIMENSIONS,
        resolutions: ['720p', '1080p'],
        supportsReferenceImages: false,
    }),
    defineVeoModel({
        puterId: 'google:google/veo-3.1',
        id: 'veo-3.1-generate-preview',
        name: 'Veo 3.1',
        costs: { 'per-second': 40, 'per-second-4k': 60 },
        durationSeconds: [4, 6, 8],
        dimensions: DIMENSIONS_WITH_4K,
        resolutions: ['720p', '1080p', '4k'],
        supportsReferenceImages: true,
    }),
    defineVeoModel({
        puterId: 'google:google/veo-3.1-fast',
        id: 'veo-3.1-fast-generate-preview',
        name: 'Veo 3.1 Fast',
        costs: { 'per-second': 15, 'per-second-4k': 35 },
        durationSeconds: [4, 6, 8],
        dimensions: DIMENSIONS_WITH_4K,
        resolutions: ['720p', '1080p', '4k'],
        supportsReferenceImages: true,
    }),
    defineVeoModel({
        puterId: 'google:google/veo-3.1-lite',
        id: 'veo-3.1-lite-generate-preview',
        name: 'Veo 3.1 Lite',
        costs: { 'per-second': 5, 'per-second-1080p': 8 },
        durationSeconds: [4, 6, 8],
        dimensions: STANDARD_DIMENSIONS,
        resolutions: ['720p', '1080p'],
        supportsReferenceImages: false,
    }),
];
|
||||
@@ -60,6 +60,7 @@ export interface IGenerateVideoParams {
|
||||
negative_prompt?: string;
|
||||
reference_images?: string[];
|
||||
frame_images?: object[];
|
||||
last_frame?: string;
|
||||
metadata?: object;
|
||||
input_reference?: unknown;
|
||||
no_extra_params?: boolean;
|
||||
|
||||
@@ -49,14 +49,14 @@ For more details, see the [OpenAI API reference](https://platform.openai.com/doc
|
||||
|
||||
#### Gemini Options
|
||||
|
||||
Available when `provider: 'gemini'` or inferred from model (`gemini-2.5-flash-image-preview`, `gemini-3-pro-image-preview`):
|
||||
Available when `provider: 'gemini'` or inferred from model:
|
||||
|
||||
| Option | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `model` | `String` | Image model to use. |
|
||||
| `ratio` | `Object` | Currently only `{ w: 1024, h: 1024 }` is supported |
|
||||
| `input_image` | `String` | Base64 encoded input image for image-to-image generation |
|
||||
| `input_image_mime_type` | `String` | MIME type of the input image. Options: `'image/png'`, `'image/jpeg'`, `'image/jpg'`, `'image/webp'` |
|
||||
| `ratio` | `Object` | Aspect ratio as `{ w, h }` (e.g., `{ w: 16, h: 9 }`). |
|
||||
| `quality` | `String` | Output size tier: `'512'`, `'1K'`, `'2K'`, `'4K'` (availability varies by model) |
|
||||
| `input_images` | `Array<String>` | Base64 input images for image-to-image (Gemini models only) |
|
||||
|
||||
#### xAI (Grok) Options
|
||||
|
||||
|
||||
@@ -31,14 +31,13 @@ Additional settings for the generation request. Available options depend on the
|
||||
| Option | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `prompt` | `String` | Text description for the video generation |
|
||||
| `provider` | `String` | The AI provider to use. `'openai' (default) \| 'together'` |
|
||||
| `model` | `String` | Video model to use (provider-specific). Defaults to `'sora-2'` |
|
||||
| `seconds` | `Number` | Target clip length in seconds |
|
||||
| `test_mode` | `Boolean` | When `true`, returns a sample video without using credits |
|
||||
|
||||
#### OpenAI Options
|
||||
|
||||
Available when `provider: 'openai'` or inferred from model (`sora-2`, `sora-2-pro`):
|
||||
Available when using model `sora-2` or `sora-2-pro`:
|
||||
|
||||
| Option | Type | Description |
|
||||
|--------|------|-------------|
|
||||
@@ -49,9 +48,25 @@ Available when `provider: 'openai'` or inferred from model (`sora-2`, `sora-2-pr
|
||||
|
||||
For more details about each option, see the [OpenAI API reference](https://platform.openai.com/docs/api-reference/videos/create).
|
||||
|
||||
#### Google (Veo) Options
|
||||
|
||||
Available when using a Veo model (`veo-2.0-generate-001`, `veo-3.0-generate-001`, `veo-3.1-generate-preview`, etc.):
|
||||
|
||||
| Option | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| `model` | `String` | Video model to use. Available: `'veo-2.0-generate-001'`, `'veo-3.0-generate-001'`, `'veo-3.0-fast-generate-001'`, `'veo-3.1-generate-preview'`, `'veo-3.1-fast-generate-preview'`, `'veo-3.1-lite-generate-preview'` |
|
||||
| `seconds` | `Number` | Target clip length in seconds. Veo 2.0: `5`, `6`, `8`. Veo 3.x: `4`, `6`, `8`. Note: 1080p and 4K output is always rendered at 8 seconds; any other `seconds` value is overridden to `8` |
|
||||
| `size` | `String` | Output dimensions (e.g., `'1280x720'`, `'1920x1080'`, `'3840x2160'`). `resolution` is an alias. 4K sizes are only available on Veo 3.1 and Veo 3.1 Fast (not Veo 3.1 Lite) |
|
||||
| `negative_prompt` | `String` | Text describing what to avoid in the video |
|
||||
| `input_reference` | `String` | Base64 image used as the first frame (image-to-video). |
|
||||
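| `reference_images` | `Array<String>` | Up to 3 base64 images used as style/asset references. Supported on Veo 3.1 and Veo 3.1 Fast only. When provided, the clip length is fixed at 8 seconds and `input_reference` and `last_frame` are ignored |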
|
||||
| `last_frame` | `String` | Base64 image used as the last frame |
|
||||
|
||||
For more details, see the [Google Veo API reference](https://ai.google.dev/gemini-api/docs/video).
|
||||
|
||||
#### TogetherAI Options
|
||||
|
||||
Available when `provider: 'together'` or inferred from model:
|
||||
Available when using a TogetherAI model:
|
||||
|
||||
| Option | Type | Description |
|
||||
|--------|------|-------------|
|
||||
@@ -76,7 +91,7 @@ Any properties not set fall back to provider defaults.
|
||||
|
||||
A `Promise` that resolves to an `HTMLVideoElement`. The element is preloaded, has `controls` enabled, and exposes metadata via `data-mime-type` and `data-source` attributes. Append it to the DOM to display the generated clip immediately.
|
||||
|
||||
> **Note:** Real Sora renders can take a couple of minutes to complete. The returned promise resolves only when the MP4 is ready, so keep your UI responsive (for example, by showing a spinner) while you wait. Each successful generation consumes the user’s AI credits in accordance with the model, duration, and resolution you request.
|
||||
> **Note:** Video generation can take several minutes to complete. The returned promise resolves only when the video is ready, so keep your UI responsive (for example, by showing a spinner) while you wait. Each successful generation consumes the user’s AI credits in accordance with the model, duration, and resolution you request.
|
||||
|
||||
## Examples
|
||||
|
||||
|
||||
Reference in New Issue
Block a user