Add Gemini TTS provider and integrate client/docs (#2889)

Introduce a new Gemini TTS provider and wire it through the driver, client, docs, and examples. Adds src/backend/drivers/ai-tts/providers/gemini/GeminiTTSProvider.ts (Google GenAI client usage, PCM->WAV wrapping, metering, model/voice validation) and a costs table in providers/gemini/costs.ts. Registers the provider in TTSDriver, exposes the alias gemini-tts, and prefers it in provider selection. Updates puter-js client to recognize "gemini" provider/engine and route driver calls to gemini-tts. Documentation updated with Gemini options, a usage example, and a playground example HTML file.
This commit is contained in:
Nariman Jelveh
2026-05-03 09:33:50 -07:00
committed by GitHub
parent cb27fd1f98
commit 33d5c0737e
7 changed files with 543 additions and 13 deletions
+35 -2
View File
@@ -23,6 +23,7 @@ import type { DriverStreamResult } from '../meta.js';
import { PuterDriver } from '../types.js';
import { AWSPollyTTSProvider } from './providers/awsPolly/AWSPollyTTSProvider.js';
import { ElevenLabsTTSProvider } from './providers/elevenlabs/ElevenLabsTTSProvider.js';
import { GeminiTTSProvider } from './providers/gemini/GeminiTTSProvider.js';
import { OpenAITTSProvider } from './providers/openai/OpenAITTSProvider.js';
import type {
ISynthesizeArgs,
@@ -43,12 +44,18 @@ import type {
// than passing `{ provider }` in args, so alias the unified driver under
// the names the client expects. `#providerFromAlias` normalizes those
// aliases to the internal provider keys used by `#providers`.
const TTS_ALIASES = ['aws-polly', 'openai-tts', 'elevenlabs-tts'] as const;
// Client-facing driver names under which the unified TTS driver is aliased.
const TTS_ALIASES = [
'aws-polly',
'openai-tts',
'elevenlabs-tts',
'gemini-tts',
] as const;
// Union of the literal alias names listed in TTS_ALIASES.
type TTSAlias = (typeof TTS_ALIASES)[number];
// Maps each client-facing alias to the internal provider key used by
// `#providers`.
const ALIAS_TO_PROVIDER: Record<TTSAlias, string> = {
'aws-polly': 'aws-polly',
'openai-tts': 'openai',
'elevenlabs-tts': 'elevenlabs',
'gemini-tts': 'gemini',
};
export class TTSDriver extends PuterDriver {
@@ -247,14 +254,40 @@ export class TTSDriver extends PuterDriver {
);
}
}
this.#registerGeminiProvider(providers);
}
#registerGeminiProvider(providers: Record<string, unknown>) {
const m = this.services.metering;
const gemini = (providers['gemini'] ?? providers['gemini-tts']) as
| Record<string, unknown>
| undefined;
const geminiKey =
(gemini?.apiKey as string | undefined) ??
(gemini?.api_key as string | undefined) ??
(gemini?.key as string | undefined);
if (geminiKey) {
try {
this.#providers['gemini'] = new GeminiTTSProvider(m, {
apiKey: geminiKey,
});
} catch (e) {
console.warn(
'[TTSDriver] Failed to init Gemini TTS provider:',
(e as Error).message,
);
}
}
}
#getDefaultProviderName(): string | null {
const names = Object.keys(this.#providers);
if (names.length === 0) return null;
// Prefer openai, then elevenlabs, then aws-polly
// Prefer openai, then elevenlabs, then gemini, then aws-polly
if (this.#providers['openai']) return 'openai';
if (this.#providers['elevenlabs']) return 'elevenlabs';
if (this.#providers['gemini']) return 'gemini';
return names[0];
}
}
@@ -0,0 +1,361 @@
/**
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { GoogleGenAI } from '@google/genai';
import { Readable } from 'node:stream';
import { HttpError } from '../../../../core/http/HttpError.js';
import { Context } from '../../../../core/context.js';
import type { MeteringService } from '../../../../services/metering/MeteringService.js';
import type { DriverStreamResult } from '../../../meta.js';
import type { ITTSVoice, ITTSEngine, ISynthesizeArgs } from '../../types.js';
import { TTSProvider } from '../TTSProvider.js';
import { GEMINI_TTS_COSTS } from './costs.js';
// Model used by `synthesize` when the caller does not pass `model`.
const DEFAULT_MODEL = 'gemini-2.5-flash-preview-tts';
// Voice used by `synthesize` when the caller does not pass `voice`.
const DEFAULT_VOICE = 'Kore';
// Canned clip returned in `test_mode` instead of calling the Gemini API.
const SAMPLE_AUDIO_URL = 'https://puter-sample-data.puter.site/tts_example.mp3';
// Models accepted by `synthesize` and reported by `listEngines`.
const GEMINI_TTS_MODELS = [
{
id: 'gemini-2.5-flash-preview-tts',
name: 'Gemini 2.5 Flash TTS',
},
{
id: 'gemini-2.5-pro-preview-tts',
name: 'Gemini 2.5 Pro TTS',
},
{
id: 'gemini-3.1-flash-tts-preview',
name: 'Gemini 3.1 Flash TTS',
},
];
// Prebuilt voices accepted by `synthesize` (ids are compared
// case-insensitively there) and reported by `listVoices`. `description` is a
// short style label surfaced to clients.
const GEMINI_TTS_VOICES = [
{ id: 'Zephyr', name: 'Zephyr', description: 'Bright' },
{ id: 'Puck', name: 'Puck', description: 'Upbeat' },
{ id: 'Charon', name: 'Charon', description: 'Informative' },
{ id: 'Kore', name: 'Kore', description: 'Firm' },
{ id: 'Fenrir', name: 'Fenrir', description: 'Excitable' },
{ id: 'Leda', name: 'Leda', description: 'Youthful' },
{ id: 'Orus', name: 'Orus', description: 'Firm' },
{ id: 'Aoede', name: 'Aoede', description: 'Breezy' },
{ id: 'Callirrhoe', name: 'Callirrhoe', description: 'Easy-going' },
{ id: 'Autonoe', name: 'Autonoe', description: 'Bright' },
{ id: 'Enceladus', name: 'Enceladus', description: 'Breathy' },
{ id: 'Iapetus', name: 'Iapetus', description: 'Clear' },
{ id: 'Umbriel', name: 'Umbriel', description: 'Easy-going' },
{ id: 'Algieba', name: 'Algieba', description: 'Smooth' },
{ id: 'Despina', name: 'Despina', description: 'Smooth' },
{ id: 'Erinome', name: 'Erinome', description: 'Clear' },
{ id: 'Algenib', name: 'Algenib', description: 'Gravelly' },
{ id: 'Rasalgethi', name: 'Rasalgethi', description: 'Informative' },
{ id: 'Laomedeia', name: 'Laomedeia', description: 'Upbeat' },
{ id: 'Achernar', name: 'Achernar', description: 'Soft' },
{ id: 'Alnilam', name: 'Alnilam', description: 'Firm' },
{ id: 'Schedar', name: 'Schedar', description: 'Even' },
{ id: 'Gacrux', name: 'Gacrux', description: 'Mature' },
{ id: 'Pulcherrima', name: 'Pulcherrima', description: 'Forward' },
{ id: 'Achird', name: 'Achird', description: 'Friendly' },
{ id: 'Zubenelgenubi', name: 'Zubenelgenubi', description: 'Casual' },
{ id: 'Vindemiatrix', name: 'Vindemiatrix', description: 'Gentle' },
{ id: 'Sadachbia', name: 'Sadachbia', description: 'Lively' },
{ id: 'Sadaltager', name: 'Sadaltager', description: 'Knowledgeable' },
{ id: 'Sulafat', name: 'Sulafat', description: 'Warm' },
];
/**
 * Gemini TTS provider. Calls the Gemini generateContent API with
 * `responseModalities: ["AUDIO"]` and `speechConfig` to synthesize speech.
 * Raw PCM returned by the API is wrapped in a WAV container; any other
 * audio type is passed through unchanged.
 */
export class GeminiTTSProvider extends TTSProvider {
    readonly providerName = 'gemini';
    #client: GoogleGenAI;

    /**
     * @param meteringService Used for credit checks and usage recording.
     * @param config Provider configuration; `apiKey` is required.
     * @throws Error when `config.apiKey` is empty.
     */
    constructor(meteringService: MeteringService, config: { apiKey: string }) {
        super(meteringService, config);
        if (!config.apiKey) {
            throw new Error('Gemini TTS requires an API key');
        }
        this.#client = new GoogleGenAI({ apiKey: config.apiKey });
    }

    /**
     * List the prebuilt Gemini voices. Every voice is reported as supported
     * by every model in `GEMINI_TTS_MODELS`.
     */
    async listVoices(): Promise<ITTSVoice[]> {
        return GEMINI_TTS_VOICES.map((voice) => ({
            id: voice.id,
            name: voice.name,
            description: voice.description,
            provider: 'gemini',
            supported_models: GEMINI_TTS_MODELS.map((m) => m.id),
        }));
    }

    /** List the Gemini TTS models exposed as engines. */
    async listEngines(): Promise<ITTSEngine[]> {
        return GEMINI_TTS_MODELS.map((model) => ({
            id: model.id,
            name: model.name,
            provider: 'gemini',
        }));
    }

    /**
     * Report per-token prices for every model in `GEMINI_TTS_COSTS`.
     * The table holds cents per 1M tokens, so each value is divided by
     * 1,000,000 before being converted to micro-cents.
     */
    override getReportedCosts(): Record<string, unknown>[] {
        return Object.entries(GEMINI_TTS_COSTS).map(([model, costs]) => ({
            usageType: `gemini:${model}:tts`,
            ucentsInputPerToken: this.#toMicroCents(costs.input / 1_000_000),
            ucentsOutputAudioPerToken: this.#toMicroCents(
                costs.output_audio / 1_000_000,
            ),
            unit: 'token',
            source: 'driver:aiTts/gemini',
        }));
    }

    /**
     * Synthesize `args.text` with Gemini and return the audio as a stream.
     *
     * @param args Synthesis arguments. `model` defaults to `DEFAULT_MODEL`,
     * `voice` to `DEFAULT_VOICE`; `instructions` is prepended to the prompt;
     * `test_mode` short-circuits to a sample clip without calling the API.
     * @returns A stream result with the audio (WAV when the API returned raw
     * PCM), or `{ url, content_type }` in test mode.
     * @throws HttpError 400 when `text` is missing or `model`/`voice` is
     * unknown, 402 when the actor lacks credits, 500 when the model has no
     * cost entry, 502 when the Gemini call fails or returns no audio.
     */
    async synthesize(
        args: ISynthesizeArgs,
    ): Promise<DriverStreamResult | { url: string; content_type: string }> {
        const {
            text,
            voice: voiceArg,
            model: modelArg,
            instructions,
            test_mode,
        } = args;

        if (test_mode) {
            return { url: SAMPLE_AUDIO_URL, content_type: 'audio' };
        }

        if (typeof text !== 'string' || !text.trim()) {
            throw new HttpError(400, 'Missing required field: text', {
                legacyCode: 'field_required',
                fields: { key: 'text' },
            });
        }

        const model = modelArg || DEFAULT_MODEL;
        if (!GEMINI_TTS_MODELS.some(({ id }) => id === model)) {
            const expectedModels = GEMINI_TTS_MODELS.map(({ id }) => id).join(
                ', ',
            );
            throw new HttpError(
                400,
                `Invalid model: ${model}. Expected: ${expectedModels}`,
                {
                    legacyCode: 'field_invalid',
                    fields: {
                        key: 'model',
                        expected: expectedModels,
                        got: model,
                    },
                },
            );
        }

        // Voices are matched case-insensitively, but the canonical id from
        // the table is what gets sent to the API so that e.g. "puck" and
        // "Puck" produce the same request.
        const requestedVoice = voiceArg || DEFAULT_VOICE;
        const requestedVoiceKey = requestedVoice.toLowerCase();
        const knownVoice = GEMINI_TTS_VOICES.find(
            ({ id }) => id.toLowerCase() === requestedVoiceKey,
        );
        if (!knownVoice) {
            const expectedVoices = GEMINI_TTS_VOICES.map(({ id }) => id).join(
                ', ',
            );
            throw new HttpError(
                400,
                `Invalid voice: ${requestedVoice}. Expected: ${expectedVoices}`,
                {
                    legacyCode: 'field_invalid',
                    fields: {
                        key: 'voice',
                        expected: expectedVoices,
                        got: requestedVoice,
                    },
                },
            );
        }
        const voice = knownVoice.id;

        const actor = Context.get('actor')!;
        const costs = GEMINI_TTS_COSTS[model];
        if (!costs) {
            throw new HttpError(500, `No cost data for model: ${model}`);
        }

        // Estimate input tokens (~4 chars per token) and a rough output
        // audio duration (~150 words/min, 25 tokens/sec). The text is
        // trimmed first so surrounding whitespace is not counted as words.
        const estimatedInputTokens = Math.max(1, Math.ceil(text.length / 4));
        const wordCount = text.trim().split(/\s+/).length;
        const estimatedDurationSec = Math.max(1, (wordCount / 150) * 60);
        const estimatedOutputTokens = Math.ceil(estimatedDurationSec * 25);
        const estimatedInputCostCents =
            (estimatedInputTokens / 1_000_000) * costs.input;
        const estimatedOutputCostCents =
            (estimatedOutputTokens / 1_000_000) * costs.output_audio;
        const estimatedTotalMicroCents = this.#toMicroCents(
            estimatedInputCostCents + estimatedOutputCostCents,
        );

        const usageAllowed = await this.meteringService.hasEnoughCredits(
            actor,
            estimatedTotalMicroCents,
        );
        if (!usageAllowed) {
            throw new HttpError(402, 'Insufficient funds', {
                legacyCode: 'insufficient_funds',
            });
        }

        // The text is framed as a transcript to read aloud ("Say the
        // following text aloud:") so the model produces audio of the text
        // rather than a conversational reply. Optional style instructions
        // go in front of that framing.
        const inputText = instructions
            ? `${instructions}\n\nSay the following text aloud:\n${text}`
            : `Say the following text aloud:\n${text}`;

        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        let response: any;
        try {
            response = await this.#client.models.generateContent({
                model,
                contents: [{ parts: [{ text: inputText }] }],
                config: {
                    responseModalities: ['AUDIO'],
                    speechConfig: {
                        voiceConfig: {
                            prebuiltVoiceConfig: { voiceName: voice },
                        },
                    },
                },
            });
        } catch (e: unknown) {
            // Read `.message` defensively: a rejection value of
            // null/undefined must not turn into a TypeError here.
            const rawMessage = (e as { message?: unknown } | null | undefined)
                ?.message;
            const msg =
                typeof rawMessage === 'string' ? rawMessage : String(e);
            console.error('[GeminiTTSProvider] API error:', msg);
            throw new HttpError(502, `Gemini TTS API error: ${msg}`, {
                fields: { provider: 'gemini' },
            });
        }

        // Extract audio data from the response. Take the first part that
        // actually carries inline data rather than assuming it is parts[0].
        const parts = response?.candidates?.[0]?.content?.parts;
        const part = Array.isArray(parts)
            ? parts.find((candidatePart) => candidatePart?.inlineData?.data)
            : undefined;
        if (!part?.inlineData?.data) {
            throw new HttpError(502, 'Gemini TTS did not return audio data', {
                fields: { provider: 'gemini' },
            });
        }

        const audioBase64: string = part.inlineData.data;
        const mimeType: string =
            part.inlineData.mimeType || 'audio/L16;rate=24000';

        // Convert base64 PCM to a WAV buffer for broad client compatibility
        const pcmBuffer = Buffer.from(audioBase64, 'base64');
        // Compare only the base type, case-insensitively, ignoring any
        // `;param=value` suffix.
        const baseMimeType = mimeType.split(';')[0].trim().toLowerCase();
        let outputBuffer: Buffer;
        let contentType: string;
        if (baseMimeType === 'audio/l16' || baseMimeType === 'audio/pcm') {
            // Wrap raw 16-bit PCM in a WAV container, honoring the sample
            // rate / channel count advertised in the MIME type (24kHz mono
            // when absent). Samples are written as-is and assumed to be
            // little-endian — TODO confirm against the API.
            const { sampleRate, channels } = this.#pcmFormatFromMime(mimeType);
            outputBuffer = this.#wrapPcmInWav(
                pcmBuffer,
                sampleRate,
                channels,
                16,
            );
            contentType = 'audio/wav';
        } else {
            // If the API returns encoded audio (unlikely today), pass through
            outputBuffer = pcmBuffer;
            contentType = mimeType;
        }

        // Meter actual usage from response metadata, falling back to the
        // pre-call estimates when the API did not report token counts.
        const usage = response.usageMetadata;
        const actualInputTokens =
            typeof usage?.promptTokenCount === 'number'
                ? usage.promptTokenCount
                : estimatedInputTokens;
        const actualOutputTokens =
            typeof usage?.candidatesTokenCount === 'number'
                ? usage.candidatesTokenCount
                : estimatedOutputTokens;
        const inputCostCents = (actualInputTokens / 1_000_000) * costs.input;
        const outputCostCents =
            (actualOutputTokens / 1_000_000) * costs.output_audio;

        const usagePrefix = `gemini:${model}`;
        this.meteringService.batchIncrementUsages(actor, [
            {
                usageType: `${usagePrefix}:input`,
                usageAmount: Math.max(actualInputTokens, 1),
                costOverride: this.#toMicroCents(inputCostCents),
            },
            {
                usageType: `${usagePrefix}:output:audio`,
                usageAmount: Math.max(actualOutputTokens, 1),
                costOverride: this.#toMicroCents(outputCostCents),
            },
        ]);

        const stream = Readable.from(outputBuffer);
        return {
            dataType: 'stream',
            content_type: contentType,
            chunked: true,
            stream,
        };
    }

    /**
     * Read `rate=` and `channels=` parameters from a PCM MIME type such as
     * `audio/L16;codec=pcm;rate=24000`. Missing or non-positive values fall
     * back to 24000 Hz and 1 channel.
     */
    #pcmFormatFromMime(mimeType: string): {
        sampleRate: number;
        channels: number;
    } {
        const rate = Number(/;\s*rate=(\d+)/i.exec(mimeType)?.[1]);
        const channels = Number(/;\s*channels=(\d+)/i.exec(mimeType)?.[1]);
        return {
            sampleRate: Number.isInteger(rate) && rate > 0 ? rate : 24000,
            channels:
                Number.isInteger(channels) && channels > 0 ? channels : 1,
        };
    }

    /**
     * Wrap raw PCM samples in a WAV container so browsers can play it.
     *
     * @param pcm Raw sample bytes, copied verbatim after the header.
     * @param sampleRate Samples per second per channel.
     * @param channels Number of interleaved channels.
     * @param bitsPerSample Bit depth of each sample.
     * @returns A new buffer: 44-byte RIFF/WAVE header followed by `pcm`.
     */
    #wrapPcmInWav(
        pcm: Buffer,
        sampleRate: number,
        channels: number,
        bitsPerSample: number,
    ): Buffer {
        const byteRate = (sampleRate * channels * bitsPerSample) / 8;
        const blockAlign = (channels * bitsPerSample) / 8;
        const dataSize = pcm.length;
        const headerSize = 44;
        const buffer = Buffer.alloc(headerSize + dataSize);

        // RIFF header; the chunk size excludes the 8-byte "RIFF"+size prefix
        buffer.write('RIFF', 0);
        buffer.writeUInt32LE(36 + dataSize, 4);
        buffer.write('WAVE', 8);

        // fmt sub-chunk
        buffer.write('fmt ', 12);
        buffer.writeUInt32LE(16, 16); // sub-chunk size
        buffer.writeUInt16LE(1, 20); // PCM format
        buffer.writeUInt16LE(channels, 22);
        buffer.writeUInt32LE(sampleRate, 24);
        buffer.writeUInt32LE(byteRate, 28);
        buffer.writeUInt16LE(blockAlign, 32);
        buffer.writeUInt16LE(bitsPerSample, 34);

        // data sub-chunk
        buffer.write('data', 36);
        buffer.writeUInt32LE(dataSize, 40);
        pcm.copy(buffer, headerSize);

        return buffer;
    }

    /**
     * Convert cents to whole micro-cents, rounding up. Non-finite or
     * non-positive inputs yield 1, so the result is never below 1.
     */
    #toMicroCents(cents: number): number {
        if (!Number.isFinite(cents) || cents <= 0) return 1;
        return Math.ceil(cents * 1_000_000);
    }
}
@@ -0,0 +1,44 @@
/**
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Gemini TTS pricing in USD per 1M tokens:
// gemini-2.5-flash-preview-tts: input $0.50, output (audio) $10.00
// gemini-2.5-pro-preview-tts: input $1.00, output (audio) $20.00
// gemini-3.1-flash-tts-preview: input $1.00, output (audio) $20.00
//
// Audio output tokens = ~25 tokens/second of audio.
//
// Costs here are in USD-cents per 1M tokens for input and output.
/**
 * Gemini TTS prices keyed by model id. Both `input` and `output_audio` are
 * expressed in USD cents per 1,000,000 tokens.
 */
export const GEMINI_TTS_COSTS: Record<
string,
{ input: number; output_audio: number }
> = {
'gemini-2.5-flash-preview-tts': {
input: 50, // $0.50 per 1M tokens = 50 cents
output_audio: 1000, // $10.00 per 1M tokens = 1000 cents
},
'gemini-2.5-pro-preview-tts': {
input: 100, // $1.00 per 1M tokens
output_audio: 2000, // $20.00 per 1M tokens
},
'gemini-3.1-flash-tts-preview': {
input: 100, // $1.00 per 1M tokens
output_audio: 2000, // $20.00 per 1M tokens
},
};
+1
View File
@@ -226,6 +226,7 @@ You can see various Puter.js AI features in action from the following examples:
- [Text to Speech with options](/playground/ai-txt2speech-options/)
- [Text to Speech with engines](/playground/ai-txt2speech-engines/)
- [Text to Speech with OpenAI voices](/playground/ai-txt2speech-openai/)
- [Text to Speech with Gemini voices](/playground/ai-txt2speech-gemini/)
- [Transcribe audio with `speech2txt`](/AI/speech2txt/)
- Text to Video
- [Generate a sample Sora clip](/AI/txt2vid/)
+38 -1
View File
@@ -32,7 +32,7 @@ Additional settings for the generation request. Available options depend on the
| Option | Type | Description |
|--------|------|-------------|
| `provider` | `String` | TTS provider to use. `'aws-polly'` (default), `'openai'`, `'elevenlabs'` |
| `provider` | `String` | TTS provider to use. `'aws-polly'` (default), `'openai'`, `'elevenlabs'`, `'gemini'` |
| `model` | `String` | Model identifier (provider-specific) |
| `voice` | `String` | Voice ID used for synthesis (provider-specific) |
| `test_mode` | `Boolean` | When `true`, returns a sample audio without using credits |
@@ -74,6 +74,18 @@ Available when `provider: 'elevenlabs'`:
For more details about each option, see the [ElevenLabs API reference](https://elevenlabs.io/docs/api-reference/text-to-speech).
#### Gemini Options
Available when `provider: 'gemini'`:
| Option | Type | Description |
|--------|------|-------------|
| `model` | `String` | TTS model. Available: `'gemini-2.5-flash-preview-tts'` (default), `'gemini-2.5-pro-preview-tts'`, `'gemini-3.1-flash-tts-preview'` |
| `voice` | `String` | Voice name. Defaults to `'Kore'`. Available: `'Zephyr'`, `'Puck'`, `'Charon'`, `'Kore'`, `'Fenrir'`, `'Leda'`, `'Orus'`, `'Aoede'`, `'Callirrhoe'`, `'Autonoe'`, `'Enceladus'`, `'Iapetus'`, `'Umbriel'`, `'Algieba'`, `'Despina'`, `'Erinome'`, `'Algenib'`, `'Rasalgethi'`, `'Laomedeia'`, `'Achernar'`, `'Alnilam'`, `'Schedar'`, `'Gacrux'`, `'Pulcherrima'`, `'Achird'`, `'Zubenelgenubi'`, `'Vindemiatrix'`, `'Sadachbia'`, `'Sadaltager'`, `'Sulafat'` |
| `instructions` | `String` | Natural language instructions to control speaking style (tone, speed, mood, etc.) |
For more details about Gemini TTS, see the [Google Gemini TTS documentation](https://ai.google.dev/gemini-api/docs/text-to-speech).
## Return value
A `Promise` that resolves to an `HTMLAudioElement`. The element's `src` points at a blob or remote URL containing the synthesized audio.
@@ -171,6 +183,31 @@ A `Promise` that resolves to an `HTMLAudioElement`. The elements `src` points
</html>
```
<strong class="example-title">Use Gemini voices</strong>
```html;ai-txt2speech-gemini
<html>
<body>
<script src="https://js.puter.com/v2/"></script>
<button id="play">Use Gemini voice</button>
<script>
document.getElementById('play').addEventListener('click', async ()=>{
const audio = await puter.ai.txt2speech(
"Hello! This sample uses the Gemini Puck voice.",
{
provider: "gemini",
model: "gemini-2.5-flash-preview-tts",
voice: "Puck",
instructions: "Speak in a friendly, upbeat tone."
}
);
audio.play();
});
</script>
</body>
</html>
```
<strong class="example-title">Compare different engines</strong>
```html;ai-txt2speech-engines
@@ -0,0 +1,20 @@
<!-- Playground example: synthesize speech with the Gemini provider and play it on click. -->
<html>
<body>
<script src="https://js.puter.com/v2/"></script>
<button id="play">Use Gemini voice</button>
<script>
// The request is made only when the button is clicked.
document.getElementById('play').addEventListener('click', async ()=>{
// Resolves to an audio element for the synthesized speech.
const audio = await puter.ai.txt2speech(
"Hello! This sample uses the Gemini Puck voice.",
{
provider: "gemini",
model: "gemini-2.5-flash-preview-tts",
voice: "Puck",
// Natural-language style guidance for the speaker.
instructions: "Speak in a friendly, upbeat tone."
}
);
audio.play();
});
</script>
</body>
</html>
+44 -10
View File
@@ -2,11 +2,12 @@ import * as utils from '../lib/utils.js';
/**
 * Map a user-supplied provider (or engine) name to a canonical TTS provider
 * key. Matching is case-insensitive.
 *
 * @param {unknown} value Provider name as passed by the caller.
 * @returns {string|null} `'openai'`, `'elevenlabs'`, `'gemini'` or
 * `'aws-polly'` for a recognized alias; the original string unchanged when
 * it is not recognized; `null` when `value` is not a string, so callers can
 * apply their own default.
 */
const normalizeTTSProvider = (value) => {
    // A single return here: previously `return null` sat unreachable behind
    // `return 'aws-polly'`. Callers fall back to 'aws-polly' themselves.
    if ( typeof value !== 'string' ) {
        return null;
    }
    const lower = value.toLowerCase();
    if ( lower === 'openai' ) return 'openai';
    if ( ['elevenlabs', 'eleven', '11labs', '11-labs', 'eleven-labs', 'elevenlabs-tts'].includes(lower) ) return 'elevenlabs';
    if ( ['gemini', 'google', 'gemini-tts', 'google-tts'].includes(lower) ) return 'gemini';
    if ( lower === 'aws' || lower === 'polly' || lower === 'aws-polly' ) return 'aws-polly';
    return value;
};
@@ -270,6 +271,10 @@ class AI {
provider = 'elevenlabs';
}
if ( options.engine && normalizeTTSProvider(options.engine) === 'gemini' && !options.provider ) {
provider = 'gemini';
}
if ( provider === 'openai' ) {
if ( !options.model && typeof options.engine === 'string' ) {
options.model = options.engine;
@@ -301,6 +306,17 @@ class AI {
options.output_format = options.response_format;
}
delete options.engine;
} else if ( provider === 'gemini' ) {
if ( !options.model && typeof options.engine === 'string' ) {
options.model = options.engine;
}
if ( ! options.voice ) {
options.voice = 'Kore';
}
if ( ! options.model ) {
options.model = 'gemini-2.5-flash-preview-tts';
}
delete options.engine;
} else {
provider = 'aws-polly';
@@ -332,9 +348,12 @@ class AI {
}
}
const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
const driverNameMap = {
'openai': 'openai-tts',
'elevenlabs': 'elevenlabs-tts',
'gemini': 'gemini-tts',
};
const driverName = driverNameMap[provider] || 'aws-polly';
return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'synthesize', {
responseType: 'blob',
@@ -574,9 +593,16 @@ class AI {
params.provider = 'elevenlabs';
}
const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
if ( provider === 'gemini' ) {
params.provider = 'gemini';
}
const driverNameMap = {
'openai': 'openai-tts',
'elevenlabs': 'elevenlabs-tts',
'gemini': 'gemini-tts',
};
const driverName = driverNameMap[provider] || 'aws-polly';
return await utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_engines', {
responseType: 'text',
@@ -609,9 +635,17 @@ class AI {
params.provider = 'elevenlabs';
}
const driverName = provider === 'openai'
? 'openai-tts'
: (provider === 'elevenlabs' ? 'elevenlabs-tts' : 'aws-polly');
if ( provider === 'gemini' ) {
params.provider = 'gemini';
delete params.engine;
}
const driverNameMap2 = {
'openai': 'openai-tts',
'elevenlabs': 'elevenlabs-tts',
'gemini': 'gemini-tts',
};
const driverName = driverNameMap2[provider] || 'aws-polly';
return utils.make_driver_method(['source'], 'puter-tts', driverName, 'list_voices', {
responseType: 'text',