tests: add unit tests for TTSDriver (#3064)

Adds offline TTSDriver.test.ts covering provider selection/dispatch (args.provider, legacy driverAlias via Context.driverName, registered fallback), voice/format param validation, error mapping, and metering propagation (provider failures must not bill). Mocks each provider's SDK / fetch boundary against a real PuterServer wired with credentials for every TTS provider. Closes #2995 Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 20:31:16 +00:00 · 2026-05-10 14:27:08 -07:00
parent b4fad80a8c
commit c101fe74ab
1 changed files with 387 additions and 0 deletions
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Offline unit tests for TTSDriver.
+ *
+ * Boots a real PuterServer (in-memory sqlite + dynamo + s3 + mock
+ * redis) with API credentials for every TTS provider so the driver
+ * registers and indexes them all. Then drives `server.drivers.aiTts`
+ * directly. Provider SDKs and global `fetch` are mocked at their
+ * network boundaries so the driver's routing and dispatch logic runs
+ * without real egress. Aligns with AGENTS.md: "Prefer test server
+ * over mocking deps."
+ */
+
+import {
+    afterAll,
+    afterEach,
+    beforeAll,
+    beforeEach,
+    describe,
+    expect,
+    it,
+    vi,
+    type MockInstance,
+} from 'vitest';
+
+import { runWithContext } from '../../core/context.js';
+import { SYSTEM_ACTOR } from '../../core/actor.js';
+import { PuterServer } from '../../server.js';
+import { setupTestServer } from '../../testUtil.js';
+import type { TTSDriver } from './TTSDriver.js';
+
+// ── SDK mocks ──────────────────────────────────────────────────────
+//
+// These mocks are already in place when PuterServer.start() runs, since
+// each provider's constructor instantiates its SDK. The driver-level
+// tests only care about which provider the driver dispatched to, so each
+// SDK-level mock resolves to a sentinel value that callers inspect.
+
+// Created via vi.hoisted so the `vi.mock('openai')` factory below can
+// close over the same spy that individual tests configure.
+const openaiSpeechCreateMock = vi.hoisted(() => vi.fn());
+
+/**
+ * Replaces the `openai` package with a constructor spy. Instances expose
+ * only stubbed `audio.speech.create`, `chat.completions.create` and
+ * `images.generate` members; speech creation is routed to
+ * `openaiSpeechCreateMock`.
+ */
+vi.mock('openai', () => {
+    function fakeOpenAIClient(this: Record<string, unknown>) {
+        this.audio = { speech: { create: openaiSpeechCreateMock } };
+        this.chat = { completions: { create: vi.fn() } };
+        this.images = { generate: vi.fn() };
+    }
+    const OpenAICtor = vi.fn().mockImplementation(fakeOpenAIClient);
+    // Dual-shape contract: the default export is the constructor and also
+    // carries an `.OpenAI` property (used by sibling chat providers).
+    Object.assign(OpenAICtor, { OpenAI: OpenAICtor });
+    return { default: OpenAICtor, OpenAI: OpenAICtor };
+});
+
+// Created via vi.hoisted so the `vi.mock('@google/genai')` factory below
+// can reference it.
+const geminiGenerateContentMock = vi.hoisted(() => vi.fn());
+
+/**
+ * Replaces `@google/genai` with a `GoogleGenAI` constructor spy. Instances
+ * expose stubbed `models.generateContent` (routed to
+ * `geminiGenerateContentMock`), `models.generateImages` and
+ * `operations.getVideosOperation`.
+ */
+vi.mock('@google/genai', () => {
+    function fakeGenAIClient(this: Record<string, unknown>) {
+        const models = {
+            generateContent: geminiGenerateContentMock,
+            generateImages: vi.fn(),
+        };
+        const operations = { getVideosOperation: vi.fn() };
+        this.models = models;
+        this.operations = operations;
+    }
+    return { GoogleGenAI: vi.fn().mockImplementation(fakeGenAIClient) };
+});
+
+// Created via vi.hoisted so the `vi.mock('@aws-sdk/client-polly')`
+// factory below can reference it.
+const pollySendMock = vi.hoisted(() => vi.fn());
+
+/**
+ * Partially mocks `@aws-sdk/client-polly`: every real export is kept
+ * (so the command classes stay genuine) and only `PollyClient` is
+ * swapped for a constructor spy whose `send` is `pollySendMock`.
+ */
+vi.mock('@aws-sdk/client-polly', async () => {
+    type PollyModule = typeof import('@aws-sdk/client-polly');
+    const realPolly = await vi.importActual<PollyModule>(
+        '@aws-sdk/client-polly',
+    );
+    const PollyClient = vi.fn().mockImplementation(function (
+        this: Record<string, unknown>,
+    ) {
+        this.send = pollySendMock;
+    });
+    return { ...realPolly, PollyClient };
+});
+
+// ── Test harness ────────────────────────────────────────────────────
+
+// Shared fixtures: `server` and `driver` are assigned once in beforeAll,
+// `fetchSpy` is re-created for every test in beforeEach.
+let server: PuterServer;
+let driver: TTSDriver;
+let fetchSpy: MockInstance<typeof fetch>;
+
+// Start one test server for the whole file, configured with placeholder
+// credentials for each TTS provider, and grab the TTS driver from it.
+beforeAll(async () => {
+    const providers = {
+        'openai-tts': { apiKey: 'oai-key' },
+        elevenlabs: { apiKey: 'el-key' },
+        'aws-polly': {
+            aws: {
+                access_key: 'AKIA-TEST',
+                secret_key: 'secret',
+                region: 'us-west-2',
+            },
+        },
+        gemini: { apiKey: 'gem-key' },
+        xai: { apiKey: 'xai-key' },
+    };
+    server = await setupTestServer({ providers } as never);
+    driver = server.drivers.aiTts as unknown as TTSDriver;
+});
+
+// Shut the server down if setup got far enough to create it.
+afterAll(async () => {
+    if (server) {
+        await server.shutdown();
+    }
+});
+
+// Per-test reset: clear the SDK spies' recorded calls and stubbed
+// results, then install a fresh spy on the global `fetch`.
+beforeEach(() => {
+    openaiSpeechCreateMock.mockReset();
+    geminiGenerateContentMock.mockReset();
+    pollySendMock.mockReset();
+    // A bare spy would forward un-stubbed calls to the real `fetch`.
+    // Reject by default so an unexpected network call fails loudly
+    // instead of leaving the process; tests still override this with
+    // `mockResolvedValueOnce(...)` for the calls they expect.
+    fetchSpy = (
+        vi.spyOn(globalThis, 'fetch') as MockInstance<typeof fetch>
+    ).mockRejectedValue(
+        new Error('Unexpected network call: fetch was not stubbed for this test'),
+    );
+});
+
+// Undo every vi.spyOn installed during the test (fetch, metering, ...).
+afterEach(() => {
+    vi.restoreAllMocks();
+});
+
+/**
+ * Runs `fn` inside a request context whose actor is SYSTEM_ACTOR and
+ * returns the outcome wrapped in a promise.
+ */
+function withActor<T>(fn: () => T | Promise<T>): Promise<T> {
+    const outcome = runWithContext({ actor: SYSTEM_ACTOR }, fn);
+    return Promise.resolve(outcome);
+}
+
+/**
+ * Runs `fn` inside a request context that carries SYSTEM_ACTOR plus the
+ * given `driverName`, and returns the outcome wrapped in a promise.
+ *
+ * @param driverName Value placed on the context's `driverName` field.
+ * @param fn Callback executed inside that context.
+ * @returns A promise for whatever `fn` produced.
+ */
+const withDriverName = <T>(
+    driverName: string,
+    fn: () => T | Promise<T>,
+): Promise<T> =>
+    Promise.resolve(runWithContext({ actor: SYSTEM_ACTOR, driverName }, fn));
+
+/**
+ * Builds a minimal stand-in for an OpenAI speech response: an object
+ * whose async `arrayBuffer()` yields the bytes of the string 'mp3-bytes'.
+ */
+function openaiAudioResponse() {
+    return {
+        arrayBuffer: async () => {
+            const bytes = new Uint8Array(Buffer.from('mp3-bytes'));
+            return bytes.buffer as ArrayBuffer;
+        },
+    };
+}
+
+// Canned DescribeVoices payload. The Voices list must be non-empty so
+// the engine-default-voice resolver has something to find.
+const joannaVoice = {
+    Id: 'Joanna',
+    Name: 'Joanna',
+    LanguageCode: 'en-US',
+    LanguageName: 'US English',
+    SupportedEngines: ['standard', 'neural'],
+};
+const pollyDescribeVoices = { Voices: [joannaVoice] };
+
+/**
+ * Programs `pollySendMock` to answer by command class name:
+ * DescribeVoicesCommand gets the canned voice list, anything else gets
+ * a fake synthesized-audio payload.
+ */
+const pollyDispatch = () =>
+    pollySendMock.mockImplementation(
+        (command: { constructor: { name: string } }) => {
+            const wantsVoices =
+                command.constructor.name === 'DescribeVoicesCommand';
+            return wantsVoices
+                ? Promise.resolve(pollyDescribeVoices)
+                : Promise.resolve({ AudioStream: 'audio-bytes' });
+        },
+    );
+
+// ── Provider registration & list ────────────────────────────────────
+
+// Verifies that every provider given credentials in beforeAll is
+// reported by the driver.
+describe('TTSDriver provider registration', () => {
+    it('list() returns every provider with credentials wired up', async () => {
+        const names = await driver.list();
+        // Sort a copy: Array.prototype.sort works in place, and the array
+        // handed back by the driver should not be reordered by a test.
+        expect([...names].sort()).toEqual([
+            'aws-polly',
+            'elevenlabs',
+            'gemini',
+            'openai',
+            'xai',
+        ]);
+    });
+});
+
+// ── Authentication ──────────────────────────────────────────────────
+
+// Calls synthesize() outside of any runWithContext wrapper, i.e. with no
+// actor available, and expects a 401-style rejection.
+describe('TTSDriver.synthesize authentication', () => {
+    it('throws 401 when no actor is on the request context', async () => {
+        // Same argument shape the routing tests pass; no cast is needed,
+        // and omitting it keeps the call type-checked.
+        await expect(
+            driver.synthesize({ text: 'hi' }),
+        ).rejects.toMatchObject({ statusCode: 401 });
+    });
+});
+
+// ── Provider routing ────────────────────────────────────────────────
+
+// Each case stubs exactly one provider boundary, triggers synthesize(),
+// and checks which boundary was (and was not) exercised.
+describe('TTSDriver.synthesize provider routing', () => {
+    it('routes via explicit args.provider (openai)', async () => {
+        openaiSpeechCreateMock.mockResolvedValueOnce(openaiAudioResponse());
+        const synthesizeViaOpenAI = () =>
+            driver.synthesize({ text: 'hi', provider: 'openai' });
+
+        await withActor(synthesizeViaOpenAI);
+
+        expect(openaiSpeechCreateMock).toHaveBeenCalledTimes(1);
+        // The remaining provider boundaries must stay untouched.
+        for (const untouched of [
+            pollySendMock,
+            geminiGenerateContentMock,
+            fetchSpy,
+        ]) {
+            expect(untouched).not.toHaveBeenCalled();
+        }
+    });
+
+    it('routes via legacy driverAlias (elevenlabs-tts → elevenlabs)', async () => {
+        const audioReply = new Response('audio', {
+            status: 200,
+            headers: { 'content-type': 'audio/mpeg' },
+        });
+        fetchSpy.mockResolvedValueOnce(audioReply);
+
+        await withDriverName('elevenlabs-tts', () =>
+            driver.synthesize({ text: 'hi' }),
+        );
+
+        // ElevenLabs goes through fetch, so inspect the requested URL.
+        expect(fetchSpy).toHaveBeenCalledTimes(1);
+        const [requestTarget] = fetchSpy.mock.calls[0]!;
+        expect(String(requestTarget)).toMatch(
+            /api\.elevenlabs\.io\/v1\/text-to-speech\//,
+        );
+        expect(openaiSpeechCreateMock).not.toHaveBeenCalled();
+    });
+
+    it('routes via legacy driverAlias (aws-polly → aws-polly)', async () => {
+        pollyDispatch();
+
+        await withDriverName('aws-polly', () =>
+            driver.synthesize({ text: 'hi', voice: 'Joanna' }),
+        );
+
+        // Keep only the SynthesizeSpeech commands sent to the Polly client.
+        const synthCommandNames = pollySendMock.mock.calls
+            .map(([command]) => command.constructor.name)
+            .filter((name) => name === 'SynthesizeSpeechCommand');
+        expect(synthCommandNames).toHaveLength(1);
+        expect(openaiSpeechCreateMock).not.toHaveBeenCalled();
+    });
+
+    it('defaults to openai when no provider hint is supplied (preferred first)', async () => {
+        openaiSpeechCreateMock.mockResolvedValueOnce(openaiAudioResponse());
+        const synthesizeWithoutHint = () => driver.synthesize({ text: 'hi' });
+
+        await withActor(synthesizeWithoutHint);
+
+        expect(openaiSpeechCreateMock).toHaveBeenCalledTimes(1);
+    });
+
+    it('throws 400 when the named provider is not registered', async () => {
+        const attempt = withActor(() =>
+            driver.synthesize({
+                text: 'hi',
+                provider: 'totally-not-a-real-provider',
+            }),
+        );
+
+        await expect(attempt).rejects.toMatchObject({ statusCode: 400 });
+    });
+});
+
+// ── list_voices / list_engines aggregation ─────────────────────────
+
+// Covers the aggregated and provider-filtered forms of list_voices()
+// and list_engines().
+describe('TTSDriver list_voices / list_engines', () => {
+    it('list_voices() aggregates across providers when no filter is supplied', async () => {
+        // Provider listVoices methods that touch the network: ElevenLabs
+        // (fetch) and AWS Polly (DescribeVoices). Wire both up.
+        fetchSpy.mockResolvedValueOnce(
+            new Response(JSON.stringify({ voices: [] }), { status: 200 }),
+        );
+        pollyDispatch();
+
+        const voices = await driver.list_voices();
+        const providers = new Set(voices.map((v) => v.provider));
+        // ElevenLabs was stubbed with an empty list, so it is not asserted
+        // here. AWS Polly's voice comes from the stubbed DescribeVoices
+        // reply wired up above; the other three are expected to be present
+        // without any stubbing.
+        expect(providers.has('openai')).toBe(true);
+        expect(providers.has('gemini')).toBe(true);
+        expect(providers.has('xai')).toBe(true);
+        expect(providers.has('aws-polly')).toBe(true);
+    });
+
+    it('list_voices({ provider }) filters to a single provider', async () => {
+        const voices = await driver.list_voices({ provider: 'openai' });
+        // Guard against a vacuous pass of the loop below.
+        expect(voices.length).toBeGreaterThan(0);
+        for (const voice of voices) {
+            expect(voice.provider).toBe('openai');
+        }
+    });
+
+    it('list_voices({ provider }) returns empty when the provider is not registered', async () => {
+        const voices = await driver.list_voices({ provider: 'nope' });
+        expect(voices).toEqual([]);
+    });
+
+    it('list_engines() aggregates engines across providers and namespaces them', async () => {
+        const engines = await driver.list_engines();
+        const ids = engines.map((e) => e.id);
+        // Some signature engines from each provider.
+        expect(ids).toEqual(
+            expect.arrayContaining([
+                'gpt-4o-mini-tts', // openai
+                'eleven_multilingual_v2', // elevenlabs
+                'gemini-2.5-flash-preview-tts',
+                'xai-tts',
+                'standard', // aws-polly
+            ]),
+        );
+    });
+
+    it('list_engines({ provider }) filters to a single provider', async () => {
+        const engines = await driver.list_engines({ provider: 'xai' });
+        expect(engines).toHaveLength(1);
+        // Optional chaining keeps this index access valid under
+        // noUncheckedIndexedAccess; the length check above already fails
+        // the test if the entry is missing.
+        expect(engines[0]?.provider).toBe('xai');
+    });
+});
+
+// ── getReportedCosts aggregation ────────────────────────────────────
+
+// Checks that the driver-level cost report contains exactly one distinct
+// `source` string per configured provider.
+describe('TTSDriver.getReportedCosts', () => {
+    it('aggregates per-provider cost catalogs into one list', () => {
+        type ReportedCost = { usageType: string; source: string };
+        const costs = driver.getReportedCosts() as ReportedCost[];
+
+        // Collapse the entries down to their distinct source strings.
+        const seenSources = new Set<string>();
+        for (const { source } of costs) {
+            seenSources.add(source);
+        }
+
+        const expectedSources = new Set([
+            'driver:aiTts/openai',
+            'driver:aiTts/elevenlabs',
+            'driver:aiTts/aws-polly',
+            'driver:aiTts/gemini',
+            'driver:aiTts/xai',
+        ]);
+        expect(seenSources).toEqual(expectedSources);
+    });
+});
+
+// ── Provider error mapping ──────────────────────────────────────────
+
+// Failure paths: a 400 must reach the caller with its status intact, and
+// a provider failure must not be recorded as usage.
+describe('TTSDriver.synthesize error mapping', () => {
+    it('passes through HttpError (400) from a provider with the same status code', async () => {
+        const attempt = withActor(() =>
+            driver.synthesize({
+                text: 'hi',
+                provider: 'openai',
+                model: 'definitely-not-a-real-model',
+            }),
+        );
+
+        await expect(attempt).rejects.toMatchObject({ statusCode: 400 });
+    });
+
+    it('does not meter when the dispatched provider throws an SDK error', async () => {
+        // Make the OpenAI SDK boundary fail once, and watch the metering
+        // service for any usage increment.
+        openaiSpeechCreateMock.mockRejectedValueOnce(new Error('upstream blew up'));
+        const incrementUsageSpy = vi.spyOn(
+            server.services.metering,
+            'incrementUsage',
+        );
+
+        const attempt = withActor(() =>
+            driver.synthesize({ text: 'hi', provider: 'openai' }),
+        );
+
+        await expect(attempt).rejects.toThrow('upstream blew up');
+        expect(incrementUsageSpy).not.toHaveBeenCalled();
+    });
+});