diff --git a/src/backend/drivers/ai-chat/ChatCompletionDriver.ts b/src/backend/drivers/ai-chat/ChatCompletionDriver.ts index 8690125c7..17ac72101 100644 --- a/src/backend/drivers/ai-chat/ChatCompletionDriver.ts +++ b/src/backend/drivers/ai-chat/ChatCompletionDriver.ts @@ -41,6 +41,7 @@ import { OpenRouterProvider } from './providers/openrouter/OpenRouterProvider.js import { TogetherAIProvider } from './providers/together/TogetherAIProvider.js'; import { XAIProvider } from './providers/xai/XAIProvider.js'; import { ZAIProvider } from './providers/zai/ZAIProvider.js'; +import { AlibabaProvider } from './providers/alibaba/AlibabaProvider.js'; import { MoonshotProvider } from './providers/moonshot/MoonshotProvider.js'; import type { IChatCompleteResult, @@ -751,13 +752,13 @@ export class ChatCompletionDriver extends PuterDriver { ); } - const openrouter = providers['openrouter']; - const openrouterKey = readKey(openrouter); - if (openrouterKey) { - this.#providers['openrouter'] = new OpenRouterProvider( + const alibaba = providers['alibaba']; + const alibabaKey = readKey(alibaba); + if (alibabaKey) { + this.#providers['alibaba'] = new AlibabaProvider( { - apiKey: openrouterKey, - apiBaseUrl: openrouter?.apiBaseUrl as string | undefined, + apiKey: alibabaKey, + apiBaseUrl: alibaba?.apiBaseUrl as string | undefined, }, metering, ); @@ -782,6 +783,18 @@ export class ChatCompletionDriver extends PuterDriver { ); } + const openrouter = providers['openrouter']; + const openrouterKey = readKey(openrouter); + if (openrouterKey) { + this.#providers['openrouter'] = new OpenRouterProvider( + { + apiKey: openrouterKey, + apiBaseUrl: openrouter?.apiBaseUrl as string | undefined, + }, + metering, + ); + } + // Fake provider — always available for testing this.#providers['fake-chat'] = new FakeChatProvider(); } diff --git a/src/backend/drivers/ai-chat/providers/alibaba/AlibabaProvider.integration.test.ts 
/**
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * Live-endpoint smoke test for the Alibaba provider.
 *
 * Sends one tiny prompt to `qwen-turbo` and checks that some text comes
 * back. The whole suite is skipped when `PUTER_TEST_AI_ALIBABA_API_KEY`
 * is not set, so it never runs without credentials.
 */

import { describe, expect, it } from 'vitest';
import {
    INTEGRATION_TEST_TIMEOUT_MS,
    makeMeteringStub,
    optionalEnv,
    skipUnlessEnv,
    withTestActor,
} from '../../../integrationTestUtil.js';
import { AlibabaProvider } from './AlibabaProvider.js';

const ENV_VAR = 'PUTER_TEST_AI_ALIBABA_API_KEY';

/** The only part of the completion result this test inspects. */
type CompletionLike = { message?: { content?: string } };

describe.skipIf(skipUnlessEnv(ENV_VAR))('AlibabaProvider (integration)', () => {
    it(
        'returns a non-empty completion from qwen-turbo',
        { timeout: INTEGRATION_TEST_TIMEOUT_MS },
        async () => {
            // The suite is skipped when the variable is unset, so the key
            // is expected to be present by the time this body runs.
            const apiKey = optionalEnv(ENV_VAR)!;
            const provider = new AlibabaProvider({ apiKey }, makeMeteringStub());

            const result = await withTestActor(() =>
                provider.complete({
                    model: 'qwen-turbo',
                    messages: [
                        { role: 'user', content: 'Say hi in one word.' },
                    ],
                    max_tokens: 16,
                }),
            );

            const text = (result as CompletionLike).message?.content;
            const isNonEmptyString = typeof text === 'string' && text.length > 0;
            expect(isNonEmptyString).toBe(true);
        },
    );
});
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * Offline unit tests for AlibabaProvider.
 *
 * Boots a real PuterServer (in-memory sqlite + dynamo + s3 + mock
 * redis) and constructs AlibabaProvider directly against the live
 * wired `MeteringService` so the recording side is exercised end-to-
 * end. Alibaba is OpenAI-compatible so the OpenAI SDK is mocked at
 * the module boundary; that's the real network egress point. The
 * companion integration test (AlibabaProvider.integration.test.ts)
 * exercises the real DashScope endpoint.
 */

import { Writable } from 'node:stream';
import {
    afterAll,
    afterEach,
    beforeAll,
    beforeEach,
    describe,
    expect,
    it,
    vi,
    type MockInstance,
} from 'vitest';

import { SYSTEM_ACTOR } from '../../../../core/actor.js';
import type { MeteringService } from '../../../../services/metering/MeteringService.js';
import { PuterServer } from '../../../../server.js';
import { setupTestServer } from '../../../../testUtil.js';
import { withTestActor } from '../../../integrationTestUtil.js';
import { AIChatStream } from '../../utils/Streaming.js';
import { ALIBABA_MODELS } from './models.js';
import { AlibabaProvider } from './AlibabaProvider.js';

// ── OpenAI SDK mock ─────────────────────────────────────────────────

// `vi.mock` factories are hoisted above imports, so the spies they close
// over must be created with `vi.hoisted` to exist at that point.
const { createMock, openAICtor } = vi.hoisted(() => {
    const createMock = vi.fn();
    const openAICtor = vi.fn();
    return { createMock, openAICtor };
});

vi.mock('openai', () => {
    // Stand-in constructor: records the options it was built with and
    // exposes only `chat.completions.create`, the one SDK call the
    // provider makes.
    const OpenAICtor = vi.fn().mockImplementation(function (
        this: Record<string, unknown>,
        opts: unknown,
    ) {
        openAICtor(opts);
        this.chat = { completions: { create: createMock } };
    });
    return { OpenAI: OpenAICtor, default: { OpenAI: OpenAICtor } };
});

// ── Test harness ────────────────────────────────────────────────────

let server: PuterServer;
// NOTE(review): the type argument is reconstructed from the otherwise
// unused `MeteringService` type import — confirm against the vitest
// version in use.
let recordSpy: MockInstance<MeteringService['utilRecordUsageObject']>;

beforeAll(async () => {
    server = await setupTestServer();
});

afterAll(async () => {
    await server?.shutdown();
});

/** Builds a provider wired to the test server's metering service. */
const makeProvider = (config?: { apiKey?: string; apiBaseUrl?: string }) => {
    const provider = new AlibabaProvider(
        { apiKey: 'test-key', ...config },
        server.services.metering,
    );
    return { provider };
};

/** Wraps a fixed list of chunks as an async iterable, like an SDK stream. */
const asAsyncIterable = <T>(items: T[]): AsyncIterable<T> => ({
    async *[Symbol.asyncIterator]() {
        for (const item of items) {
            yield item;
        }
    },
});

/**
 * Creates an AIChatStream backed by an in-memory sink. `events()` parses
 * everything written so far as newline-delimited JSON.
 */
const makeCapturingChatStream = () => {
    const chunks: string[] = [];
    const sink = new Writable({
        write(chunk, _enc, cb) {
            chunks.push(chunk.toString('utf8'));
            cb();
        },
    });
    const chatStream = new AIChatStream({ stream: sink });
    return {
        chatStream,
        events: () =>
            chunks
                .join('')
                .split('\n')
                .filter(Boolean)
                .map((line) => JSON.parse(line)),
    };
};

/** The part of a streaming `complete()` result that these tests drive. */
type StreamingResult = {
    init_chat_stream: (p: { chatStream: unknown }) => Promise<void>;
};

/**
 * Runs a streaming result to completion against a capturing chat stream
 * and returns the parsed events it emitted.
 */
const collectStreamEvents = async (result: unknown) => {
    const harness = makeCapturingChatStream();
    await (result as StreamingResult).init_chat_stream({
        chatStream: harness.chatStream,
    });
    return harness.events();
};

beforeEach(() => {
    createMock.mockReset();
    openAICtor.mockReset();
    recordSpy = vi.spyOn(server.services.metering, 'utilRecordUsageObject');
});

afterEach(() => {
    vi.restoreAllMocks();
});

// ── Construction ────────────────────────────────────────────────────

describe('AlibabaProvider construction', () => {
    it('points the OpenAI SDK at the DashScope base URL with the configured key', () => {
        makeProvider();
        expect(openAICtor).toHaveBeenCalledTimes(1);
        expect(openAICtor).toHaveBeenCalledWith({
            apiKey: 'test-key',
            baseURL:
                'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
        });
    });

    it('uses a custom base URL when configured', () => {
        makeProvider({ apiBaseUrl: 'https://custom.endpoint/v1' });
        expect(openAICtor).toHaveBeenCalledWith({
            apiKey: 'test-key',
            baseURL: 'https://custom.endpoint/v1',
        });
    });
});

// ── Model catalog ───────────────────────────────────────────────────

describe('AlibabaProvider model catalog', () => {
    it('returns qwen-plus-latest as the default', () => {
        const { provider } = makeProvider();
        expect(provider.getDefaultModel()).toBe('qwen-plus-latest');
    });

    it('exposes the static ALIBABA_MODELS list verbatim from models()', () => {
        const { provider } = makeProvider();
        expect(provider.models()).toBe(ALIBABA_MODELS);
    });

    it('list() flattens canonical ids and aliases', async () => {
        const { provider } = makeProvider();
        const ids = await provider.list();
        for (const m of ALIBABA_MODELS) {
            expect(ids).toContain(m.id);
            for (const a of m.aliases ?? []) {
                expect(ids).toContain(a);
            }
        }
        expect(ids).toContain('qwen-plus');
        expect(ids).toContain('qwen/qwen-plus');
        expect(ids).toContain('qwen-max');
        expect(ids).toContain('qwen/qwen-max');
    });
});

// ── Request shape ───────────────────────────────────────────────────

describe('AlibabaProvider.complete request shape', () => {
    const baseCompletion = {
        choices: [
            {
                message: { content: 'hi', role: 'assistant' },
                finish_reason: 'stop',
            },
        ],
        usage: { prompt_tokens: 1, completion_tokens: 1 },
    };

    it('forwards model + messages and defaults max_tokens to 1000 when caller omits it', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hello' }],
            }),
        );

        const [args] = createMock.mock.calls[0]!;
        expect(args.model).toBe('qwen-plus');
        expect(args.messages).toEqual([{ role: 'user', content: 'hello' }]);
        expect(args.max_tokens).toBe(1000);
    });

    it('respects an explicit max_tokens override', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
                max_tokens: 256,
            }),
        );

        expect(createMock.mock.calls[0]![0].max_tokens).toBe(256);
    });

    it('forwards temperature when supplied', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
                temperature: 0.7,
            }),
        );

        expect(createMock.mock.calls[0]![0].temperature).toBe(0.7);
    });

    it('omits the `tools` key entirely when no tools are supplied', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
            }),
        );

        const [args] = createMock.mock.calls[0]!;
        expect('tools' in args).toBe(false);
    });

    it('passes tool definitions through unchanged when supplied', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        const tools = [
            {
                type: 'function',
                function: {
                    name: 'lookup',
                    parameters: { type: 'object', properties: {} },
                },
            },
        ];
        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
                tools,
            }),
        );

        // Identity, not equality: the array must be forwarded as-is.
        expect(createMock.mock.calls[0]![0].tools).toBe(tools);
    });

    it('only sets stream_options.include_usage when streaming', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);
        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
                stream: false,
            }),
        );
        const [nonStreamArgs] = createMock.mock.calls[0]!;
        expect(nonStreamArgs.stream).toBe(false);
        expect('stream_options' in nonStreamArgs).toBe(false);

        createMock.mockReturnValueOnce(asAsyncIterable([]));
        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
                stream: true,
            }),
        );
        const [streamArgs] = createMock.mock.calls[1]!;
        expect(streamArgs.stream).toBe(true);
        expect(streamArgs.stream_options).toEqual({ include_usage: true });
    });

    it('hoists Puter-style tool_use blocks into OpenAI tool_calls before sending', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [
                    {
                        role: 'assistant',
                        content: [
                            {
                                type: 'tool_use',
                                id: 'call_1',
                                name: 'lookup',
                                input: { q: 'puter' },
                            },
                        ],
                    },
                ],
            }),
        );

        const [args] = createMock.mock.calls[0]!;
        expect(args.messages[0].content).toBeNull();
        expect(args.messages[0].tool_calls).toEqual([
            {
                id: 'call_1',
                type: 'function',
                function: {
                    name: 'lookup',
                    arguments: JSON.stringify({ q: 'puter' }),
                },
            },
        ]);
    });
});

// ── Model resolution ────────────────────────────────────────────────

describe('AlibabaProvider model resolution', () => {
    const baseCompletion = {
        choices: [
            {
                message: { content: 'ok', role: 'assistant' },
                finish_reason: 'stop',
            },
        ],
        usage: { prompt_tokens: 1, completion_tokens: 1 },
    };

    it('resolves an exact canonical id', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        await withTestActor(() =>
            provider.complete({
                model: 'qwen-max',
                messages: [{ role: 'user', content: 'hi' }],
            }),
        );

        expect(createMock.mock.calls[0]![0].model).toBe('qwen-max');
        expect(recordSpy).toHaveBeenCalledWith(
            expect.any(Object),
            expect.anything(),
            'alibaba:qwen-max',
            expect.any(Object),
        );
    });

    it('resolves an alias to its canonical id (alias rewriting)', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce(baseCompletion);

        await withTestActor(() =>
            provider.complete({
                model: 'qwen/qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
            }),
        );

        expect(createMock.mock.calls[0]![0].model).toBe('qwen-plus');
        expect(recordSpy).toHaveBeenCalledWith(
            expect.any(Object),
            expect.anything(),
            'alibaba:qwen-plus',
            expect.any(Object),
        );
    });
});

// ── Non-stream completion ───────────────────────────────────────────

describe('AlibabaProvider.complete non-stream output', () => {
    it('returns the first choice and runs the metered usage calculator', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce({
            choices: [
                {
                    message: { content: 'hi there', role: 'assistant' },
                    finish_reason: 'stop',
                },
            ],
            usage: { prompt_tokens: 100, completion_tokens: 50 },
        });

        const result = await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
            }),
        );

        expect(result).toMatchObject({
            message: { content: 'hi there', role: 'assistant' },
            finish_reason: 'stop',
        });
        expect((result as { usage: unknown }).usage).toEqual({
            prompt_tokens: 100,
            completion_tokens: 50,
            cached_tokens: 0,
        });

        const qwenPlus = ALIBABA_MODELS.find((m) => m.id === 'qwen-plus')!;
        expect(recordSpy).toHaveBeenCalledTimes(1);
        const [usage, actor, prefix, overrides] = recordSpy.mock.calls[0]!;
        expect(usage).toEqual({
            prompt_tokens: 100,
            completion_tokens: 50,
            cached_tokens: 0,
        });
        expect(actor).toBe(SYSTEM_ACTOR);
        expect(prefix).toBe('alibaba:qwen-plus');
        expect(overrides).toEqual({
            prompt_tokens: 100 * Number(qwenPlus.costs.prompt_tokens),
            completion_tokens: 50 * Number(qwenPlus.costs.completion_tokens),
            cached_tokens: 0,
        });
    });

    it('preserves OpenAI-shaped tool_calls on the assistant response', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce({
            choices: [
                {
                    message: {
                        role: 'assistant',
                        content: null,
                        tool_calls: [
                            {
                                id: 'call_1',
                                type: 'function',
                                function: {
                                    name: 'lookup',
                                    arguments: '{"q":"puter"}',
                                },
                            },
                        ],
                    },
                    finish_reason: 'tool_calls',
                },
            ],
            usage: { prompt_tokens: 1, completion_tokens: 1 },
        });

        const result = (await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'do a tool call' }],
                tools: [
                    {
                        type: 'function',
                        function: { name: 'lookup', parameters: {} },
                    },
                ],
            }),
        )) as { message: { tool_calls?: unknown[] }; finish_reason: string };

        expect(result.finish_reason).toBe('tool_calls');
        expect(result.message.tool_calls).toEqual([
            {
                id: 'call_1',
                type: 'function',
                function: { name: 'lookup', arguments: '{"q":"puter"}' },
            },
        ]);
    });

    it('zeroes cached_tokens when prompt_tokens_details is missing', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce({
            choices: [
                {
                    message: { content: 'ok', role: 'assistant' },
                    finish_reason: 'stop',
                },
            ],
            usage: { prompt_tokens: 7, completion_tokens: 3 },
        });

        await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'hi' }],
            }),
        );

        const [usage, , , overrides] = recordSpy.mock.calls[0]!;
        expect(usage.cached_tokens).toBe(0);
        expect(overrides).toMatchObject({ cached_tokens: 0 });
    });

    it('accounts for cached_tokens when prompt_tokens_details is present', async () => {
        const { provider } = makeProvider();
        createMock.mockResolvedValueOnce({
            choices: [
                {
                    message: { content: 'ok', role: 'assistant' },
                    finish_reason: 'stop',
                },
            ],
            usage: {
                prompt_tokens: 50,
                completion_tokens: 20,
                prompt_tokens_details: { cached_tokens: 15 },
            },
        });

        await withTestActor(() =>
            provider.complete({
                model: 'qwen3.6-max-preview',
                messages: [{ role: 'user', content: 'hi' }],
            }),
        );

        const model = ALIBABA_MODELS.find(
            (m) => m.id === 'qwen3.6-max-preview',
        )!;
        const [usage, , prefix, overrides] = recordSpy.mock.calls[0]!;
        expect(usage).toEqual({
            prompt_tokens: 50,
            completion_tokens: 20,
            cached_tokens: 15,
        });
        expect(prefix).toBe('alibaba:qwen3.6-max-preview');
        expect(overrides).toEqual({
            prompt_tokens: 50 * Number(model.costs.prompt_tokens),
            completion_tokens: 20 * Number(model.costs.completion_tokens),
            cached_tokens: 15 * Number(model.costs.cached_tokens ?? 0),
        });
    });
});

// ── Streaming deltas ────────────────────────────────────────────────

describe('AlibabaProvider.complete streaming', () => {
    it('streams text deltas through to text events and meters final usage', async () => {
        const { provider } = makeProvider();
        createMock.mockReturnValueOnce(
            asAsyncIterable([
                { choices: [{ delta: { content: 'hel' } }] },
                { choices: [{ delta: { content: 'lo' } }] },
                {
                    choices: [{ delta: {} }],
                    usage: { prompt_tokens: 4, completion_tokens: 2 },
                },
            ]),
        );

        const result = await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'say hi' }],
                stream: true,
            }),
        );
        expect((result as { stream: boolean }).stream).toBe(true);

        const events = await collectStreamEvents(result);
        const textEvents = events.filter((e) => e.type === 'text');
        expect(textEvents.map((e) => e.text)).toEqual(['hel', 'lo']);

        const usageEvent = events.find((e) => e.type === 'usage');
        expect(usageEvent?.usage).toEqual({
            prompt_tokens: 4,
            completion_tokens: 2,
            cached_tokens: 0,
        });

        const qwenPlus = ALIBABA_MODELS.find((m) => m.id === 'qwen-plus')!;
        expect(recordSpy).toHaveBeenCalledTimes(1);
        const [, , prefix, overrides] = recordSpy.mock.calls[0]!;
        expect(prefix).toBe('alibaba:qwen-plus');
        expect(overrides).toEqual({
            prompt_tokens: 4 * Number(qwenPlus.costs.prompt_tokens),
            completion_tokens: 2 * Number(qwenPlus.costs.completion_tokens),
            cached_tokens: 0,
        });
    });

    it('builds a tool_use block from streamed function-call deltas', async () => {
        const { provider } = makeProvider();
        // The JSON arguments arrive split across two deltas for the same
        // tool-call index and must be stitched back together.
        createMock.mockReturnValueOnce(
            asAsyncIterable([
                {
                    choices: [
                        {
                            delta: {
                                tool_calls: [
                                    {
                                        index: 0,
                                        id: 'call_1',
                                        function: {
                                            name: 'lookup',
                                            arguments: '{"q":',
                                        },
                                    },
                                ],
                            },
                        },
                    ],
                },
                {
                    choices: [
                        {
                            delta: {
                                tool_calls: [
                                    {
                                        index: 0,
                                        function: { arguments: '"puter"}' },
                                    },
                                ],
                            },
                        },
                    ],
                },
                {
                    choices: [{ delta: {} }],
                    usage: { prompt_tokens: 1, completion_tokens: 1 },
                },
            ]),
        );

        const result = await withTestActor(() =>
            provider.complete({
                model: 'qwen-plus',
                messages: [{ role: 'user', content: 'do tool call' }],
                tools: [
                    {
                        type: 'function',
                        function: { name: 'lookup', parameters: {} },
                    },
                ],
                stream: true,
            }),
        );

        const events = await collectStreamEvents(result);
        const toolEvent = events.find((e) => e.type === 'tool_use');
        expect(toolEvent).toBeDefined();
        expect(toolEvent?.id).toBe('call_1');
        expect(toolEvent?.name).toBe('lookup');
        expect(toolEvent?.input).toEqual({ q: 'puter' });
    });
});

// ── Error mapping ───────────────────────────────────────────────────

describe('AlibabaProvider.complete error mapping', () => {
    it('rethrows errors raised by the OpenAI client unchanged', async () => {
        const { provider } = makeProvider();
        const apiError = new Error('DashScope exploded');
        createMock.mockRejectedValueOnce(apiError);

        await expect(
            withTestActor(() =>
                provider.complete({
                    model: 'qwen-plus',
                    messages: [{ role: 'user', content: 'boom' }],
                }),
            ),
        ).rejects.toBe(apiError);

        // A failed request must not be billed.
        expect(recordSpy).not.toHaveBeenCalled();
    });
});

// ── Moderation ──────────────────────────────────────────────────────

describe('AlibabaProvider.checkModeration', () => {
    it('throws — Alibaba provider does not implement moderation', () => {
        const { provider } = makeProvider();
        expect(() => provider.checkModeration('anything')).toThrow(
            /not implemented/i,
        );
    });
});
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import { OpenAI } from 'openai';
import type { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { Context } from '../../../../core/context.js';
import type { MeteringService } from '../../../../services/metering/MeteringService.js';
import type { IChatProvider, ICompleteArguments } from '../../types.js';
import * as OpenAIUtil from '../../utils/OpenAIUtil.js';
import { ALIBABA_MODELS } from './models.js';

/** Connection settings for the provider. */
type AlibabaConfig = {
    /** API key handed to the OpenAI-compatible client. */
    apiKey: string;
    /** Optional endpoint override; the DashScope international URL is used when absent. */
    apiBaseUrl?: string;
};

/** Endpoint used when `AlibabaConfig.apiBaseUrl` is not provided. */
const DEFAULT_BASE_URL =
    'https://dashscope-intl.aliyuncs.com/compatible-mode/v1';

/** Completion budget applied when the caller does not pass `max_tokens`. */
const DEFAULT_MAX_TOKENS = 1000;

/**
 * Chat provider for Alibaba's Qwen models, reached through the OpenAI SDK
 * pointed at an OpenAI-compatible base URL. Usage reported by the endpoint
 * is recorded on the metering service under the `alibaba:<model id>` prefix.
 */
export class AlibabaProvider implements IChatProvider {
    #openai: OpenAI;

    #meteringService: MeteringService;

    /**
     * @param config API key and optional base URL override.
     * @param meteringService Service that receives one usage record per completion.
     */
    constructor(config: AlibabaConfig, meteringService: MeteringService) {
        this.#openai = new OpenAI({
            apiKey: config.apiKey,
            baseURL: config.apiBaseUrl ?? DEFAULT_BASE_URL,
        });
        this.#meteringService = meteringService;
    }

    /** @returns The model id used when a requested model is not in the catalog. */
    getDefaultModel() {
        return 'qwen-plus-latest';
    }

    /** @returns The static model catalog (same array instance on every call). */
    models() {
        return ALIBABA_MODELS;
    }

    /** @returns Every canonical model id followed by its aliases, in catalog order. */
    async list() {
        return this.models().flatMap((model) => [
            model.id,
            ...(model.aliases ?? []),
        ]);
    }

    /**
     * Resolves a caller-supplied model name (canonical id or alias) to its
     * catalog entry, falling back to the default model.
     *
     * @throws Error when neither the requested nor the default model is in
     * the catalog. Previously this case surfaced later as an opaque
     * `TypeError` on `modelUsed.id`.
     */
    #resolveModel(requested: ICompleteArguments['model']) {
        const catalog = this.models();
        const lookup = (name: ICompleteArguments['model']) =>
            catalog.find((m) => [m.id, ...(m.aliases ?? [])].includes(name));

        const resolved = lookup(requested) ?? lookup(this.getDefaultModel());
        if (!resolved) {
            throw new Error(
                `AlibabaProvider: model "${String(requested)}" is unknown and default model "${this.getDefaultModel()}" is not in the catalog`,
            );
        }
        return resolved;
    }

    /**
     * Sends a chat completion request and hands the response (plain or
     * streamed) to `OpenAIUtil.handle_completion_output`, which invokes the
     * usage calculator below to record metered usage.
     *
     * Errors thrown by the SDK call propagate unchanged; nothing is metered
     * in that case because the calculator is never reached.
     */
    async complete({
        messages,
        stream,
        model,
        tools,
        max_tokens,
        temperature,
    }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const actor = Context.get('actor');
        const modelUsed = this.#resolveModel(model);

        messages = await OpenAIUtil.process_input_messages(messages);

        const completion = await this.#openai.chat.completions.create({
            messages,
            // Always send the canonical id, even when the caller used an alias.
            model: modelUsed.id,
            // Omit the key entirely rather than sending `tools: undefined`.
            ...(tools ? { tools } : {}),
            // `||` on purpose: a missing or zero budget falls back to the default.
            max_tokens: max_tokens || DEFAULT_MAX_TOKENS,
            temperature,
            stream,
            // Ask for a usage report on the stream so it can be metered.
            ...(stream ? { stream_options: { include_usage: true } } : {}),
        } as ChatCompletionCreateParams);

        return OpenAIUtil.handle_completion_output({
            usage_calculator: ({ usage }) => {
                const trackedUsage = OpenAIUtil.extractMeteredUsage(usage);
                // Cost per metered key = token count × the catalog cost for
                // that key. A key with no catalog cost is charged as 0
                // instead of producing NaN.
                const costsOverride = Object.fromEntries(
                    Object.entries(trackedUsage).map(([key, count]) => [
                        key,
                        count * Number(modelUsed.costs[key] ?? 0),
                    ]),
                );
                this.#meteringService.utilRecordUsageObject(
                    trackedUsage,
                    actor!,
                    `alibaba:${modelUsed.id}`,
                    costsOverride,
                );
                return trackedUsage;
            },
            stream,
            completion,
        });
    }

    /**
     * Moderation is not supported by this provider.
     *
     * @throws Error always.
     */
    checkModeration(_text: string) {
        throw new Error('Method not implemented.');
    }
}
+ */ + +import type { IChatModel } from '../../types.js'; + +// Hardcoded from https://models.dev/api.json +export const ALIBABA_MODELS: IChatModel[] = [ + // ── Commercial flagship ──────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen-max', + id: 'qwen-max', + name: 'Qwen Max', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-04-03', + aliases: ['qwen/qwen-max'], + context: 32_768, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 160, + completion_tokens: 640, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-max', + id: 'qwen3-max', + name: 'Qwen3 Max', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-09-23', + aliases: ['qwen/qwen3-max'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 120, + completion_tokens: 600, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3.6-max-preview', + id: 'qwen3.6-max-preview', + name: 'Qwen3.6 Max Preview', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2025-04', + release_date: '2026-04-20', + aliases: ['qwen/qwen3.6-max-preview'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 130, + completion_tokens: 780, + cached_tokens: 13, + }, + }, + + // ── Plus tier ────────────────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen-plus', + id: 'qwen-plus', + name: 'Qwen Plus', + modalities: { input: ['text'], 
output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-01-25', + aliases: ['qwen/qwen-plus'], + context: 1_000_000, + max_tokens: 32_768, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 40, + completion_tokens: 120, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3.5-plus', + id: 'qwen3.5-plus', + name: 'Qwen3.5 Plus', + modalities: { input: ['text', 'image', 'video'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2025-04', + release_date: '2026-02-16', + aliases: ['qwen/qwen3.5-plus'], + context: 1_000_000, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 40, + completion_tokens: 240, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3.6-plus', + id: 'qwen3.6-plus', + name: 'Qwen3.6 Plus', + modalities: { input: ['text', 'image', 'video'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2025-04', + release_date: '2026-04-02', + aliases: ['qwen/qwen3.6-plus'], + context: 1_000_000, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 50, + completion_tokens: 300, + cached_tokens: 5, + }, + }, + + // ── Turbo / Flash tier ───────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen-turbo', + id: 'qwen-turbo', + name: 'Qwen Turbo', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-11-01', + aliases: ['qwen/qwen-turbo'], + context: 1_000_000, + max_tokens: 16_384, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + 
costs: { + tokens: 1_000_000, + prompt_tokens: 5, + completion_tokens: 20, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen-flash', + id: 'qwen-flash', + name: 'Qwen Flash', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2025-07-28', + aliases: ['qwen/qwen-flash'], + context: 1_000_000, + max_tokens: 32_768, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 5, + completion_tokens: 40, + cached_tokens: 0, + }, + }, + + // ── Coding models ────────────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen3-coder-plus', + id: 'qwen3-coder-plus', + name: 'Qwen3 Coder Plus', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-07-23', + aliases: ['qwen/qwen3-coder-plus'], + context: 1_048_576, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 100, + completion_tokens: 500, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-coder-flash', + id: 'qwen3-coder-flash', + name: 'Qwen3 Coder Flash', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-07-28', + aliases: ['qwen/qwen3-coder-flash'], + context: 1_000_000, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 30, + completion_tokens: 150, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-coder-480b-a35b-instruct', + id: 'qwen3-coder-480b-a35b-instruct', + name: 'Qwen3-Coder 480B-A35B Instruct', + modalities: { input: ['text'], output: ['text'] }, + 
open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-coder-480b-a35b-instruct'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 150, + completion_tokens: 750, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-coder-30b-a3b-instruct', + id: 'qwen3-coder-30b-a3b-instruct', + name: 'Qwen3-Coder 30B-A3B Instruct', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-coder-30b-a3b-instruct'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 45, + completion_tokens: 225, + cached_tokens: 0, + }, + }, + + // ── Reasoning models ─────────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwq-plus', + id: 'qwq-plus', + name: 'QwQ Plus', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2025-03-05', + aliases: ['qwen/qwq-plus'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 80, + completion_tokens: 240, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-next-80b-a3b-thinking', + id: 'qwen3-next-80b-a3b-thinking', + name: 'Qwen3-Next 80B-A3B (Thinking)', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-09', + aliases: ['qwen/qwen3-next-80b-a3b-thinking'], + context: 131_072, + max_tokens: 32_768, + costs_currency: 'usd-cents', + input_cost_key: 
'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 50, + completion_tokens: 600, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-next-80b-a3b-instruct', + id: 'qwen3-next-80b-a3b-instruct', + name: 'Qwen3-Next 80B-A3B Instruct', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-09', + aliases: ['qwen/qwen3-next-80b-a3b-instruct'], + context: 131_072, + max_tokens: 32_768, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 50, + completion_tokens: 200, + cached_tokens: 0, + }, + }, + + // ── Open-weight Qwen3 ────────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen3-235b-a22b', + id: 'qwen3-235b-a22b', + name: 'Qwen3 235B-A22B', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-235b-a22b'], + context: 131_072, + max_tokens: 16_384, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 70, + completion_tokens: 280, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-32b', + id: 'qwen3-32b', + name: 'Qwen3 32B', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-32b'], + context: 131_072, + max_tokens: 16_384, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 70, + completion_tokens: 280, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-14b', + id: 'qwen3-14b', + name: 'Qwen3 14B', + modalities: { input: ['text'], output: 
['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-14b'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 35, + completion_tokens: 140, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-8b', + id: 'qwen3-8b', + name: 'Qwen3 8B', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-8b'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 18, + completion_tokens: 70, + cached_tokens: 0, + }, + }, + + // ── Open-weight Qwen3.5 ──────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen3.5-397b-a17b', + id: 'qwen3.5-397b-a17b', + name: 'Qwen3.5 397B-A17B', + modalities: { + input: ['text', 'image', 'video', 'audio'], + output: ['text'], + }, + open_weights: true, + tool_call: true, + release_date: '2026-02-15', + aliases: ['qwen/qwen3.5-397b-a17b'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 60, + completion_tokens: 360, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3.5-122b-a10b', + id: 'qwen3.5-122b-a10b', + name: 'Qwen3.5 122B-A10B', + modalities: { + input: ['text', 'image', 'video', 'audio'], + output: ['text'], + }, + open_weights: true, + tool_call: true, + release_date: '2026-02-23', + aliases: ['qwen/qwen3.5-122b-a10b'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 
1_000_000, + prompt_tokens: 40, + completion_tokens: 320, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3.5-35b-a3b', + id: 'qwen3.5-35b-a3b', + name: 'Qwen3.5 35B-A3B', + modalities: { + input: ['text', 'image', 'video', 'audio'], + output: ['text'], + }, + open_weights: true, + tool_call: true, + release_date: '2026-02-23', + aliases: ['qwen/qwen3.5-35b-a3b'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 25, + completion_tokens: 200, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3.5-27b', + id: 'qwen3.5-27b', + name: 'Qwen3.5 27B', + modalities: { + input: ['text', 'image', 'video', 'audio'], + output: ['text'], + }, + open_weights: true, + tool_call: true, + release_date: '2026-02-23', + aliases: ['qwen/qwen3.5-27b'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 30, + completion_tokens: 240, + cached_tokens: 0, + }, + }, + + // ── Open-weight Qwen3.6 ──────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen3.6-35b-a3b', + id: 'qwen3.6-35b-a3b', + name: 'Qwen3.6 35B-A3B', + modalities: { + input: ['text', 'image', 'video', 'audio'], + output: ['text'], + }, + open_weights: true, + tool_call: true, + release_date: '2026-04-17', + aliases: ['qwen/qwen3.6-35b-a3b'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 24.8, + completion_tokens: 148.5, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3.6-27b', + id: 'qwen3.6-27b', + name: 'Qwen3.6 27B', + modalities: { + input: ['text', 'image', 'video', 'audio'], + output: ['text'], + }, + open_weights: true, + 
tool_call: true, + release_date: '2026-04-22', + aliases: ['qwen/qwen3.6-27b'], + context: 262_144, + max_tokens: 65_536, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 60, + completion_tokens: 360, + cached_tokens: 0, + }, + }, + + // ── Vision models ────────────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen-vl-max', + id: 'qwen-vl-max', + name: 'Qwen-VL Max', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-04-08', + aliases: ['qwen/qwen-vl-max'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 80, + completion_tokens: 320, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen-vl-plus', + id: 'qwen-vl-plus', + name: 'Qwen-VL Plus', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-01-25', + aliases: ['qwen/qwen-vl-plus'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 21, + completion_tokens: 63, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen-vl-ocr', + id: 'qwen-vl-ocr', + name: 'Qwen-VL OCR', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: false, + tool_call: false, + knowledge: '2024-04', + release_date: '2024-10-28', + aliases: ['qwen/qwen-vl-ocr'], + context: 34_096, + max_tokens: 4_096, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 72, + completion_tokens: 72, + cached_tokens: 
0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-vl-235b-a22b', + id: 'qwen3-vl-235b-a22b', + name: 'Qwen3-VL 235B-A22B', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-vl-235b-a22b'], + context: 131_072, + max_tokens: 32_768, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 70, + completion_tokens: 280, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-vl-plus', + id: 'qwen3-vl-plus', + name: 'Qwen3-VL Plus', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-09-23', + aliases: ['qwen/qwen3-vl-plus'], + context: 262_144, + max_tokens: 32_768, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 20, + completion_tokens: 160, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-vl-30b-a3b', + id: 'qwen3-vl-30b-a3b', + name: 'Qwen3-VL 30B-A3B', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2025-04', + release_date: '2025-04', + aliases: ['qwen/qwen3-vl-30b-a3b'], + context: 131_072, + max_tokens: 32_768, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 20, + completion_tokens: 80, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qvq-max', + id: 'qvq-max', + name: 'QVQ Max', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2025-03-25', + aliases: ['qwen/qvq-max'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', 
+ input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 120, + completion_tokens: 480, + cached_tokens: 0, + }, + }, + + // ── Legacy Qwen2.5 ──────────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen2-5-72b-instruct', + id: 'qwen2-5-72b-instruct', + name: 'Qwen2.5 72B Instruct', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-09', + aliases: ['qwen/qwen2-5-72b-instruct'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 140, + completion_tokens: 560, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen2-5-32b-instruct', + id: 'qwen2-5-32b-instruct', + name: 'Qwen2.5 32B Instruct', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-09', + aliases: ['qwen/qwen2-5-32b-instruct'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 70, + completion_tokens: 280, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen2-5-14b-instruct', + id: 'qwen2-5-14b-instruct', + name: 'Qwen2.5 14B Instruct', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-09', + aliases: ['qwen/qwen2-5-14b-instruct'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 35, + completion_tokens: 140, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen2-5-7b-instruct', + id: 
'qwen2-5-7b-instruct', + name: 'Qwen2.5 7B Instruct', + modalities: { input: ['text'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-09', + aliases: ['qwen/qwen2-5-7b-instruct'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 17.5, + completion_tokens: 70, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen2-5-vl-72b-instruct', + id: 'qwen2-5-vl-72b-instruct', + name: 'Qwen2.5-VL 72B Instruct', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-09', + aliases: ['qwen/qwen2-5-vl-72b-instruct'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 280, + completion_tokens: 840, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen2-5-vl-7b-instruct', + id: 'qwen2-5-vl-7b-instruct', + name: 'Qwen2.5-VL 7B Instruct', + modalities: { input: ['text', 'image'], output: ['text'] }, + open_weights: true, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-09', + aliases: ['qwen/qwen2-5-vl-7b-instruct'], + context: 131_072, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 35, + completion_tokens: 105, + cached_tokens: 0, + }, + }, + + // ── Omni models (text interface, audio costs excluded) ───────── + { + puterId: 'alibaba:qwen/qwen-omni-turbo', + id: 'qwen-omni-turbo', + name: 'Qwen-Omni Turbo', + modalities: { + input: ['text', 'image', 'audio', 'video'], + output: ['text'], + }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: 
'2025-01-19', + aliases: ['qwen/qwen-omni-turbo'], + context: 32_768, + max_tokens: 2_048, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 7, + completion_tokens: 27, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen3-omni-flash', + id: 'qwen3-omni-flash', + name: 'Qwen3-Omni Flash', + modalities: { + input: ['text', 'image', 'audio', 'video'], + output: ['text'], + }, + open_weights: false, + tool_call: true, + knowledge: '2024-04', + release_date: '2025-09-15', + aliases: ['qwen/qwen3-omni-flash'], + context: 65_536, + max_tokens: 16_384, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 43, + completion_tokens: 166, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen2-5-omni-7b', + id: 'qwen2-5-omni-7b', + name: 'Qwen2.5-Omni 7B', + modalities: { + input: ['text', 'image', 'audio', 'video'], + output: ['text'], + }, + open_weights: true, + tool_call: true, + knowledge: '2024-04', + release_date: '2024-12', + aliases: ['qwen/qwen2-5-omni-7b'], + context: 32_768, + max_tokens: 2_048, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 10, + completion_tokens: 40, + cached_tokens: 0, + }, + }, + + // ── Translation models ───────────────────────────────────────── + { + puterId: 'alibaba:qwen/qwen-mt-plus', + id: 'qwen-mt-plus', + name: 'Qwen-MT Plus', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: false, + knowledge: '2024-04', + release_date: '2025-01', + aliases: ['qwen/qwen-mt-plus'], + context: 16_384, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 246, + 
completion_tokens: 737, + cached_tokens: 0, + }, + }, + { + puterId: 'alibaba:qwen/qwen-mt-turbo', + id: 'qwen-mt-turbo', + name: 'Qwen-MT Turbo', + modalities: { input: ['text'], output: ['text'] }, + open_weights: false, + tool_call: false, + knowledge: '2024-04', + release_date: '2025-01', + aliases: ['qwen/qwen-mt-turbo'], + context: 16_384, + max_tokens: 8_192, + costs_currency: 'usd-cents', + input_cost_key: 'prompt_tokens', + output_cost_key: 'completion_tokens', + costs: { + tokens: 1_000_000, + prompt_tokens: 16, + completion_tokens: 49, + cached_tokens: 0, + }, + }, +];