From 7b5d90d678dbe64564d46a3745bd33ad02ddfe92 Mon Sep 17 00:00:00 2001 From: Reynaldi Chernando <12949382+reynaldichernando@users.noreply.github.com> Date: Tue, 9 Dec 2025 05:06:36 +0700 Subject: [PATCH] Fix AI types (#2100) * Remove constructor and global methods * Fix streaming type and stricter type * Add missing chat options param * Fix messages type * Fix img2txt types * Fix txt2img types * remove unused * Fix test mode in txt2img * Fix txt2vid types * Fix speech2txt types * Fix speech2speech types * Fix txt2speech types * Add missing chat types * Fix missing txt2speech types --- src/puter-js/types/modules/ai.d.ts | 141 ++++++++++++++++++++++------- 1 file changed, 107 insertions(+), 34 deletions(-) diff --git a/src/puter-js/types/modules/ai.d.ts b/src/puter-js/types/modules/ai.d.ts index d70245feb..71c736dea 100644 --- a/src/puter-js/types/modules/ai.d.ts +++ b/src/puter-js/types/modules/ai.d.ts @@ -3,34 +3,53 @@ export type AIMessageContent = string | { image_url?: { url: string } } | Record export interface ChatMessage { role?: string; content: AIMessageContent | AIMessageContent[]; - [key: string]: unknown; + tool_calls?: ToolCall[]; +} + +export interface ToolCall { + id: string; + function: { name: string, arguments: string }; } export interface ChatOptions { model?: string; temperature?: number; max_tokens?: number; - stream?: boolean; vision?: boolean; - [key: string]: unknown; + driver?: string; + tools?: unknown; + response?: unknown; + reasoning?: unknown; + reasoning_effort?: string; + text?: unknown; + verbosity?: unknown; +} + +export interface StreamingChatOptions extends ChatOptions { + stream: boolean; } export interface ChatResponse { message?: ChatMessage; choices?: unknown; - [key: string]: unknown; } export interface ChatResponseChunk { text?: string; - [key: string]: unknown; + reasoning?: string; } export interface Img2TxtOptions { source?: string | File | Blob; provider?: string; testMode?: boolean; - [key: string]: unknown; + model?: string; + pages?: number[]; + includeImageBase64?: boolean; + imageLimit?: number; + imageMinSize?: number; + bboxAnnotationFormat?: string; + documentAnnotationFormat?: string; } export interface Txt2ImgOptions { @@ -39,18 +58,38 @@ export interface Txt2ImgOptions { quality?: string; input_image?: string; input_image_mime_type?: string; - [key: string]: unknown; + driver?: string; + provider?: string; + service?: string; + ratio?: { w: number; h: number }; + width?: number; + height?: number; + aspect_ratio?: string; + steps?: number; + seed?: number; + negative_prompt?: string; + n?: number; + image_url?: string; + image_base64?: string; + mask_image_url?: string; + mask_image_base64?: string; + prompt_strength?: number; + disable_safety_checker?: boolean; + response_format?: string; } export interface Txt2VidOptions { prompt?: string; model?: string; duration?: number; + seconds?: number; width?: number; height?: number; fps?: number; steps?: number; - [key: string]: unknown; + driver?: string; + provider?: string; + service?: string; } export interface Txt2SpeechOptions { @@ -61,14 +100,9 @@ export interface Txt2SpeechOptions { provider?: string; model?: string; response_format?: string; - [key: string]: unknown; -} - -export interface Txt2SpeechCallable { - (text: string, options?: Txt2SpeechOptions): Promise; - (text: string, language?: string, voice?: string, engine?: string): Promise; - listEngines: (options?: string | Record) => Promise; - listVoices: (options?: string | Record) => Promise; + output_format?: string; + instructions?: string; + voice_settings?: Record; } export interface Speech2TxtOptions { @@ -80,7 +114,13 @@ export interface Speech2TxtOptions { prompt?: string; stream?: boolean; translate?: boolean; - [key: string]: unknown; + temperature?: number; + logprobs?: boolean; + timestamp_granularities?: string[]; + chunking_strategy?: string; + known_speaker_names?: string[]; + known_speaker_references?: string[]; + extra_body?: Record; } export interface Speech2SpeechOptions { @@ -88,36 +128,69 @@ export interface Speech2SpeechOptions { file?: string | File | Blob; provider?: string; model?: string; + modelId?: string; + model_id?: string; voice?: string; - [key: string]: unknown; + voiceId?: string; + voice_id?: string; + output_format?: string; + outputFormat?: string; + voice_settings?: Record; + voiceSettings?: Record; + file_format?: string; + fileFormat?: string; + remove_background_noise?: boolean; + removeBackgroundNoise?: boolean; + optimize_streaming_latency?: number; + optimizeStreamingLatency?: number; + enable_logging?: boolean; + enableLogging?: boolean; } export class AI { - constructor (context: { authToken?: string; APIOrigin: string; appID?: string }); - - setAuthToken (authToken: string): void; - setAPIOrigin (APIOrigin: string): void; - listModels (provider?: string): Promise[]>; listModelProviders (): Promise; - chat (prompt: string, options?: ChatOptions): Promise; - chat (prompt: string, imageURL: string | File, options?: ChatOptions): Promise; - chat (messages: ChatMessage[], options?: ChatOptions): Promise; - chat (prompt: string, options: ChatOptions & { stream: true }): AsyncIterable; - chat (prompt: string, imageURL: string | File, options: ChatOptions & { stream: true }): AsyncIterable; - chat (messages: ChatMessage[], options: ChatOptions & { stream: true }): AsyncIterable; - chat (...args: unknown[]): Promise | AsyncIterable; + chat (prompt: string, testMode?: boolean): Promise; + chat (prompt: string, options: ChatOptions, testMode?: boolean): Promise; + chat (prompt: string, imageURL: string | File, testMode?: boolean): Promise; + chat (prompt: string, imageURLArray: string[], testMode?: boolean): Promise; + chat (prompt: string, imageURL: string | File, options: ChatOptions, testMode?: boolean): Promise; + chat (prompt: string, imageURLArray: string[], options: ChatOptions, testMode?: boolean): Promise; + + chat (prompt: string, options: StreamingChatOptions, testMode?: boolean): AsyncIterable; + chat (prompt: string, imageURL: string | File, options: StreamingChatOptions, testMode?: boolean): AsyncIterable; + chat (prompt: string, imageURLArray: string[], options: StreamingChatOptions, testMode?: boolean): AsyncIterable; + + chat (messages: ChatMessage[], testMode?: boolean): Promise; + chat (messages: ChatMessage[], options: ChatOptions, testMode?: boolean): Promise; + chat (messages: ChatMessage[], options: StreamingChatOptions, testMode?: boolean): AsyncIterable; + + img2txt (source: string | File | Blob, testMode?: boolean): Promise; + img2txt (source: string | File | Blob, options: Img2TxtOptions, testMode?: boolean): Promise; + img2txt (options: Img2TxtOptions, testMode?: boolean): Promise; - img2txt (source: string | File | Blob | Img2TxtOptions, testMode?: boolean): Promise; txt2img (prompt: string, testMode?: boolean): Promise; txt2img (prompt: string, options: Txt2ImgOptions): Promise; + txt2img (options: Txt2ImgOptions, testMode?: boolean): Promise; + txt2vid (prompt: string, testMode?: boolean): Promise; txt2vid (prompt: string, options: Txt2VidOptions): Promise; - speech2txt (source: string | File | Blob | Speech2TxtOptions, options?: Speech2TxtOptions): Promise>; - speech2speech (source: string | File | Blob | Speech2SpeechOptions, options?: Speech2SpeechOptions): Promise>; + txt2vid (options: Txt2VidOptions, testMode?: boolean): Promise; - txt2speech: Txt2SpeechCallable; + speech2txt (source: string | File | Blob, testMode?: boolean): Promise>; + speech2txt (source: string | File | Blob, options: Speech2TxtOptions, testMode?: boolean): Promise>; + speech2txt (options: Speech2TxtOptions, testMode?: boolean): Promise>; + + speech2speech (source: string | File | Blob, testMode?: boolean): Promise; + speech2speech (source: string | File | Blob, options: Speech2SpeechOptions, testMode?: boolean): Promise; + speech2speech (options: Speech2SpeechOptions, testMode?: boolean): Promise; + + txt2speech (text: string, testMode?: boolean): Promise; + txt2speech (text: string, options: Txt2SpeechOptions, testMode?: boolean): Promise; + txt2speech (text: string, language: string, testMode?: boolean): Promise; + txt2speech (text: string, language: string, voice: string, testMode?: boolean): Promise; + txt2speech (text: string, language: string, voice: string, engine: string, testMode?: boolean): Promise; } // NOTE: AI responses contain provider-specific payloads that are not fully typed here because