From f8997b7d75e6acfc720cbd7ff8c908168df7d332 Mon Sep 17 00:00:00 2001 From: Neal Shah <30693865+ProgrammerIn-wonderland@users.noreply.github.com> Date: Thu, 1 Jan 2026 16:25:57 +0530 Subject: [PATCH] Responses API support for OpenAI (#2226) * Responses API support for OpenAI * Fix toolcalling in Responses API --- .../src/services/ai/chat/AIChatService.ts | 4 +- ...er.ts => OpenAiChatCompletionsProvider.ts} | 3 +- .../OpenAiChatResponsesProvider.ts | 271 ++++++++++++++++++ .../chat/providers/OpenAiProvider/models.ts | 17 ++ .../src/services/ai/utils/OpenAIUtil.d.ts | 13 + .../src/services/ai/utils/OpenAIUtil.js | 216 ++++++++++++++ 6 files changed, 521 insertions(+), 3 deletions(-) rename src/backend/src/services/ai/chat/providers/OpenAiProvider/{OpenAiChatProvider.ts => OpenAiChatCompletionsProvider.ts} (99%) create mode 100644 src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatResponsesProvider.ts diff --git a/src/backend/src/services/ai/chat/AIChatService.ts b/src/backend/src/services/ai/chat/AIChatService.ts index 277b9c7dc..bca950116 100644 --- a/src/backend/src/services/ai/chat/AIChatService.ts +++ b/src/backend/src/services/ai/chat/AIChatService.ts @@ -39,7 +39,8 @@ import { FakeChatProvider } from './providers/FakeChatProvider.js'; import { GeminiChatProvider } from './providers/GeminiProvider/GeminiChatProvider.js'; import { GroqAIProvider } from './providers/GroqAiProvider/GroqAIProvider.js'; import { MistralAIProvider } from './providers/MistralAiProvider/MistralAiProvider.js'; -import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatProvider.js'; +import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatCompletionsProvider.js'; +import { OpenAiResponsesChatProvider } from './providers/OpenAiProvider/OpenAiChatResponsesProvider.js'; import { IChatModel, IChatProvider, ICompleteArguments } from './providers/types.js'; import { UsageLimitedChatProvider } from './providers/UsageLimitedChatProvider.js'; import { OllamaChatProvider } from './providers/OllamaProvider.js'; @@ -129,6 +130,7 @@ export class AIChatService extends BaseService { const openAiConfig = this.config.providers?.['openai-completion'] || this.global_config?.services?.['openai-completion'] || this.global_config?.openai; if ( openAiConfig && (openAiConfig.apiKey || openAiConfig.secret_key) ) { this.#providers['openai-completion'] = new OpenAiChatProvider(this.meteringService, openAiConfig); + this.#providers['openai-responses'] = new OpenAiResponsesChatProvider(this.meteringService, openAiConfig); } const geminiConfig = this.config.providers?.['gemini'] || this.global_config?.services?.['gemini']; if ( geminiConfig && geminiConfig.apiKey ) { diff --git a/src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatProvider.ts b/src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatCompletionsProvider.ts similarity index 99% rename from src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatProvider.ts rename to src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatCompletionsProvider.ts index 4adee51c2..1e0790a9c 100644 --- a/src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatProvider.ts +++ b/src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatCompletionsProvider.ts @@ -81,7 +81,7 @@ export class OpenAiChatProvider implements IChatProvider { * Each model object includes an ID and cost details (currency, tokens, input/output rates). 
*/
     models () {
-        return OPEN_AI_MODELS;
+        return OPEN_AI_MODELS.filter(e => !e.responses_api_only);
     }
 
     list () {
@@ -102,7 +102,6 @@ export class OpenAiChatProvider implements IChatProvider {
     async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType
     {
-
         // Validate messages
         if ( ! Array.isArray(messages) ) {
             throw new Error('`messages` must be an array');
diff --git a/src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatResponsesProvider.ts b/src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatResponsesProvider.ts
new file mode 100644
index 000000000..a00687c27
--- /dev/null
+++ b/src/backend/src/services/ai/chat/providers/OpenAiProvider/OpenAiChatResponsesProvider.ts
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import mime from 'mime-types';
+import { OpenAI } from 'openai';
+import { FSNodeParam } from '../../../../../api/filesystem/FSNodeParam.js';
+import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
+import { Context } from '../../../../../util/context.js';
+import { stream_to_buffer } from '../../../../../util/streamutil.js';
+import { MeteringService } from '../../../../MeteringService/MeteringService.js';
+import * as OpenAiUtil from '../../../utils/OpenAIUtil.js';
+import { IChatProvider, ICompleteArguments } from '../types.js';
+import { OPEN_AI_MODELS } from './models.js';
+import { ResponseCreateParams } from 'openai/resources/responses/responses.js';
+
+// We cap file inputs at 5 MB; they are inlined into the request as base64
+// data URLs rather than uploaded as files.
+const MAX_FILE_SIZE = 5 * 1_000_000;
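+
+// Illustrative sketch (comments only, not executed): complete() below
+// rewrites Puter file references into inline data URLs. A content part that
+// arrives as, say,
+//     { puter_path: '/home/user/cat.png' }
+// is read through LLRead and, if it is a small enough image, becomes
+//     { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
+// Oversized or unsupported files are replaced with a text part carrying an
+// error notice (the "poor man's system prompt").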
+
+/**
+* OpenAiResponsesChatProvider provides an interface to OpenAI's Responses API.
+* Implements the IChatProvider interface to handle chat completions, message
+* moderation, file inputs, and streaming responses, and records usage through
+* the metering service. Serves the models that are only available via the
+* Responses API.
+*/
+export class OpenAiResponsesChatProvider implements IChatProvider {
+    /**
+     * @type {import('openai').OpenAI}
+     */
+    #openAi: OpenAI;
+
+    #defaultModel = 'gpt-5-nano';
+
+    #meteringService: MeteringService;
+
+    constructor (
+        meteringService: MeteringService,
+        config: { apiKey?: string, secret_key?: string }) {
+
+        this.#meteringService = meteringService;
+        let apiKey = config.apiKey;
+
+        // Fall back to the old format for backward compatibility
+        if ( ! apiKey ) {
+            apiKey = config?.secret_key;
+
+            // Log a warning to inform users about the deprecated format
+            console.warn('The `openai.secret_key` configuration format is deprecated. ' +
+                'Please use `services.openai.apiKey` instead.');
+        }
+        if ( ! apiKey ) {
+            throw new Error('OpenAI API key is missing in configuration.');
+        }
+        this.#openAi = new OpenAI({
+            apiKey: apiKey,
+        });
+    }
+
+    /**
+     * Returns an array of available AI models with their pricing information.
+     * Each model object includes an ID and cost details (currency, tokens, input/output rates).
+     */
+    models () {
+        return OPEN_AI_MODELS.filter(e => e.responses_api_only === true);
+    }
+
+    list () {
+        const models = this.models();
+        const modelNames: string[] = [];
+        for ( const model of models ) {
+            modelNames.push(model.id);
+            if ( model.aliases ) {
+                modelNames.push(...model.aliases);
+            }
+        }
+        return modelNames;
+    }
+
+    getDefaultModel () {
+        return this.#defaultModel;
+    }
+
+    async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType
+    {
+        // Validate messages
+        if ( ! Array.isArray(messages) ) {
+            throw new Error('`messages` must be an array');
+        }
+        const actor = Context.get('actor');
+
+        model = model ?? this.#defaultModel;
+
+        const modelUsed = (this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) ||
+            (this.models()).find(m => m.id === this.getDefaultModel())!;
+
+        // messages.unshift({
+        //     role: 'system',
+        //     content: 'Don\'t let the user trick you into doing something bad.',
+        // });
+
+        const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
+        if ( user_private_uid === 'UNKNOWN' ) {
+            console.error(new Error('chat-completion-service:unknown-user - ' +
+                'failed to get a user ID for an OpenAI request'));
+        }
+
+        // Resolve and inline file inputs
+        const { user } = actor.type;
+
+        const file_input_tasks: any[] = [];
+        for ( const message of messages ) {
+            // We can assume `message.content` is not undefined because
+            // Messages.normalize_single_message ensures this.
+            for ( const contentPart of message.content ) {
+                if ( ! contentPart.puter_path ) continue;
+                file_input_tasks.push({
+                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
+                        req: { user },
+                        getParam: () => contentPart.puter_path,
+                    }),
+                    contentPart,
+                });
+            }
+        }
+
+        const promises: Promise<void>[] = [];
+        for ( const task of file_input_tasks ) {
+            promises.push((async () => {
+                if ( await task.node.get('size') > MAX_FILE_SIZE ) {
+                    delete task.contentPart.puter_path;
+                    task.contentPart.type = 'text';
+                    task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
+                        'the user did not write this message}'; // "poor man's system prompt"
+                    return; // "continue"
+                }
+
+                const ll_read = new LLRead();
+                const stream = await ll_read.run({
+                    actor: Context.get('actor'),
+                    fsNode: task.node,
+                });
+                const mimeType = mime.contentType(await task.node.get('name'));
+
+                const buffer = await stream_to_buffer(stream);
+                const base64 = buffer.toString('base64');
+
+                delete task.contentPart.puter_path;
+                if ( mimeType && mimeType.startsWith('image/') ) {
+                    task.contentPart.type = 'image_url';
+                    task.contentPart.image_url = {
+                        url: `data:${mimeType};base64,${base64}`,
+                    };
+                } else if ( mimeType && mimeType.startsWith('audio/') ) {
+                    task.contentPart.type = 'input_audio';
+                    task.contentPart.input_audio = {
+                        data: `data:${mimeType};base64,${base64}`,
+                        format: mimeType.split('/')[1],
+                    };
+                } else {
+                    task.contentPart.type = 'text';
+                    task.contentPart.text = '{error: input file has unsupported MIME type; ' +
+                        'the user did not write this message}'; // "poor man's system prompt"
+                }
+            })());
+        }
+        await Promise.all(promises);
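+
+        // Sketch of the tool translation below (shapes assumed from the two
+        // APIs; `get_weather` is a made-up example): a Chat Completions style
+        // tool definition such as
+        //     { type: 'function', function: { name: 'get_weather', parameters: {...} } }
+        // is flattened into the Responses API form
+        //     { type: 'function', name: 'get_weather', parameters: {...} }
+        // i.e. the nested `function` wrapper is hoisted to the top level.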
+
+        if (tools) {
+            // Unravel tools to the OpenAI Responses API format: hoist the
+            // nested `function` object to the top level and tag its type.
+            tools = (tools as any).map((e) => {
+                const tool = e.function;
+                tool.type = 'function';
+                return tool;
+            });
+        }
+
+        // Here's something fun: the documentation shows `type: 'image_url'` in
+        // objects that contain an image URL, but everything still works if
+        // that's missing. We normalise it here so the token count code works.
+        messages = await OpenAiUtil.process_input_messages_responses_api(messages);
+
+        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
+        const requestedVerbosity = verbosity ?? text?.verbosity;
+        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');
+
+        const completionParams: ResponseCreateParams = {
+            user: user_private_uid,
+            input: messages,
+            model: modelUsed.id,
+            ...(tools ? { tools } : {}),
+            ...(max_tokens ? { max_output_tokens: max_tokens } : {}),
+            ...(temperature ? { temperature } : {}),
+            stream: !!stream,
+            // Only gpt-5 family models accept these controls; the Responses
+            // API nests them under `reasoning` and `text` rather than the
+            // Chat Completions style top-level `reasoning_effort` /
+            // `verbosity` keys.
+            ...(supportsReasoningControls ? {
+                ...(requestedReasoningEffort ? { reasoning: { effort: requestedReasoningEffort } } : {}),
+                ...(requestedVerbosity ? { text: { verbosity: requestedVerbosity } } : {}),
+            } : {}),
+        } as ResponseCreateParams;
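+
+        // For illustration, a request for a gpt-5 family model might end up
+        // shaped like this (values are examples, not defaults):
+        //     {
+        //         user: 'abc123',
+        //         input: [ ...normalised messages... ],
+        //         model: 'gpt-5-nano',
+        //         max_output_tokens: 1024,
+        //         stream: false,
+        //         reasoning: { effort: 'low' },
+        //         text: { verbosity: 'medium' },
+        //     }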
+
+        const completion = await this.#openAi.responses.create(completionParams);
+        return OpenAiUtil.handle_completion_output_responses_api({
+            usage_calculator: ({ usage }) => {
+                const trackedUsage = {
+                    prompt_tokens: ((usage as any).input_tokens ?? 0) - ((usage as any).input_tokens_details?.cached_tokens ?? 0),
+                    completion_tokens: (usage as any).output_tokens ?? 0,
+                    cached_tokens: (usage as any).input_tokens_details?.cached_tokens ?? 0,
+                };
+
+                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
+                    return [k, v * (modelUsed.costs[k] || 0)];
+                }));
+
+                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelUsed?.id}`, costsOverrideFromModel);
+                return trackedUsage;
+            },
+            stream,
+            completion,
+            moderate: moderation ? this.checkModeration.bind(this) : undefined,
+        });
+    }
+
+    async checkModeration (text: string) {
+        const results = await this.#openAi.moderations.create({
+            model: 'omni-moderation-latest',
+            input: text,
+        });
+
+        let flagged = false;
+
+        for ( const result of results?.results ?? [] ) {
+            // OpenAI's moderation endpoint produces many false positives, so
+            // we only treat categories scoring above 0.8 as flagged.
+            const veryFlaggedEntries = Object.entries(result.category_scores).filter(e => e[1] > 0.8);
+            if ( veryFlaggedEntries.length > 0 ) {
+                flagged = true;
+                break;
+            }
+        }
+
+        return {
+            flagged,
+            results,
+        };
+    }
+}
diff --git a/src/backend/src/services/ai/chat/providers/OpenAiProvider/models.ts b/src/backend/src/services/ai/chat/providers/OpenAiProvider/models.ts
index a893c2bd1..965a33b86 100644
--- a/src/backend/src/services/ai/chat/providers/OpenAiProvider/models.ts
+++ b/src/backend/src/services/ai/chat/providers/OpenAiProvider/models.ts
@@ -29,6 +29,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
             completion_tokens: 16800,
         },
         max_tokens: 16384,
+        responses_api_only: true,
     },
     {
         id: 'gpt-5.2-2025-12-11',
@@ -69,6 +70,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
             completion_tokens: 1000,
         },
         max_tokens: 128000,
+        responses_api_only: true,
     },
     {
         id: 'gpt-5.1-codex-mini',
@@ -82,6 +84,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
             completion_tokens: 200,
         },
         max_tokens: 128000,
+        responses_api_only: true,
     },
     {
         id: 'gpt-5.1-chat-latest',
@@ -227,6 +230,20 @@ export const OPEN_AI_MODELS: IChatModel[] = [
         },
         max_tokens: 100000,
     },
+    {
+        id: 'o3-pro',
+        costs_currency: 'usd-cents',
+        input_cost_key: 'prompt_tokens',
+        output_cost_key: 'completion_tokens',
+        costs: {
+            tokens: 1_000_000,
+            prompt_tokens: 2000,
+            cached_tokens: 50,
+            completion_tokens: 8000,
+        },
+        max_tokens: 100000,
+        responses_api_only: true,
+    },
     {
         id: 'o3-mini',
         costs_currency: 'usd-cents',
diff --git a/src/backend/src/services/ai/utils/OpenAIUtil.d.ts b/src/backend/src/services/ai/utils/OpenAIUtil.d.ts
index 1cd7bad68..117bb8b7c 100644
--- a/src/backend/src/services/ai/utils/OpenAIUtil.d.ts
+++ b/src/backend/src/services/ai/utils/OpenAIUtil.d.ts
@@ -83,6 +83,7 @@ export interface CompletionDeviations {
 }
 
 export function process_input_messages (messages: TMessage[]): Promise;
+export function process_input_messages_responses_api (messages: TMessage[]): Promise<TMessage[]>;
 
 export function create_usage_calculator (params: { model_details: IChatModel }): UsageCalculator;
 
@@ -114,3 +115,15 @@ export function handle_completion_output (params:
     usage_calculator?: UsageCalculator;
     finally_fn?: () => Promise;
 }): ReturnType;
+
+export function handle_completion_output_responses_api (params: {
+    deviations?: CompletionDeviations;
+    stream?: boolean;
+    completion: AsyncIterable | TCompletion;
+    moderate?: (text: string) => Promise<{ flagged: boolean }>;
+    usage_calculator?: UsageCalculator;
+    finally_fn?: () => Promise<void>;
+}): ReturnType;
diff --git a/src/backend/src/services/ai/utils/OpenAIUtil.js b/src/backend/src/services/ai/utils/OpenAIUtil.js
index 967723559..2ab95a9b5 100644
--- a/src/backend/src/services/ai/utils/OpenAIUtil.js
+++ b/src/backend/src/services/ai/utils/OpenAIUtil.js
@@ -57,6 +57,83 @@ export const process_input_messages = async (messages) => {
     return messages;
 };
 
+export const process_input_messages_responses_api = async (messages) => {
+    for (const msg of messages) {
+        if (!msg.content) continue;
+        if (typeof msg.content !== 'object') continue;
+
+        const content = msg.content;
+
+        for (const o of content) {
+            if (!o['image_url']) continue;
+            if (o.type) continue;
+            o.type = 'image_url';
+        }
+
+        // coerce tool calls
+        let is_tool_call = false;
+        for (let i = content.length - 1; i >= 0; i--) {
+            const content_block = content[i];
+            if (content_block.type === 'text' && (msg.role === 'user' || msg.role === 'system')) {
+                content_block.type = 'input_text';
+            }
+            if (content_block.type === 'text' && msg.role === 'assistant') {
+                content_block.type = 'output_text';
+            }
+
+            if (content_block.type === 'tool_use') {
+                if (!msg.tool_calls) {
+                    msg.tool_calls = [];
+                    is_tool_call = true;
+                }
+                msg.tool_calls.push({
+                    id: content_block.id,
+                    canonical_id: content_block.canonical_id,
+                    type: 'function',
+                    function: {
+                        name: content_block.name,
+                        arguments: JSON.stringify(content_block.input),
+                    },
+                    ...(content_block.extra_content ? { extra_content: content_block.extra_content } : {}),
+                });
+
+                content.splice(i, 1);
+            }
+        }
+
+        // NOTE: this does NOT support parallel tool calls. Only sequential
+        // tool calling is allowed at the moment, so it isn't a problem yet,
+        // but in the future one Chat Completions style message may need to
+        // be split into multiple Responses API input items.
+        if (is_tool_call) {
+            msg.call_id = msg.tool_calls[0].id;
+            msg.id = msg.tool_calls[0].canonical_id;
+            msg.name = msg.tool_calls[0].function.name;
+            msg.arguments = msg.tool_calls[0].function.arguments;
+            msg.type = 'function_call';
+
+            delete msg.role;
+            delete msg.content;
+            delete msg.tool_calls;
+        }
+
+        // coerce tool results
+        for (let i = content.length - 1; i >= 0; i--) {
+            const content_block = content[i];
+            if (content_block.type !== 'tool_result') continue;
+            msg.type = 'function_call_output';
+            msg.call_id = content_block.tool_use_id;
+            msg.output = content_block.content;
+
+            delete msg.role;
+            delete msg.content;
+        }
+    }
+
+    return messages;
+};
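+
+// For illustration, the coercion above turns a Chat Completions style
+// assistant message carrying a tool call, e.g.
+//     { role: 'assistant', content: [{ type: 'tool_use', id: 'call_1',
+//       name: 'get_weather', input: { city: 'Paris' } }] }
+// into a Responses API input item:
+//     { type: 'function_call', call_id: 'call_1', name: 'get_weather',
+//       arguments: '{"city":"Paris"}' }
+// and a tool result message into:
+//     { type: 'function_call_output', call_id: 'call_1', output: '...' }
+// (identifiers here are made up for the example).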
 
 export const create_usage_calculator = ({ model_details }) => {
     return ({ usage }) => {
         const tokens = [];
@@ -173,6 +250,63 @@ export const create_chat_stream_handler = ({
     chatStream.end(usage);
 };
 
+export const create_chat_stream_handler_responses_api = ({
+    deviations,
+    completion,
+    usage_calculator,
+}) => async ({ chatStream }) => {
+    // `deviations` is accepted for parity with create_chat_stream_handler,
+    // but no provider deviations apply to the Responses stream format yet.
+    void deviations;
+
+    const message = chatStream.message();
+    const textblock = message.contentBlock({ type: 'text' });
+    let toolblock = null;
+
+    let last_usage = null;
+    for await (const chunk of completion) {
+        if (chunk.type === 'response.output_text.delta') {
+            textblock.addText(chunk.delta);
+            continue;
+        }
+
+        if (chunk.type === 'response.completed') {
+            last_usage = chunk.response.usage;
+        }
+
+        if (chunk.type === 'response.output_item.done' && chunk.item?.type === 'function_call') {
+            const tool_call = chunk.item;
+            toolblock = message.contentBlock({
+                type: 'tool_use',
+                canonical_id: tool_call.id,
+                id: tool_call.call_id,
+                name: tool_call.name,
+                ...(tool_call.extra_content ? { extra_content: tool_call.extra_content } : {}),
+            });
+            toolblock.addPartialJSON(tool_call.arguments);
+            toolblock.end();
+        }
+    }
+
+    // TODO DS: this is a bit too abstracted... this is basically just doing
+    // the metering now
+    const usage = usage_calculator({ usage: last_usage });
+
+    textblock.end();
+
+    message.end();
+    chatStream.end(usage);
+};
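+
+// For reference, the Responses streaming events handled above look roughly
+// like this (abridged; values are made up for the example):
+//     { type: 'response.output_text.delta', delta: 'Hel' }
+//     { type: 'response.output_item.done',
+//       item: { type: 'function_call', call_id: 'call_1',
+//               name: 'get_weather', arguments: '{"city":"Paris"}' } }
+//     { type: 'response.completed',
+//       response: { usage: { input_tokens: 10, output_tokens: 42 } } }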
 
 /**
  *
  * @param {object} params
@@ -236,3 +370,85 @@ export const handle_completion_output = async ({
     };
     return ret;
 };
+
+/**
+ * Handles streaming and non-streaming output from the Responses API.
+ *
+ * @param {object} params
+ * @param {(args: {usage: import("openai/resources/completions.mjs").CompletionUsage}) => unknown} params.usage_calculator
+ * @returns
+ */
+export const handle_completion_output_responses_api = async ({
+    deviations,
+    stream,
+    completion,
+    moderate,
+    usage_calculator,
+    finally_fn,
+}) => {
+    deviations = Object.assign({
+        // affected by: Mistral
+        coerce_completion_usage: completion => completion.usage,
+    }, deviations);
+
+    if (stream) {
+        const init_chat_stream =
+            create_chat_stream_handler_responses_api({
+                deviations,
+                completion,
+                usage_calculator,
+            });
+
+        return {
+            stream: true,
+            init_chat_stream,
+            finally_fn,
+        };
+    }
+
+    if (finally_fn) await finally_fn();
+
+    // Responses API results carry tool calls as `function_call` items in
+    // `completion.output` rather than under `choices[...].message.tool_calls`.
+    const has_tool_calls = completion.output?.some(item => item.type === 'function_call');
+    const is_empty = completion.output_text.trim() === '';
+    if (is_empty && !has_tool_calls) {
+        // GPT refuses to generate an empty response if you ask it to,
+        // so this will probably only happen on an error condition.
+        throw new Error('an empty response was generated');
+    }
+
+    // We need to moderate the completion too
+    const mod_text = completion.output_text;
+    if (moderate && mod_text !== null) {
+        const moderation_result = await moderate(mod_text);
+        if (moderation_result.flagged) {
+            throw new Error('message is not allowed');
+        }
+    }
+
+    const ret = {
+        finish_reason: 'stop',
+        index: 0,
+        message: {
+            content: completion.output_text,
+            reasoning: null, // TODO: surface proper reasoning output later
+            refusal: null,
+            role: completion.output[0]?.role ?? 'assistant',
+        },
+    };
+
+    ret.usage = usage_calculator ? usage_calculator({
+        ...completion,
+        usage: completion.usage,
+    }) : {
+        input_tokens: completion.usage.input_tokens,
+        output_tokens: completion.usage.output_tokens,
+    };
+    return ret;
+};
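+
+// Worked example of the metering arithmetic in the providers (numbers made
+// up): given usage
+//     { input_tokens: 1000,
+//       input_tokens_details: { cached_tokens: 200 },
+//       output_tokens: 50 }
+// the tracked usage becomes
+//     prompt_tokens     = 1000 - 200 = 800
+//     cached_tokens     = 200
+//     completion_tokens = 50
+// and the cost override pairs each count with count * costs[key], where the
+// model's `costs` entries are usd-cents per `costs.tokens` (e.g. 1_000_000)
+// tokens; scaling by `costs.tokens` is assumed to happen in the metering
+// service.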