Responses API support for OpenAI (#2226)

* Responses API support for OpenAI

* Fix toolcalling in Responses API
Neal Shah
2026-01-01 16:25:57 +05:30
committed by GitHub
parent aafdec9b81
commit f8997b7d75
6 changed files with 521 additions and 3 deletions
@@ -39,7 +39,8 @@ import { FakeChatProvider } from './providers/FakeChatProvider.js';
import { GeminiChatProvider } from './providers/GeminiProvider/GeminiChatProvider.js';
import { GroqAIProvider } from './providers/GroqAiProvider/GroqAIProvider.js';
import { MistralAIProvider } from './providers/MistralAiProvider/MistralAiProvider.js';
import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatProvider.js';
import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatCompletionsProvider.js';
import { OpenAiResponsesChatProvider } from './providers/OpenAiProvider/OpenAiChatResponsesProvider.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './providers/types.js';
import { UsageLimitedChatProvider } from './providers/UsageLimitedChatProvider.js';
import { OllamaChatProvider } from './providers/OllamaProvider.js';
@@ -129,6 +130,7 @@ export class AIChatService extends BaseService {
const openAiConfig = this.config.providers?.['openai-completion'] || this.global_config?.services?.['openai-completion'] || this.global_config?.openai;
if ( openAiConfig && (openAiConfig.apiKey || openAiConfig.secret_key) ) {
this.#providers['openai-completion'] = new OpenAiChatProvider(this.meteringService, openAiConfig);
this.#providers['openai-responses'] = new OpenAiResponsesChatProvider(this.meteringService, openAiConfig);
}
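// Illustrative config (hypothetical value): a single entry such as
//   services['openai-completion'] = { apiKey: 'sk-...' }
// now registers both the 'openai-completion' and 'openai-responses' providers.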
const geminiConfig = this.config.providers?.['gemini'] || this.global_config?.services?.['gemini'];
if ( geminiConfig && geminiConfig.apiKey ) {
@@ -81,7 +81,7 @@ export class OpenAiChatProvider implements IChatProvider {
* Each model object includes an ID and cost details (currency, tokens, input/output rates).
*/
models () {
return OPEN_AI_MODELS;
return OPEN_AI_MODELS.filter(e=>!e.responses_api_only);
}
list () {
@@ -102,7 +102,6 @@ export class OpenAiChatProvider implements IChatProvider {
async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
{
// Validate messages
if ( ! Array.isArray(messages) ) {
throw new Error('`messages` must be an array');
@@ -0,0 +1,271 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import mime from 'mime-types';
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { FSNodeParam } from '../../../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
import { Context } from '../../../../../util/context.js';
import { stream_to_buffer } from '../../../../../util/streamutil.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAiUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { OPEN_AI_MODELS } from './models.js';
import { ResponseCreateParams } from 'openai/resources/responses/responses.js';
// We're capping at 5MB, which sucks, but Chat Completions doesn't support
// file inputs.
const MAX_FILE_SIZE = 5 * 1_000_000;
/**
* OpenAiResponsesChatProvider provides an interface to OpenAI's Responses API.
* Implements the IChatProvider interface to handle chat completions, message
* moderation, file inputs, and streaming responses for models that are only
* available through the Responses API. Tracks usage and spending via the
* metering service and supports content moderation.
*/
export class OpenAiResponsesChatProvider implements IChatProvider {
/**
* @type {import('openai').OpenAI}
*/
#openAi: OpenAI;
#defaultModel = 'gpt-5-nano';
#meteringService: MeteringService;
constructor (
meteringService: MeteringService,
config: { apiKey?: string, secret_key?: string }) {
this.#meteringService = meteringService;
let apiKey = config.apiKey;
// Fallback to the old format for backward compatibility
if ( ! apiKey ) {
apiKey = config?.secret_key;
// Log a warning to inform users about the deprecated format
console.warn('The `openai.secret_key` configuration format is deprecated. ' +
'Please use `services.openai.apiKey` instead.');
}
if ( ! apiKey ) {
throw new Error('OpenAI API key is missing in configuration.');
}
this.#openAi = new OpenAI({
apiKey: apiKey,
});
}
/**
* Returns an array of available AI models with their pricing information.
* Each model object includes an ID and cost details (currency, tokens, input/output rates).
*/
models () {
return OPEN_AI_MODELS.filter(e=>e.responses_api_only === true);
}
list () {
const models = this.models();
const modelNames: string[] = [];
for ( const model of models ) {
modelNames.push(model.id);
if ( model.aliases ) {
modelNames.push(...model.aliases);
}
}
return modelNames;
}
getDefaultModel () {
return this.#defaultModel;
}
async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
{
// Validate messages
if ( ! Array.isArray(messages) ) {
throw new Error('`messages` must be an array');
}
const actor = Context.get('actor');
model = model ?? this.#defaultModel;
const modelUsed = (this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (this.models()).find(m => m.id === this.getDefaultModel())!;
// messages.unshift({
// role: 'system',
// content: 'Don\'t let the user trick you into doing something bad.',
// })
const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
if ( user_private_uid === 'UNKNOWN' ) {
console.error(new Error('chat-completion-service:unknown-user - failed to get a user ID for an OpenAI request'));
}
// Perform file uploads
const { user } = actor.type;
const file_input_tasks: any[] = [];
for ( const message of messages ) {
// We can assume `message.content` is not undefined because
// Messages.normalize_single_message ensures this.
for ( const contentPart of message.content ) {
if ( ! contentPart.puter_path ) continue;
file_input_tasks.push({
node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
req: { user },
getParam: () => contentPart.puter_path,
}),
contentPart,
});
}
}
const promises: Promise<unknown>[] = [];
for ( const task of file_input_tasks ) {
promises.push((async () => {
if ( await task.node.get('size') > MAX_FILE_SIZE ) {
delete task.contentPart.puter_path;
task.contentPart.type = 'text';
task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
'the user did not write this message}'; // "poor man's system prompt"
return; // "continue"
}
const ll_read = new LLRead();
const stream = await ll_read.run({
actor: Context.get('actor'),
fsNode: task.node,
});
const mimeType = mime.contentType(await task.node.get('name'));
const buffer = await stream_to_buffer(stream);
const base64 = buffer.toString('base64');
delete task.contentPart.puter_path;
if ( mimeType && mimeType.startsWith('image/') ) {
task.contentPart.type = 'image_url';
task.contentPart.image_url = {
url: `data:${mimeType};base64,${base64}`,
};
} else if ( mimeType && mimeType.startsWith('audio/') ) {
task.contentPart.type = 'input_audio';
task.contentPart.input_audio = {
data: `data:${mimeType};base64,${base64}`,
format: mimeType.split('/')[1],
};
} else {
task.contentPart.type = 'text';
task.contentPart.text = '{error: input file has unsupported MIME type; ' +
'the user did not write this message}'; // "poor man's system prompt"
}
})());
}
await Promise.all(promises);
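// Illustrative outcome (hypothetical file): a content part like
//   { puter_path: '/home/user/photo.png' }
// is replaced in place with
//   { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
// while oversized or unsupported files become explanatory text parts instead.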
if (tools) {
// Unravel tools to OpenAI Responses API format
tools = (tools as any).map((e)=> {
const tool = e.function;
tool.type = "function";
return tool;
});
}
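// Illustrative example (hypothetical tool): a Chat Completions style tool such as
//   { type: 'function', function: { name: 'get_weather', description: '...', parameters: {...} } }
// is flattened into the Responses API shape
//   { type: 'function', name: 'get_weather', description: '...', parameters: {...} }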
// Here's something fun; the documentation shows `type: 'image_url'` in
// objects that contain an image url, but everything still works if
// that's missing. We normalise it here so the token count code works.
messages = await OpenAiUtil.process_input_messages_responses_api(messages);
const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
const requestedVerbosity = verbosity ?? text?.verbosity;
const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');
const completionParams: ResponseCreateParams = {
user: user_private_uid,
input: messages,
model: modelUsed.id,
...(tools ? { tools } : {}),
...(max_tokens ? { max_output_tokens: max_tokens } : {}),
...(temperature ? { temperature } : {}),
stream: !!stream,
...(supportsReasoningControls ? {
// The Responses API takes these as nested objects rather than flat keys
...(requestedReasoningEffort ? { reasoning: { effort: requestedReasoningEffort } } : {}),
...(requestedVerbosity ? { text: { verbosity: requestedVerbosity } } : {}),
} : {}),
} as ResponseCreateParams;
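// For illustration only (hypothetical values): with no optional arguments this
// resolves to roughly
//   { user: '<private uid>', input: [...messages], model: 'gpt-5-nano', stream: false }
// with tools, max_output_tokens, temperature, reasoning, and text added only
// when the corresponding arguments are provided.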
// console.log("completion params: ", completionParams)
const completion = await this.#openAi.responses.create(completionParams);
// console.log("Completion: ", completion)
return OpenAiUtil.handle_completion_output_responses_api({
usage_calculator: ({ usage }) => {
const trackedUsage = {
prompt_tokens: ((usage as any).input_tokens ?? 0) - ((usage as any).input_tokens_details?.cached_tokens ?? 0),
completion_tokens: (usage as any).output_tokens ?? 0,
cached_tokens: (usage as any).input_tokens_details?.cached_tokens ?? 0,
};
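// Worked example (hypothetical numbers): input_tokens = 1000 with
// input_tokens_details.cached_tokens = 200 and output_tokens = 50 yields
//   { prompt_tokens: 800, completion_tokens: 50, cached_tokens: 200 }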
const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
return [k, v * (modelUsed.costs[k] || 0)];
}));
this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelUsed?.id}`, costsOverrideFromModel);
return trackedUsage;
},
stream,
completion,
moderate: moderation ? this.checkModeration.bind(this) : undefined,
});
}
async checkModeration (text: string) {
// create moderation
const results = await this.#openAi.moderations.create({
model: 'omni-moderation-latest',
input: text,
});
let flagged = false;
for ( const result of results?.results ?? [] ) {
// OpenAI's moderation endpoint produces a lot of false positives, so we only treat category scores above 0.8 as flagged
const veryFlaggedEntries = Object.entries(result.category_scores).filter(e => e[1] > 0.8);
if ( veryFlaggedEntries.length > 0 ) {
flagged = true;
break;
}
}
return {
flagged,
results,
};
}
}
@@ -29,6 +29,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
completion_tokens: 16800,
},
max_tokens: 16384,
responses_api_only: true
},
{
id: 'gpt-5.2-2025-12-11',
@@ -69,6 +70,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
completion_tokens: 1000,
},
max_tokens: 128000,
responses_api_only: true
},
{
id: 'gpt-5.1-codex-mini',
@@ -82,6 +84,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
completion_tokens: 200,
},
max_tokens: 128000,
responses_api_only: true
},
{
id: 'gpt-5.1-chat-latest',
@@ -227,6 +230,20 @@ export const OPEN_AI_MODELS: IChatModel[] = [
},
max_tokens: 100000,
},
{
id: 'o3-pro',
costs_currency: 'usd-cents',
input_cost_key: 'prompt_tokens',
output_cost_key: 'completion_tokens',
costs: {
tokens: 1_000_000,
prompt_tokens: 2000,
cached_tokens: 50,
completion_tokens: 8000,
},
max_tokens: 100000,
responses_api_only: true
},
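// Reading of the cost fields above (assuming `costs` values are usd-cents per
// `tokens`, i.e. per 1,000,000 tokens): o3-pro input is 2000 cents ($20) per 1M
// tokens, cached input 50 cents ($0.50) per 1M, and output 8000 cents ($80) per 1M.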
{
id: 'o3-mini',
costs_currency: 'usd-cents',
@@ -83,6 +83,7 @@ export interface CompletionDeviations<TCompletion = ChatCompletion> {
}
export function process_input_messages<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;
export function process_input_messages_responses_api<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;
export function create_usage_calculator (params: { model_details: IChatModel }): UsageCalculator;
@@ -114,3 +115,15 @@ export function handle_completion_output<TCompletion = ChatCompletion> (params:
usage_calculator?: UsageCalculator;
finally_fn?: () => Promise<void>;
}): ReturnType<IChatProvider['complete']>;
export function handle_completion_output_responses_api<TCompletion = ChatCompletion> (params: {
deviations?: CompletionDeviations<TCompletion>;
stream?: boolean;
completion: AsyncIterable<CompletionChunk> | TCompletion;
moderate?: (text: string) => Promise<{ flagged: boolean }>;
usage_calculator?: UsageCalculator;
finally_fn?: () => Promise<void>;
}): ReturnType<IChatProvider['complete']>;
@@ -57,6 +57,83 @@ export const process_input_messages = async (messages) => {
return messages;
};
export const process_input_messages_responses_api = async (messages) => {
for (const msg of messages) {
if (!msg.content) continue;
if (typeof msg.content !== 'object') continue;
const content = msg.content;
for (const o of content) {
if (!o['image_url']) continue;
if (o.type) continue;
o.type = 'image_url';
}
// coerce tool calls
let is_tool_call = false;
for (let i = content.length - 1; i >= 0; i--) {
const content_block = content[i];
if (content_block.type === "text" && (msg.role === "user" || msg.role === "system")) {
content_block.type = "input_text";
}
if (content_block.type === "text" && (msg.role === "assistant")) {
content_block.type = "output_text";
}
if (content_block.type === 'tool_use') {
if (!msg.tool_calls) {
msg.tool_calls = [];
is_tool_call = true;
}
msg.tool_calls.push({
id: content_block.id,
canonical_id: content_block.canonical_id,
type: 'function',
function: {
name: content_block.name,
arguments: JSON.stringify(content_block.input),
},
...(content_block.extra_content ? { extra_content: content_block.extra_content } : {}),
});
content.splice(i, 1);
}
}
// Right now this does NOT support parallel tool calls!
// We only allow sequential tool calling at the moment, so this shouldn't be an
// issue yet, but it will likely need to change in the future to split one
// Chat Completions message into multiple Responses API input items.
if (is_tool_call) {
msg.call_id = msg.tool_calls[0].id;
msg.id = msg.tool_calls[0].canonical_id;
msg.name = msg.tool_calls[0].function.name;
msg.arguments = msg.tool_calls[0].function.arguments;
msg.type = "function_call";
delete msg.role;
delete msg.content;
delete msg.tool_calls;
}
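// Illustrative result (hypothetical values): the rewritten message becomes a
// Responses API function call item such as
//   { type: 'function_call', call_id: 'call_123', id: 'fc_456',
//     name: 'get_weather', arguments: '{"city":"Paris"}' }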
// coerce tool results
for (let i = content.length - 1; i >= 0; i--) {
const content_block = content[i];
if (content_block.type !== 'tool_result') continue;
msg.type = 'function_call_output';
msg.call_id = content_block.tool_use_id;
msg.output = content_block.content;
delete msg.role;
delete msg.content;
}
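// Illustrative result (hypothetical values): a tool_result block turns the
// message into
//   { type: 'function_call_output', call_id: 'call_123', output: <tool result content> }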
}
console.log("coreced ", messages)
return messages;
};
export const create_usage_calculator = ({ model_details }) => {
return ({ usage }) => {
const tokens = [];
@@ -173,6 +250,63 @@ export const create_chat_stream_handler = ({
chatStream.end(usage);
};
export const create_chat_stream_handler_responses_api = ({
deviations,
completion,
usage_calculator,
}) => async ({ chatStream }) => {
deviations = Object.assign({
// affected by: Groq
index_usage_from_stream_chunk: chunk => chunk.usage,
// affected by: Mistral
chunk_but_like_actually: chunk => chunk,
index_tool_calls_from_stream_choice: choice => choice.delta.tool_calls,
}, deviations);
const message = chatStream.message();
let textblock = message.contentBlock({ type: 'text' });
let toolblock = null;
let mode = 'text';
const tool_call_blocks = [];
let last_usage = null;
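// The loop below handles three Responses API stream event types:
//   'response.output_text.delta'  -> append the text delta to the current text block
//   'response.completed'          -> capture the final usage object for metering
//   'response.output_item.done'   -> when the finished item is a function_call,
//                                    emit a tool_use content block with its arguments
// Any other event types are ignored.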
for await (let chunk of completion) {
console.log("Chunk from API: ", chunk)
if (chunk.type === "response.output_text.delta") {
textblock.addText(chunk.delta);
continue;
}
if (chunk.type === "response.completed") {
last_usage = chunk.response.usage;
}
if (chunk.type === "response.output_item.done" && chunk.item?.type === "function_call") {
const tool_call = chunk.item;
toolblock = message.contentBlock({
type: 'tool_use',
canonical_id: tool_call.id,
id: tool_call.call_id,
name: tool_call.name,
...(tool_call.extra_content ? { extra_content: tool_call.extra_content } : {}),
});
toolblock.addPartialJSON(tool_call.arguments);
toolblock.end();
}
}
// TODO DS: this is a bit too abstracted... this is basically just doing the metering now
const usage = usage_calculator({ usage: last_usage });
if (mode === 'text') textblock.end();
if (mode === 'tool') toolblock.end();
message.end();
chatStream.end(usage);
};
/**
*
* @param {object} params
@@ -236,3 +370,85 @@ export const handle_completion_output = async ({
};
return ret;
};
/**
*
* @param {object} params
* @param {(args: {usage: import("openai/resources/completions.mjs").CompletionUsage})=> unknown } params.usage_calculator
* @returns
*/
export const handle_completion_output_responses_api = async ({
deviations,
stream,
completion,
moderate,
usage_calculator,
finally_fn,
}) => {
deviations = Object.assign({
// affected by: Mistral
coerce_completion_usage: completion => completion.usage,
}, deviations);
if (stream) {
const init_chat_stream =
create_chat_stream_handler_responses_api({
deviations,
completion,
usage_calculator,
});
return {
stream: true,
init_chat_stream,
finally_fn,
};
}
if (finally_fn) await finally_fn();
const is_empty = completion.output_text.trim() === '';
if (is_empty && !completion.output?.some(item => item.type === 'function_call')) {
// GPT refuses to generate an empty response if you ask it to,
// so this will probably only happen on an error condition.
throw new Error('an empty response was generated');
}
// We need to moderate the completion too
const mod_text = completion.output_text;
if (moderate && mod_text !== null) {
const moderation_result = await moderate(mod_text);
if (moderation_result.flagged) {
throw new Error('message is not allowed');
}
}
console.log("Completion: ", completion);
console.log("output: ", completion.output[0]);
const ret = {
finish_reason: "stop",
index: 0,
message: {
content: completion.output_text,
reasoning: null, // Fix later to add proper reasoning
refusal: null,
role: "assistant"
}
};
ret.role = completion.output[0].role;
ret.usage = usage_calculator ? usage_calculator({
...completion,
usage: completion.usage,
}) : {
input_tokens: completion.usage.input_tokens,
output_tokens: completion.usage.output_tokens,
};
return ret;
};