Responses API support for OpenAI (#2226)

* Responses API support for OpenAI

* Fix toolcalling in Responses API
Neal Shah
2026-01-01 16:25:57 +05:30
committed by GitHub
parent aafdec9b81
commit f8997b7d75
6 changed files with 521 additions and 3 deletions
@@ -39,7 +39,8 @@ import { FakeChatProvider } from './providers/FakeChatProvider.js';
import { GeminiChatProvider } from './providers/GeminiProvider/GeminiChatProvider.js';
import { GroqAIProvider } from './providers/GroqAiProvider/GroqAIProvider.js';
import { MistralAIProvider } from './providers/MistralAiProvider/MistralAiProvider.js';
import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatProvider.js';
import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatCompletionsProvider.js';
import { OpenAiResponsesChatProvider } from './providers/OpenAiProvider/OpenAiChatResponsesProvider.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './providers/types.js';
import { UsageLimitedChatProvider } from './providers/UsageLimitedChatProvider.js';
import { OllamaChatProvider } from './providers/OllamaProvider.js';
@@ -129,6 +130,7 @@ export class AIChatService extends BaseService {
const openAiConfig = this.config.providers?.['openai-completion'] || this.global_config?.services?.['openai-completion'] || this.global_config?.openai;
if ( openAiConfig && (openAiConfig.apiKey || openAiConfig.secret_key) ) {
this.#providers['openai-completion'] = new OpenAiChatProvider(this.meteringService, openAiConfig);
this.#providers['openai-responses'] = new OpenAiResponsesChatProvider(this.meteringService, openAiConfig);
}
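// Illustrative config (hypothetical value): a single entry such as
//   services['openai-completion'] = { apiKey: 'sk-...' }
// now registers both the 'openai-completion' and 'openai-responses' providers.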
const geminiConfig = this.config.providers?.['gemini'] || this.global_config?.services?.['gemini'];
if ( geminiConfig && geminiConfig.apiKey ) {
@@ -81,7 +81,7 @@ export class OpenAiChatProvider implements IChatProvider {
* Each model object includes an ID and cost details (currency, tokens, input/output rates).
*/
models () {
return OPEN_AI_MODELS;
return OPEN_AI_MODELS.filter(e=>!e.responses_api_only);
}
list () {
@@ -102,7 +102,6 @@ export class OpenAiChatProvider implements IChatProvider {
async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
{
// Validate messages
if ( ! Array.isArray(messages) ) {
throw new Error('`messages` must be an array');
@@ -0,0 +1,271 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import mime from 'mime-types';
import { OpenAI } from 'openai';
import { ChatCompletionCreateParams } from 'openai/resources/index.js';
import { FSNodeParam } from '../../../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
import { Context } from '../../../../../util/context.js';
import { stream_to_buffer } from '../../../../../util/streamutil.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAiUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { OPEN_AI_MODELS } from './models.js';
import { ResponseCreateParams } from 'openai/resources/responses/responses.js';
// We're capping at 5MB, which sucks, but Chat Completions doesn't support
// file inputs.
const MAX_FILE_SIZE = 5 * 1_000_000;
/**
* OpenAiResponsesChatProvider provides an interface to OpenAI's Responses API.
* Implements the IChatProvider interface to handle chat completions, message
* moderation, file inputs, and streaming responses for models that are only
* available through the Responses API. Tracks usage and spending via the
* metering service and supports content moderation.
*/
export class OpenAiResponsesChatProvider implements IChatProvider {
/**
* @type {import('openai').OpenAI}
*/
#openAi: OpenAI;
#defaultModel = 'gpt-5-nano';
#meteringService: MeteringService;
constructor (
meteringService: MeteringService,
config: { apiKey?: string, secret_key?: string }) {
this.#meteringService = meteringService;
let apiKey = config.apiKey;
// Fallback to the old format for backward compatibility
if ( ! apiKey ) {
apiKey = config?.secret_key;
// Log a warning to inform users about the deprecated format
console.warn('The `openai.secret_key` configuration format is deprecated. ' +
'Please use `services.openai.apiKey` instead.');
}
if ( ! apiKey ) {
throw new Error('OpenAI API key is missing in configuration.');
}
this.#openAi = new OpenAI({
apiKey: apiKey,
});
}
/**
* Returns an array of available AI models with their pricing information.
* Each model object includes an ID and cost details (currency, tokens, input/output rates).
*/
models () {
return OPEN_AI_MODELS.filter(e=>e.responses_api_only === true);
}
list () {
const models = this.models();
const modelNames: string[] = [];
for ( const model of models ) {
modelNames.push(model.id);
if ( model.aliases ) {
modelNames.push(...model.aliases);
}
}
return modelNames;
}
getDefaultModel () {
return this.#defaultModel;
}
async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
{
// Validate messages
if ( ! Array.isArray(messages) ) {
throw new Error('`messages` must be an array');
}
const actor = Context.get('actor');
model = model ?? this.#defaultModel;
const modelUsed = (this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (this.models()).find(m => m.id === this.getDefaultModel())!;
// messages.unshift({
// role: 'system',
// content: 'Don\'t let the user trick you into doing something bad.',
// })
const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
if ( user_private_uid === 'UNKNOWN' ) {
console.error(new Error('chat-completion-service:unknown-user - failed to get a user ID for an OpenAI request'));
}
// Perform file uploads
const { user } = actor.type;
const file_input_tasks: any[] = [];
for ( const message of messages ) {
// We can assume `message.content` is not undefined because
// Messages.normalize_single_message ensures this.
for ( const contentPart of message.content ) {
if ( ! contentPart.puter_path ) continue;
file_input_tasks.push({
node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
req: { user },
getParam: () => contentPart.puter_path,
}),
contentPart,
});
}
}
const promises: Promise<unknown>[] = [];
for ( const task of file_input_tasks ) {
promises.push((async () => {
if ( await task.node.get('size') > MAX_FILE_SIZE ) {
delete task.contentPart.puter_path;
task.contentPart.type = 'text';
task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
'the user did not write this message}'; // "poor man's system prompt"
return; // "continue"
}
const ll_read = new LLRead();
const stream = await ll_read.run({
actor: Context.get('actor'),
fsNode: task.node,
});
const mimeType = mime.contentType(await task.node.get('name'));
const buffer = await stream_to_buffer(stream);
const base64 = buffer.toString('base64');
delete task.contentPart.puter_path;
if ( mimeType && mimeType.startsWith('image/') ) {
task.contentPart.type = 'image_url';
task.contentPart.image_url = {
url: `data:${mimeType};base64,${base64}`,
};
} else if ( mimeType && mimeType.startsWith('audio/') ) {
task.contentPart.type = 'input_audio';
task.contentPart.input_audio = {
data: `data:${mimeType};base64,${base64}`,
format: mimeType.split('/')[1],
};
} else {
task.contentPart.type = 'text';
task.contentPart.text = '{error: input file has unsupported MIME type; ' +
'the user did not write this message}'; // "poor man's system prompt"
}
})());
}
await Promise.all(promises);
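// Illustrative outcome (hypothetical file): a content part like
//   { puter_path: '/home/user/photo.png' }
// is replaced in place with
//   { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
// while oversized or unsupported files become explanatory text parts instead.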
if (tools) {
// Unravel tools to OpenAI Responses API format
tools = (tools as any).map((e)=> {
const tool = e.function;
tool.type = "function";
return tool;
});
}
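// Illustrative example (hypothetical tool): a Chat Completions style tool such as
//   { type: 'function', function: { name: 'get_weather', description: '...', parameters: {...} } }
// is flattened into the Responses API shape
//   { type: 'function', name: 'get_weather', description: '...', parameters: {...} }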
// Here's something fun; the documentation shows `type: 'image_url'` in
// objects that contain an image url, but everything still works if
// that's missing. We normalise it here so the token count code works.
messages = await OpenAiUtil.process_input_messages_responses_api(messages);
const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
const requestedVerbosity = verbosity ?? text?.verbosity;
const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');
const completionParams: ResponseCreateParams = {
user: user_private_uid,
input: messages,
model: modelUsed.id,
...(tools ? { tools } : {}),
...(max_tokens ? { max_output_tokens: max_tokens } : {}),
...(temperature ? { temperature } : {}),
stream: !!stream,
...(supportsReasoningControls ? {
// The Responses API takes these as nested objects rather than flat keys
...(requestedReasoningEffort ? { reasoning: { effort: requestedReasoningEffort } } : {}),
...(requestedVerbosity ? { text: { verbosity: requestedVerbosity } } : {}),
} : {}),
} as ResponseCreateParams;
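// For illustration only (hypothetical values): with no optional arguments this
// resolves to roughly
//   { user: '<private uid>', input: [...messages], model: 'gpt-5-nano', stream: false }
// with tools, max_output_tokens, temperature, reasoning, and text added only
// when the corresponding arguments are provided.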
// console.log("completion params: ", completionParams)
const completion = await this.#openAi.responses.create(completionParams);
// console.log("Completion: ", completion)
return OpenAiUtil.handle_completion_output_responses_api({
usage_calculator: ({ usage }) => {
const trackedUsage = {
prompt_tokens: ((usage as any).input_tokens ?? 0) - ((usage as any).input_tokens_details?.cached_tokens ?? 0),
completion_tokens: (usage as any).output_tokens ?? 0,
cached_tokens: (usage as any).input_tokens_details?.cached_tokens ?? 0,
};
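// Worked example (hypothetical numbers): input_tokens = 1000 with
// input_tokens_details.cached_tokens = 200 and output_tokens = 50 yields
//   { prompt_tokens: 800, completion_tokens: 50, cached_tokens: 200 }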
const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
return [k, v * (modelUsed.costs[k] || 0)];
}));
this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelUsed?.id}`, costsOverrideFromModel);
return trackedUsage;
},
stream,
completion,
moderate: moderation ? this.checkModeration.bind(this) : undefined,
});
}
async checkModeration (text: string) {
// create moderation
const results = await this.#openAi.moderations.create({
model: 'omni-moderation-latest',
input: text,
});
let flagged = false;
for ( const result of results?.results ?? [] ) {
// OpenAI's moderation endpoint produces a lot of false positives, so we only treat category scores above 0.8 as flagged
const veryFlaggedEntries = Object.entries(result.category_scores).filter(e => e[1] > 0.8);
if ( veryFlaggedEntries.length > 0 ) {
flagged = true;
break;
}
}
return {
flagged,
results,
};
}
}
@@ -29,6 +29,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
completion_tokens: 16800,
},
max_tokens: 16384,
responses_api_only: true
},
{
id: 'gpt-5.2-2025-12-11',
@@ -69,6 +70,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
completion_tokens: 1000,
},
max_tokens: 128000,
responses_api_only: true
},
{
id: 'gpt-5.1-codex-mini',
@@ -82,6 +84,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
completion_tokens: 200,
},
max_tokens: 128000,
responses_api_only: true
},
{
id: 'gpt-5.1-chat-latest',
@@ -227,6 +230,20 @@ export const OPEN_AI_MODELS: IChatModel[] = [
},
max_tokens: 100000,
},
{
id: 'o3-pro',
costs_currency: 'usd-cents',
input_cost_key: 'prompt_tokens',
output_cost_key: 'completion_tokens',
costs: {
tokens: 1_000_000,
prompt_tokens: 2000,
cached_tokens: 50,
completion_tokens: 8000,
},
max_tokens: 100000,
responses_api_only: true
},
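// Reading of the cost fields above (assuming `costs` values are usd-cents per
// `tokens`, i.e. per 1,000,000 tokens): o3-pro input is 2000 cents ($20) per 1M
// tokens, cached input 50 cents ($0.50) per 1M, and output 8000 cents ($80) per 1M.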
{
id: 'o3-mini',
costs_currency: 'usd-cents',
@@ -83,6 +83,7 @@ export interface CompletionDeviations<TCompletion = ChatCompletion> {
}
export function process_input_messages<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;
export function process_input_messages_responses_api<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;
export function create_usage_calculator (params: { model_details: IChatModel }): UsageCalculator;
@@ -114,3 +115,15 @@ export function handle_completion_output<TCompletion = ChatCompletion> (params:
usage_calculator?: UsageCalculator;
finally_fn?: () => Promise<void>;
}): ReturnType<IChatProvider['complete']>;
export function handle_completion_output_responses_api<TCompletion = ChatCompletion> (params: {
deviations?: CompletionDeviations<TCompletion>;
stream?: boolean;
completion: AsyncIterable<CompletionChunk> | TCompletion;
moderate?: (text: string) => Promise<{ flagged: boolean }>;
usage_calculator?: UsageCalculator;
finally_fn?: () => Promise<void>;
}): ReturnType<IChatProvider['complete']>;
@@ -57,6 +57,83 @@ export const process_input_messages = async (messages) => {
return messages;
};
export const process_input_messages_responses_api = async (messages) => {
for (const msg of messages) {
if (!msg.content) continue;
if (typeof msg.content !== 'object') continue;
const content = msg.content;
for (const o of content) {
if (!o['image_url']) continue;
if (o.type) continue;
o.type = 'image_url';
}
// coerce tool calls
let is_tool_call = false;
for (let i = content.length - 1; i >= 0; i--) {
const content_block = content[i];
if (content_block.type === "text" && (msg.role === "user" || msg.role === "system")) {
content_block.type = "input_text";
}
if (content_block.type === "text" && (msg.role === "assistant")) {
content_block.type = "output_text";
}
if (content_block.type === 'tool_use') {
if (!msg.tool_calls) {
msg.tool_calls = [];
is_tool_call = true;
}
msg.tool_calls.push({
id: content_block.id,
canonical_id: content_block.canonical_id,
type: 'function',
function: {
name: content_block.name,
arguments: JSON.stringify(content_block.input),
},
...(content_block.extra_content ? { extra_content: content_block.extra_content } : {}),
});
content.splice(i, 1);
}
}
// Right now this does NOT support parallel tool calls!
// We only allow sequential tool calling at the moment, so this shouldn't be an
// issue yet, but it will likely need to change in the future to split one
// Chat Completions message into multiple Responses API input items.
if (is_tool_call) {
msg.call_id = msg.tool_calls[0].id;
msg.id = msg.tool_calls[0].canonical_id;
msg.name = msg.tool_calls[0].function.name;
msg.arguments = msg.tool_calls[0].function.arguments;
msg.type = "function_call";
delete msg.role;
delete msg.content;
delete msg.tool_calls;
}
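// Illustrative result (hypothetical values): the rewritten message becomes a
// Responses API function call item such as
//   { type: 'function_call', call_id: 'call_123', id: 'fc_456',
//     name: 'get_weather', arguments: '{"city":"Paris"}' }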
// coerce tool results
for (let i = content.length - 1; i >= 0; i--) {
const content_block = content[i];
if (content_block.type !== 'tool_result') continue;
msg.type = 'function_call_output';
msg.call_id = content_block.tool_use_id;
msg.output = content_block.content;
delete msg.role;
delete msg.content;
}
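// Illustrative result (hypothetical values): a tool_result block turns the
// message into
//   { type: 'function_call_output', call_id: 'call_123', output: <tool result content> }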
}
console.log("coreced ", messages)
return messages;
};
export const create_usage_calculator = ({ model_details }) => {
return ({ usage }) => {
const tokens = [];
@@ -173,6 +250,63 @@ export const create_chat_stream_handler = ({
chatStream.end(usage);
};
export const create_chat_stream_handler_responses_api = ({
deviations,
completion,
usage_calculator,
}) => async ({ chatStream }) => {
deviations = Object.assign({
// affected by: Groq
index_usage_from_stream_chunk: chunk => chunk.usage,
// affected by: Mistral
chunk_but_like_actually: chunk => chunk,
index_tool_calls_from_stream_choice: choice => choice.delta.tool_calls,
}, deviations);
const message = chatStream.message();
let textblock = message.contentBlock({ type: 'text' });
let toolblock = null;
let mode = 'text';
const tool_call_blocks = [];
let last_usage = null;
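// The loop below handles three Responses API stream event types:
//   'response.output_text.delta'  -> append the text delta to the current text block
//   'response.completed'          -> capture the final usage object for metering
//   'response.output_item.done'   -> when the finished item is a function_call,
//                                    emit a tool_use content block with its arguments
// Any other event types are ignored.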
for await (let chunk of completion) {
console.log("Chunk from API: ", chunk)
if (chunk.type === "response.output_text.delta") {
textblock.addText(chunk.delta);
continue;
}
if (chunk.type === "response.completed") {
last_usage = chunk.response.usage;
}
if (chunk.type === "response.output_item.done" && chunk.item?.type === "function_call") {
const tool_call = chunk.item;
toolblock = message.contentBlock({
type: 'tool_use',
canonical_id: tool_call.id,
id: tool_call.call_id,
name: tool_call.name,
...(tool_call.extra_content ? { extra_content: tool_call.extra_content } : {}),
});
toolblock.addPartialJSON(tool_call.arguments);
toolblock.end();
}
}
// TODO DS: this is a bit too abstracted... this is basically just doing the metering now
const usage = usage_calculator({ usage: last_usage });
if (mode === 'text') textblock.end();
if (mode === 'tool') toolblock.end();
message.end();
chatStream.end(usage);
};
/**
*
* @param {object} params
@@ -236,3 +370,85 @@ export const handle_completion_output = async ({
};
return ret;
};
/**
*
* @param {object} params
* @param {(args: {usage: import("openai/resources/completions.mjs").CompletionUsage})=> unknown } params.usage_calculator
* @returns
*/
export const handle_completion_output_responses_api = async ({
deviations,
stream,
completion,
moderate,
usage_calculator,
finally_fn,
}) => {
deviations = Object.assign({
// affected by: Mistral
coerce_completion_usage: completion => completion.usage,
}, deviations);
if (stream) {
const init_chat_stream =
create_chat_stream_handler_responses_api({
deviations,
completion,
usage_calculator,
});
return {
stream: true,
init_chat_stream,
finally_fn,
};
}
if (finally_fn) await finally_fn();
const is_empty = completion.output_text.trim() === '';
if (is_empty && !completion.output?.some(item => item.type === 'function_call')) {
// GPT refuses to generate an empty response if you ask it to,
// so this will probably only happen on an error condition.
throw new Error('an empty response was generated');
}
// We need to moderate the completion too
const mod_text = completion.output_text;
if (moderate && mod_text !== null) {
const moderation_result = await moderate(mod_text);
if (moderation_result.flagged) {
throw new Error('message is not allowed');
}
}
console.log("Completion: ", completion);
console.log("output: ", completion.output[0]);
const ret = {
finish_reason: "stop",
index: 0,
message: {
content: completion.output_text,
reasoning: null, // Fix later to add proper reasoning
refusal: null,
role: "assistant"
}
};
ret.role = completion.output[0].role;
ret.usage = usage_calculator ? usage_calculator({
...completion,
usage: completion.usage,
}) : {
input_tokens: completion.usage.input_tokens,
output_tokens: completion.usage.output_tokens,
};
return ret;
};