Responses API support for OpenAI (#2226)
* Responses API support for OpenAI
* Fix tool calling in the Responses API
@@ -39,7 +39,8 @@ import { FakeChatProvider } from './providers/FakeChatProvider.js';
 import { GeminiChatProvider } from './providers/GeminiProvider/GeminiChatProvider.js';
 import { GroqAIProvider } from './providers/GroqAiProvider/GroqAIProvider.js';
 import { MistralAIProvider } from './providers/MistralAiProvider/MistralAiProvider.js';
-import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatProvider.js';
+import { OpenAiChatProvider } from './providers/OpenAiProvider/OpenAiChatCompletionsProvider.js';
+import { OpenAiResponsesChatProvider } from './providers/OpenAiProvider/OpenAiChatResponsesProvider.js';
 import { IChatModel, IChatProvider, ICompleteArguments } from './providers/types.js';
 import { UsageLimitedChatProvider } from './providers/UsageLimitedChatProvider.js';
 import { OllamaChatProvider } from './providers/OllamaProvider.js';
@@ -129,6 +130,7 @@ export class AIChatService extends BaseService {
         const openAiConfig = this.config.providers?.['openai-completion'] || this.global_config?.services?.['openai-completion'] || this.global_config?.openai;
         if ( openAiConfig && (openAiConfig.apiKey || openAiConfig.secret_key) ) {
             this.#providers['openai-completion'] = new OpenAiChatProvider(this.meteringService, openAiConfig);
+            this.#providers['openai-responses'] = new OpenAiResponsesChatProvider(this.meteringService, openAiConfig);
         }
         const geminiConfig = this.config.providers?.['gemini'] || this.global_config?.services?.['gemini'];
         if ( geminiConfig && geminiConfig.apiKey ) {
@@ -81,7 +81,7 @@ export class OpenAiChatProvider implements IChatProvider {
      * Each model object includes an ID and cost details (currency, tokens, input/output rates).
      */
     models () {
-        return OPEN_AI_MODELS;
+        return OPEN_AI_MODELS.filter(e => !e.responses_api_only);
     }
 
     list () {
@@ -102,7 +102,6 @@ export class OpenAiChatProvider implements IChatProvider {
 
     async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
     {
-
         // Validate messages
         if ( ! Array.isArray(messages) ) {
             throw new Error('`messages` must be an array');
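
Taken together with the Responses provider added below, the `responses_api_only` flag partitions the model list between the two providers. A minimal sketch of that split (the filter predicates are the only logic involved; `ModelLike` is a stand-in for IChatModel):

type ModelLike = { id: string; responses_api_only?: boolean };

// Chat Completions provider: models without the flag (absent counts as false).
const completionModels = (all: ModelLike[]) => all.filter(e => !e.responses_api_only);

// Responses provider: models explicitly flagged responses_api_only.
const responsesModels = (all: ModelLike[]) => all.filter(e => e.responses_api_only === true);

// For a boolean-or-absent flag the predicates are complements, so every
// model lands in exactly one of the two providers.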
@@ -0,0 +1,271 @@
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import mime from 'mime-types';
import { OpenAI } from 'openai';
import { ResponseCreateParams } from 'openai/resources/responses/responses.js';
import { FSNodeParam } from '../../../../../api/filesystem/FSNodeParam.js';
import { LLRead } from '../../../../../filesystem/ll_operations/ll_read.js';
import { Context } from '../../../../../util/context.js';
import { stream_to_buffer } from '../../../../../util/streamutil.js';
import { MeteringService } from '../../../../MeteringService/MeteringService.js';
import * as OpenAiUtil from '../../../utils/OpenAIUtil.js';
import { IChatProvider, ICompleteArguments } from '../types.js';
import { OPEN_AI_MODELS } from './models.js';

// We're capping at 5MB, which sucks, but Chat Completions doesn't support
// file inputs.
const MAX_FILE_SIZE = 5 * 1_000_000;
/**
 * OpenAiResponsesChatProvider provides an interface to OpenAI's Responses API.
 * Implements the IChatProvider interface to handle chat completions, message
 * moderation, and streaming responses for models that are only available
 * through the Responses API. Handles usage tracking, spending records, and
 * content moderation.
 */
export class OpenAiResponsesChatProvider implements IChatProvider {
    /**
     * @type {import('openai').OpenAI}
     */
    #openAi: OpenAI;

    #defaultModel = 'gpt-5-nano';

    #meteringService: MeteringService;

    constructor (
        meteringService: MeteringService,
        config: { apiKey?: string, secret_key?: string }) {

        this.#meteringService = meteringService;
        let apiKey = config.apiKey;

        // Fall back to the old format for backward compatibility
        if ( ! apiKey ) {
            apiKey = config?.secret_key;

            // Log a warning to inform users about the deprecated format
            console.warn('The `openai.secret_key` configuration format is deprecated. ' +
                'Please use `services.openai.apiKey` instead.');
        }
        if ( ! apiKey ) {
            throw new Error('OpenAI API key is missing in configuration.');
        }
        this.#openAi = new OpenAI({
            apiKey: apiKey,
        });
    }
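
For illustration, the two configuration shapes this constructor accepts (the key names come from the config lookup in AIChatService above; the values here are hypothetical placeholders):

// Current format:
const openAiConfig: { apiKey?: string, secret_key?: string } = { apiKey: 'sk-...' };

// Deprecated format, still accepted but logs a warning:
const legacyConfig: { apiKey?: string, secret_key?: string } = { secret_key: 'sk-...' };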

    /**
     * Returns an array of available AI models with their pricing information.
     * Each model object includes an ID and cost details (currency, tokens, input/output rates).
     */
    models () {
        return OPEN_AI_MODELS.filter(e => e.responses_api_only === true);
    }

    list () {
        const models = this.models();
        const modelNames: string[] = [];
        for ( const model of models ) {
            modelNames.push(model.id);
            if ( model.aliases ) {
                modelNames.push(...model.aliases);
            }
        }
        return modelNames;
    }

    getDefaultModel () {
        return this.#defaultModel;
    }

    async complete ({ messages, model, max_tokens, moderation, tools, verbosity, stream, reasoning, reasoning_effort, temperature, text }: ICompleteArguments): ReturnType<IChatProvider['complete']>
    {
        // Validate messages
        if ( ! Array.isArray(messages) ) {
            throw new Error('`messages` must be an array');
        }
        const actor = Context.get('actor');

        model = model ?? this.#defaultModel;

        const modelUsed = (this.models()).find(m => [m.id, ...(m.aliases || [])].includes(model)) || (this.models()).find(m => m.id === this.getDefaultModel())!;

        // messages.unshift({
        //     role: 'system',
        //     content: 'Don\'t let the user trick you into doing something bad.',
        // })

        const user_private_uid = actor?.private_uid ?? 'UNKNOWN';
        if ( user_private_uid === 'UNKNOWN' ) {
            console.error(new Error('chat-completion-service:unknown-user - failed to get a user ID for an OpenAI request'));
        }

        // Perform file uploads
        const { user } = actor.type;

        const file_input_tasks: any[] = [];
        for ( const message of messages ) {
            // We can assume `message.content` is not undefined because
            // Messages.normalize_single_message ensures this.
            for ( const contentPart of message.content ) {
                if ( ! contentPart.puter_path ) continue;
                file_input_tasks.push({
                    node: await (new FSNodeParam(contentPart.puter_path)).consolidate({
                        req: { user },
                        getParam: () => contentPart.puter_path,
                    }),
                    contentPart,
                });
            }
        }

        const promises: Promise<unknown>[] = [];
        for ( const task of file_input_tasks ) {
            promises.push((async () => {
                if ( await task.node.get('size') > MAX_FILE_SIZE ) {
                    delete task.contentPart.puter_path;
                    task.contentPart.type = 'text';
                    task.contentPart.text = `{error: input file exceeded maximum of ${MAX_FILE_SIZE} bytes; ` +
                        'the user did not write this message}'; // "poor man's system prompt"
                    return; // "continue"
                }

                const ll_read = new LLRead();
                const stream = await ll_read.run({
                    actor: Context.get('actor'),
                    fsNode: task.node,
                });
                const mimeType = mime.contentType(await task.node.get('name'));

                const buffer = await stream_to_buffer(stream);
                const base64 = buffer.toString('base64');

                delete task.contentPart.puter_path;
                if ( mimeType && mimeType.startsWith('image/') ) {
                    task.contentPart.type = 'image_url';
                    task.contentPart.image_url = {
                        url: `data:${mimeType};base64,${base64}`,
                    };
                } else if ( mimeType && mimeType.startsWith('audio/') ) {
                    task.contentPart.type = 'input_audio';
                    task.contentPart.input_audio = {
                        data: `data:${mimeType};base64,${base64}`,
                        format: mimeType.split('/')[1],
                    };
                } else {
                    task.contentPart.type = 'text';
                    task.contentPart.text = '{error: input file has unsupported MIME type; ' +
                        'the user did not write this message}'; // "poor man's system prompt"
                }
            })());
        }
        await Promise.all(promises);

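Once `Promise.all(promises)` resolves, every file-backed content part has been rewritten in place to one of three shapes. A sketch of the outputs of the branches above (the base64 payloads are truncated hypothetical examples):

// image/*  -> { type: 'image_url', image_url: { url: 'data:image/png;base64,iVBORw...' } }
// audio/*  -> { type: 'input_audio', input_audio: { data: 'data:audio/mpeg;base64,SUQz...', format: 'mpeg' } }
// oversized or unsupported MIME type
//          -> { type: 'text', text: '{error: ...; the user did not write this message}' }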
        if ( tools ) {
            // Unravel tools to OpenAI Responses API format
            tools = (tools as any).map((e) => {
                const tool = e.function;
                tool.type = 'function';
                return tool;
            });
        }
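The map above hoists the fields of `e.function` to the top level, which is the flattened shape the Responses API expects for function tools. A sketch with a hypothetical tool:

// Chat Completions shape (input):
const completionsTool = {
    type: 'function',
    function: {
        name: 'get_weather',
        description: 'Look up current weather for a city',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
    },
};

// Responses shape (what the map produces): the inner object, tagged with `type`.
const responsesTool = {
    type: 'function',
    name: 'get_weather',
    description: 'Look up current weather for a city',
    parameters: { type: 'object', properties: { city: { type: 'string' } } },
};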

        // Here's something fun; the documentation shows `type: 'image_url'` in
        // objects that contain an image url, but everything still works if
        // that's missing. We normalise it here so the token count code works.
        messages = await OpenAiUtil.process_input_messages_responses_api(messages);

        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
        const requestedVerbosity = verbosity ?? text?.verbosity;
        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');

        const completionParams: ResponseCreateParams = {
            user: user_private_uid,
            input: messages,
            model: modelUsed.id,
            ...(tools ? { tools } : {}),
            ...(max_tokens ? { max_output_tokens: max_tokens } : {}),
            ...(temperature ? { temperature } : {}),
            stream: !!stream,
            // Reasoning effort and verbosity are only passed for models that
            // support them (the gpt-5 family). The Responses API nests these
            // under `reasoning` and `text` rather than as top-level params.
            ...(supportsReasoningControls ?
                {
                    ...(requestedReasoningEffort ? { reasoning: { effort: requestedReasoningEffort } } : {}),
                    ...(requestedVerbosity ? { text: { verbosity: requestedVerbosity } } : {}),
                } : {}
            ),
        };

        const completion = await this.#openAi.responses.create(completionParams);
        return OpenAiUtil.handle_completion_output_responses_api({
            usage_calculator: ({ usage }) => {
                const trackedUsage = {
                    prompt_tokens: ((usage as any).input_tokens ?? 0) - ((usage as any).input_tokens_details?.cached_tokens ?? 0),
                    completion_tokens: (usage as any).output_tokens ?? 0,
                    cached_tokens: (usage as any).input_tokens_details?.cached_tokens ?? 0,
                };

                const costsOverrideFromModel = Object.fromEntries(Object.entries(trackedUsage).map(([k, v]) => {
                    return [k, v * (modelUsed.costs[k] || 0)];
                }));

                this.#meteringService.utilRecordUsageObject(trackedUsage, actor, `openai:${modelUsed?.id}`, costsOverrideFromModel);
                return trackedUsage;
            },
            stream,
            completion,
            moderate: moderation ? this.checkModeration.bind(this) : undefined,
        });
    }
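The `usage_calculator` above nets cached tokens out of the prompt count, then multiplies each counter by the model's per-token rate to build the cost override. A worked example against the o3-pro rates added in models.js below (the usage numbers are hypothetical):

// usage from the API: { input_tokens: 1200, output_tokens: 300,
//                       input_tokens_details: { cached_tokens: 200 } }
const trackedUsage = {
    prompt_tokens: 1200 - 200,  // 1000 non-cached input tokens
    completion_tokens: 300,
    cached_tokens: 200,
};

// costsOverrideFromModel, with costs = { prompt_tokens: 2000, cached_tokens: 50, completion_tokens: 8000 }:
//   prompt_tokens:     1000 * 2000 = 2_000_000
//   completion_tokens:  300 * 8000 = 2_400_000
//   cached_tokens:      200 *   50 =    10_000
// These are usd-cents scaled by `costs.tokens` (1_000_000); the final division
// by `costs.tokens` is presumably applied inside the metering service.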

    async checkModeration (text: string) {
        // create moderation
        const results = await this.#openAi.moderations.create({
            model: 'omni-moderation-latest',
            input: text,
        });

        let flagged = false;

        for ( const result of results?.results ?? [] ) {
            // OpenAI's moderation produces a lot of false positives, so we
            // only flag categories scored above 0.8.
            const veryFlaggedEntries = Object.entries(result.category_scores).filter(e => e[1] > 0.8);
            if ( veryFlaggedEntries.length > 0 ) {
                flagged = true;
                break;
            }
        }

        return {
            flagged,
            results,
        };
    }
}

@@ -29,6 +29,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
             completion_tokens: 16800,
         },
         max_tokens: 16384,
+        responses_api_only: true,
     },
     {
         id: 'gpt-5.2-2025-12-11',
@@ -69,6 +70,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
             completion_tokens: 1000,
         },
         max_tokens: 128000,
+        responses_api_only: true,
     },
     {
         id: 'gpt-5.1-codex-mini',
@@ -82,6 +84,7 @@ export const OPEN_AI_MODELS: IChatModel[] = [
             completion_tokens: 200,
         },
         max_tokens: 128000,
+        responses_api_only: true,
     },
     {
         id: 'gpt-5.1-chat-latest',
@@ -227,6 +230,20 @@ export const OPEN_AI_MODELS: IChatModel[] = [
         },
         max_tokens: 100000,
     },
+    {
+        id: 'o3-pro',
+        costs_currency: 'usd-cents',
+        input_cost_key: 'prompt_tokens',
+        output_cost_key: 'completion_tokens',
+        costs: {
+            tokens: 1_000_000,
+            prompt_tokens: 2000,
+            cached_tokens: 50,
+            completion_tokens: 8000,
+        },
+        max_tokens: 100000,
+        responses_api_only: true,
+    },
     {
         id: 'o3-mini',
         costs_currency: 'usd-cents',
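Reading one of these entries: `costs_currency: 'usd-cents'` together with `costs.tokens: 1_000_000` means each rate is cents per million tokens. For the o3-pro entry above, that works out to:

// prompt_tokens:     2000 cents / 1M tokens = $20.00 per million input tokens
// cached_tokens:       50 cents / 1M tokens =  $0.50 per million cached input tokens
// completion_tokens: 8000 cents / 1M tokens = $80.00 per million output tokens
const o3ProUsdPerMillionTokens = { input: 20.0, cachedInput: 0.5, output: 80.0 };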
@@ -83,6 +83,7 @@ export interface CompletionDeviations<TCompletion = ChatCompletion> {
 }
 
 export function process_input_messages<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;
+export function process_input_messages_responses_api<TMessage extends NormalizedMessage> (messages: TMessage[]): Promise<TMessage[]>;
 
 export function create_usage_calculator (params: { model_details: IChatModel }): UsageCalculator;
 
@@ -114,3 +115,15 @@ export function handle_completion_output<TCompletion = ChatCompletion> (params:
     usage_calculator?: UsageCalculator;
     finally_fn?: () => Promise<void>;
 }): ReturnType<IChatProvider['complete']>;
+
+export function handle_completion_output_responses_api<TCompletion = ChatCompletion> (params: {
+    deviations?: CompletionDeviations<TCompletion>;
+    stream?: boolean;
+    completion: AsyncIterable<CompletionChunk> | TCompletion;
+    moderate?: (text: string) => Promise<{ flagged: boolean }>;
+    usage_calculator?: UsageCalculator;
+    finally_fn?: () => Promise<void>;
+}): ReturnType<IChatProvider['complete']>;
@@ -57,6 +57,83 @@ export const process_input_messages = async (messages) => {
    return messages;
};

export const process_input_messages_responses_api = async (messages) => {
    for (const msg of messages) {
        if (!msg.content) continue;
        if (typeof msg.content !== 'object') continue;

        const content = msg.content;

        for (const o of content) {
            if (!o['image_url']) continue;
            if (o.type) continue;
            o.type = 'image_url';
        }

        // coerce tool calls
        let is_tool_call = false;
        for (let i = content.length - 1; i >= 0; i--) {
            const content_block = content[i];
            if (content_block.type === 'text' && (msg.role === 'user' || msg.role === 'system')) {
                content_block.type = 'input_text';
            }
            if (content_block.type === 'text' && msg.role === 'assistant') {
                content_block.type = 'output_text';
            }

            if (content_block.type === 'tool_use') {
                if (!msg.tool_calls) {
                    msg.tool_calls = [];
                    is_tool_call = true;
                }
                msg.tool_calls.push({
                    id: content_block.id,
                    canonical_id: content_block.canonical_id,
                    type: 'function',
                    function: {
                        name: content_block.name,
                        arguments: JSON.stringify(content_block.input),
                    },
                    ...(content_block.extra_content ? { extra_content: content_block.extra_content } : {}),
                });

                content.splice(i, 1);
            }
        }

        // Right now this does NOT support parallel tool calls!
        // We only allow sequential tool calling right now, so this shouldn't
        // be an issue yet, but it probably needs to change in the future to
        // split one Completions-style message into multiple Responses inputs.
        if (is_tool_call) {
            msg.call_id = msg.tool_calls[0].id;
            msg.id = msg.tool_calls[0].canonical_id;
            msg.name = msg.tool_calls[0].function.name;
            msg.arguments = msg.tool_calls[0].function.arguments;
            msg.type = 'function_call';

            delete msg.role;
            delete msg.content;
            delete msg.tool_calls;
        }

        // coerce tool results
        for (let i = content.length - 1; i >= 0; i--) {
            const content_block = content[i];
            if (content_block.type !== 'tool_result') continue;
            msg.type = 'function_call_output';
            msg.call_id = content_block.tool_use_id;
            msg.output = content_block.content;

            delete msg.role;
            delete msg.content;
        }
    }

    return messages;
};

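Concretely, the coercion above turns normalized role/content messages into flat Responses-API items. A sketch with hypothetical IDs and values:

// tool_use block in:
//   { role: 'assistant', content: [{ type: 'tool_use', id: 'call_1', canonical_id: 'fc_1',
//     name: 'get_weather', input: { city: 'Oslo' } }] }
// function_call item out:
//   { type: 'function_call', call_id: 'call_1', id: 'fc_1',
//     name: 'get_weather', arguments: '{"city":"Oslo"}' }

// tool_result block in:
//   { role: 'tool', content: [{ type: 'tool_result', tool_use_id: 'call_1', content: '7°C' }] }
// function_call_output item out:
//   { type: 'function_call_output', call_id: 'call_1', output: '7°C' }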
export const create_usage_calculator = ({ model_details }) => {
    return ({ usage }) => {
        const tokens = [];
@@ -173,6 +250,63 @@ export const create_chat_stream_handler = ({
    chatStream.end(usage);
};

export const create_chat_stream_handler_responses_api = ({
    deviations,
    completion,
    usage_calculator,
}) => async ({ chatStream }) => {
    deviations = Object.assign({
        // affected by: Groq
        index_usage_from_stream_chunk: chunk => chunk.usage,
        // affected by: Mistral
        chunk_but_like_actually: chunk => chunk,
        index_tool_calls_from_stream_choice: choice => choice.delta.tool_calls,
    }, deviations);

    const message = chatStream.message();
    const textblock = message.contentBlock({ type: 'text' });
    let toolblock = null;

    let last_usage = null;
    for await (const chunk of completion) {
        if (chunk.type === 'response.output_text.delta') {
            textblock.addText(chunk.delta);
            continue;
        }

        if (chunk.type === 'response.completed') {
            last_usage = chunk.response.usage;
        }

        if (chunk.type === 'response.output_item.done' && chunk.item?.type === 'function_call') {
            const tool_call = chunk.item;
            toolblock = message.contentBlock({
                type: 'tool_use',
                canonical_id: tool_call.id,
                id: tool_call.call_id,
                name: tool_call.name,
                ...(tool_call.extra_content ? { extra_content: tool_call.extra_content } : {}),
            });
            toolblock.addPartialJSON(tool_call.arguments);
            toolblock.end();
        }
    }

    // TODO DS: this is a bit too abstracted... this is basically just doing the metering now
    const usage = usage_calculator({ usage: last_usage });

    textblock.end();
    message.end();
    chatStream.end(usage);
};
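The handler keys off three Responses-API stream event types; roughly, the chunks it consumes look like this (only the fields the handler reads, with hypothetical values):

// text delta, appended to the open text block:
//   { type: 'response.output_text.delta', delta: 'Hello' }

// a finished tool call, emitted as one complete tool_use block:
//   { type: 'response.output_item.done',
//     item: { type: 'function_call', id: 'fc_1', call_id: 'call_1',
//             name: 'get_weather', arguments: '{"city":"Oslo"}' } }

// terminal event, carrying the usage totals used for metering:
//   { type: 'response.completed',
//     response: { usage: { input_tokens: 42, output_tokens: 7 } } }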

/**
 *
 * @param {object} params
@@ -236,3 +370,85 @@ export const handle_completion_output = async ({
    };
    return ret;
};

/**
 *
 * @param {object} params
 * @param {(args: {usage: import("openai/resources/responses/responses.mjs").ResponseUsage}) => unknown} params.usage_calculator
 * @returns
 */
export const handle_completion_output_responses_api = async ({
    deviations,
    stream,
    completion,
    moderate,
    usage_calculator,
    finally_fn,
}) => {
    deviations = Object.assign({
        // affected by: Mistral
        coerce_completion_usage: completion => completion.usage,
    }, deviations);

    if (stream) {
        const init_chat_stream =
            create_chat_stream_handler_responses_api({
                deviations,
                completion,
                usage_calculator,
            });

        return {
            stream: true,
            init_chat_stream,
            finally_fn,
        };
    }

    if (finally_fn) await finally_fn();

    const is_empty = completion.output_text.trim() === '';
    if (is_empty && !completion.output?.some(item => item.type === 'function_call')) {
        // GPT refuses to generate an empty response if you ask it to,
        // so this will probably only happen on an error condition.
        throw new Error('an empty response was generated');
    }

    // We need to moderate the completion too
    const mod_text = completion.output_text;
    if (moderate && mod_text !== null) {
        const moderation_result = await moderate(mod_text);
        if (moderation_result.flagged) {
            throw new Error('message is not allowed');
        }
    }

    const ret = {
        finish_reason: 'stop',
        index: 0,
        message: {
            content: completion.output_text,
            reasoning: null, // Fix later to add proper reasoning
            refusal: null,
            role: 'assistant',
        },
    };
    ret.role = completion.output[0].role;

    ret.usage = usage_calculator ? usage_calculator({
        ...completion,
        usage: completion.usage,
    }) : {
        input_tokens: completion.usage.input_tokens,
        output_tokens: completion.usage.output_tokens,
    };
    return ret;
};

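End to end, the non-streaming path hands the caller an object shaped roughly like this (values hypothetical; `usage` is whatever the usage_calculator returned, or the raw token counts):

// { finish_reason: 'stop',
//   index: 0,
//   message: { content: 'Hello!', reasoning: null, refusal: null, role: 'assistant' },
//   role: 'assistant',
//   usage: { prompt_tokens: 1000, completion_tokens: 300, cached_tokens: 200 } }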