Add support for GPT-5.1 model and reasoning controls (#1967)
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test (20.x) (push) Has been cancelled
test / test (22.x) (push) Has been cancelled
test / backend (node env, api-test) (22.x) (push) Has been cancelled
test / puterjs (browser env, playwright) (22.x) (push) Has been cancelled
test / puterjs (node env, vitest) (22.x) (push) Has been cancelled

closes #1966
This commit is contained in:
Nariman Jelveh
2025-11-13 19:23:04 -08:00
committed by GitHub
parent 849beecd4f
commit 545b5a071e
7 changed files with 57 additions and 8 deletions
@@ -109,7 +109,7 @@ export class OpenAICompletionService {
return this.#defaultModel;
}
async complete({ messages, stream, model, tools, max_tokens, temperature }) {
async complete({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) {
return await this.#complete(messages, {
model: model,
tools,
@@ -117,7 +117,10 @@ export class OpenAICompletionService {
stream,
max_tokens,
temperature,
reasoning,
text,
reasoning_effort,
verbosity,
});
}
@@ -166,6 +169,7 @@ export class OpenAICompletionService {
async #complete(messages, {
stream, moderation, model, tools,
temperature, max_tokens,
reasoning, text, reasoning_effort, verbosity,
}) {
// Validate messages
if ( ! Array.isArray(messages) ) {
@@ -252,7 +256,11 @@ export class OpenAICompletionService {
// that's missing. We normalise it here so the token count code works.
messages = await OpenAIUtil.process_input_messages(messages);
const completion = await this.#openAi.chat.completions.create({
// Resolve the caller's reasoning/verbosity preferences. The flat options
// (`reasoning_effort`, `verbosity`) win; the nested forms (`reasoning.effort`,
// `text.verbosity`) are consulted only when the flat value is null or undefined.
const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
const requestedVerbosity = verbosity ?? text?.verbosity;
// These controls are gated on the model id: only a string id that starts with
// 'gpt-5' qualifies. A non-string `model` yields false rather than throwing.
const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');
const completionParams = {
user: user_private_uid,
messages: messages,
model: model,
@@ -263,7 +271,18 @@ export class OpenAICompletionService {
...(stream ? {
stream_options: { include_usage: true },
} : {}),
});
};
// Add the optional reasoning controls to the request parameters. Each key is
// set only when the model qualifies AND a truthy value was requested; otherwise
// this block adds nothing to `completionParams`.
if ( supportsReasoningControls ) {
if ( requestedReasoningEffort ) {
completionParams.reasoning_effort = requestedReasoningEffort;
}
if ( requestedVerbosity ) {
completionParams.verbosity = requestedVerbosity;
}
}
const completion = await this.#openAi.chat.completions.create(completionParams);
// TODO DS: simplify this logic for all the ai services, each service should handle its cost calculation in the service
// for now I'm overloading this usage calculator to handle the future promise resolution...
return OpenAIUtil.handle_completion_output({
@@ -1,6 +1,16 @@
// TODO DS: centralize somewhere
export const OPEN_AI_MODELS = [
// Model entry for 'gpt-5.1'. No `aliases` are declared for this id.
// Pricing is declared in 'usd-cents' against a 1,000,000-token unit
// (presumably the cost per million tokens — confirm against the cost calculator).
{
id: 'gpt-5.1',
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 125,
output: 1000,
},
max_tokens: 128000,
},
{
id: 'gpt-5-2025-08-07',
aliases: ['gpt-5'],
@@ -163,4 +173,4 @@ export const OPEN_AI_MODELS = [
output: 15000,
},
},
];
];
@@ -20,6 +20,9 @@
export const OPENAI_COST_MAP = {
// GPT-5 models
'openai:gpt-5.1:prompt_tokens': 125,
'openai:gpt-5.1:cached_tokens': 13,
'openai:gpt-5.1:completion_tokens': 1000,
'openai:gpt-5-2025-08-07:prompt_tokens': 125,
'openai:gpt-5-2025-08-07:cached_tokens': 13,
'openai:gpt-5-2025-08-07:completion_tokens': 1000,
@@ -130,6 +130,10 @@ export const OPENROUTER_COST_MAP = {
'openrouter:openai/gpt-5-chat:prompt': 125,
'openrouter:openai/gpt-5-chat:completion': 1000,
'openrouter:openai/gpt-5-chat:input_cache_read': 12,
'openrouter:openai/gpt-5.1:prompt': 125,
'openrouter:openai/gpt-5.1:completion': 1000,
'openrouter:openai/gpt-5.1:web_search': 1000000,
'openrouter:openai/gpt-5.1:input_cache_read': 12,
'openrouter:openai/gpt-5:prompt': 125,
'openrouter:openai/gpt-5:completion': 1000,
'openrouter:openai/gpt-5:web_search': 1000000,
+10
View File
@@ -78,6 +78,16 @@ interface ChatOptions {
stream?: boolean;
max_tokens?: number;
temperature?: number;
/**
 * Nested reasoning options. `effort` is restricted to the listed literals;
 * the index signature allows additional keys of unknown type.
 */
reasoning?: {
effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
[key: string]: unknown;
};
/** Flat form of the reasoning effort; accepts the same literals as `reasoning.effort`. */
reasoning_effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
/**
 * Nested text options. `verbosity` is restricted to the listed literals;
 * the index signature allows additional keys of unknown type.
 */
text?: {
verbosity?: 'low' | 'medium' | 'high';
[key: string]: unknown;
};
/** Flat form of the verbosity setting; accepts the same literals as `text.verbosity`. */
verbosity?: 'low' | 'medium' | 'high';
tools?: ToolDefinition[];
}
+1 -1
View File
@@ -809,7 +809,7 @@ class AI{
}
// Additional parameters to pass from userParams to requestParams
const PARAMS_TO_PASS = ['tools', 'response'];
const PARAMS_TO_PASS = ['tools', 'response', 'reasoning', 'reasoning_effort', 'text', 'verbosity'];
for ( const name of PARAMS_TO_PASS ) {
if ( userParams[name] ) {
requestParams[name] = userParams[name];
+5 -2
View File
@@ -7,6 +7,7 @@ const TEST_MODELS = [
"openrouter:anthropic/claude-sonnet-4",
"google/gemini-2.5-pro",
"deepseek-chat",
"gpt-5.1",
"gpt-5-nano",
"openai/gpt-5-nano",
"claude-sonnet-4-latest",
@@ -48,7 +49,9 @@ const testChatWithParametersCore = async function(model) {
const result = await puter.ai.chat("What is 2+2?", {
model: model,
temperature: 0.7,
max_tokens: 50
max_tokens: 50,
reasoning: { effort: 'low' },
text: { verbosity: 'low' },
});
// Check basic result structure
@@ -212,4 +215,4 @@ const generateAllTests = function() {
};
// Export the generated tests
window.aiTests = generateAllTests();
window.aiTests = generateAllTests();