Add support for GPT-5.1 model and reasoning controls (#1967)

closes #1966
2026-05-05 17:10:45 +00:00 · 2025-11-13 19:23:04 -08:00
parent 849beecd4f
commit 545b5a071e
7 changed files with 57 additions and 8 deletions
@@ -109,7 +109,7 @@ export class OpenAICompletionService {
        return this.#defaultModel;
    }

-    async complete({ messages, stream, model, tools, max_tokens, temperature }) {
+    async complete({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) {
        return await this.#complete(messages, {
            model: model,
            tools,
@@ -117,7 +117,10 @@ export class OpenAICompletionService {
            stream,
            max_tokens,
            temperature,
-
+            reasoning,
+            text,
+            reasoning_effort,
+            verbosity,
        });
    }

@@ -166,6 +169,7 @@ export class OpenAICompletionService {
    async #complete(messages, {
        stream, moderation, model, tools,
        temperature, max_tokens,
+        reasoning, text, reasoning_effort, verbosity,
    }) {
        // Validate messages
        if ( ! Array.isArray(messages) ) {
@@ -252,7 +256,11 @@ export class OpenAICompletionService {
        // that's missing. We normalise it here so the token count code works.
        messages = await OpenAIUtil.process_input_messages(messages);

-        const completion = await this.#openAi.chat.completions.create({
+        const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
+        const requestedVerbosity = verbosity ?? text?.verbosity;
+        const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');
+
+        const completionParams = {
            user: user_private_uid,
            messages: messages,
            model: model,
@@ -263,7 +271,18 @@ export class OpenAICompletionService {
            ...(stream ? {
                stream_options: { include_usage: true },
            } : {}),
-        });
+        };
+
+        if ( supportsReasoningControls ) {
+            if ( requestedReasoningEffort ) {
+                completionParams.reasoning_effort = requestedReasoningEffort;
+            }
+            if ( requestedVerbosity ) {
+                completionParams.verbosity = requestedVerbosity;
+            }
+        }
+
+        const completion = await this.#openAi.chat.completions.create(completionParams);
        // TODO DS: simplify this logic for all the AI services; each service should handle its own cost calculation.
        // For now I'm overloading this usage calculator to handle the future promise resolution...
        return OpenAIUtil.handle_completion_output({
@@ -1,6 +1,16 @@
 // TODO DS: centralize somewhere

 export const OPEN_AI_MODELS = [
+    {
+        id: 'gpt-5.1',
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 125,
+            output: 1000,
+        },
+        max_tokens: 128000,
+    },
    {
        id: 'gpt-5-2025-08-07',
        aliases: ['gpt-5'],
@@ -163,4 +173,4 @@ export const OPEN_AI_MODELS = [
            output: 15000,
        },
    },
-];
+];
@@ -20,6 +20,9 @@

 export const OPENAI_COST_MAP = {
    // GPT-5 models
+    'openai:gpt-5.1:prompt_tokens': 125,
+    'openai:gpt-5.1:cached_tokens': 13,
+    'openai:gpt-5.1:completion_tokens': 1000,
    'openai:gpt-5-2025-08-07:prompt_tokens': 125,
    'openai:gpt-5-2025-08-07:cached_tokens': 13,
    'openai:gpt-5-2025-08-07:completion_tokens': 1000,
@@ -130,6 +130,10 @@ export const OPENROUTER_COST_MAP = {
    'openrouter:openai/gpt-5-chat:prompt': 125,
    'openrouter:openai/gpt-5-chat:completion': 1000,
    'openrouter:openai/gpt-5-chat:input_cache_read': 12,
+    'openrouter:openai/gpt-5.1:prompt': 125,
+    'openrouter:openai/gpt-5.1:completion': 1000,
+    'openrouter:openai/gpt-5.1:web_search': 1000000,
+    'openrouter:openai/gpt-5.1:input_cache_read': 12,
    'openrouter:openai/gpt-5:prompt': 125,
    'openrouter:openai/gpt-5:completion': 1000,
    'openrouter:openai/gpt-5:web_search': 1000000,
@@ -78,6 +78,16 @@ interface ChatOptions {
    stream?: boolean;
    max_tokens?: number;
    temperature?: number;
+    reasoning?: {
+        effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
+        [key: string]: unknown;
+    };
+    reasoning_effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
+    text?: {
+        verbosity?: 'low' | 'medium' | 'high';
+        [key: string]: unknown;
+    };
+    verbosity?: 'low' | 'medium' | 'high';
    tools?: ToolDefinition[];
 }

@@ -809,7 +809,7 @@ class AI{
        }

        // Additional parameters to pass from userParams to requestParams
-        const PARAMS_TO_PASS = ['tools', 'response'];
+        const PARAMS_TO_PASS = ['tools', 'response', 'reasoning', 'reasoning_effort', 'text', 'verbosity'];
        for ( const name of PARAMS_TO_PASS ) {
            if ( userParams[name] ) {
                requestParams[name] = userParams[name];
@@ -7,6 +7,7 @@ const TEST_MODELS = [
    "openrouter:anthropic/claude-sonnet-4",
    "google/gemini-2.5-pro",
    "deepseek-chat",
+    "gpt-5.1",
    "gpt-5-nano",
    "openai/gpt-5-nano",
    "claude-sonnet-4-latest",
@@ -48,7 +49,9 @@ const testChatWithParametersCore = async function(model) {
    const result = await puter.ai.chat("What is 2+2?", { 
        model: model,
        temperature: 0.7,
-        max_tokens: 50
+        max_tokens: 50,
+        reasoning: { effort: 'low' },
+        text: { verbosity: 'low' },
    });
    
    // Check basic result structure
@@ -212,4 +215,4 @@ const generateAllTests = function() {
 };

 // Export the generated tests
-window.aiTests = generateAllTests(); 
+window.aiTests = generateAllTests();