diff --git a/src/backend/src/services/ai/AIInterfaceService.js b/src/backend/src/services/ai/AIInterfaceService.js index a62ef103c..ca9e7328b 100644 --- a/src/backend/src/services/ai/AIInterfaceService.js +++ b/src/backend/src/services/ai/AIInterfaceService.js @@ -106,6 +106,10 @@ class AIInterfaceService extends BaseService { vision: { type: 'flag' }, stream: { type: 'flag' }, response: { type: 'json' }, + reasoning: { type: 'json', optional: true }, + reasoning_effort: { type: 'string', optional: true }, + text: { type: 'json', optional: true }, + verbosity: { type: 'string', optional: true }, model: { type: 'string' }, provider: { type: 'string', optional: true }, temperature: { type: 'number' }, diff --git a/src/backend/src/services/ai/chat/providers/ClaudeProvider/ClaudeProvider.ts b/src/backend/src/services/ai/chat/providers/ClaudeProvider/ClaudeProvider.ts index 7600fc4c5..3b26ee23b 100644 --- a/src/backend/src/services/ai/chat/providers/ClaudeProvider/ClaudeProvider.ts +++ b/src/backend/src/services/ai/chat/providers/ClaudeProvider/ClaudeProvider.ts @@ -69,7 +69,7 @@ export class ClaudeProvider implements IChatProvider { return model_names; } - async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType { + async complete ({ messages, stream, model, tools, max_tokens, temperature, reasoning, reasoning_effort }: ICompleteArguments): ReturnType { tools = make_claude_tools(tools); let system_prompts: string | any[]; @@ -168,6 +168,14 @@ export class ClaudeProvider implements IChatProvider { }); const modelUsed = this.models().find(m => [m.id, ...(m.aliases || [])].includes(model)) || this.models().find(m => m.id === this.getDefaultModel())!; + const requestedReasoningEffort = reasoning_effort ?? 
reasoning?.effort; + const thinkingConfig = this.#buildThinkingConfig({ + modelId: modelUsed.id, + reasoningEffort: requestedReasoningEffort, + maxTokens: max_tokens, + }); + // Anthropic requires temperature=1 whenever thinking is enabled. + const resolvedTemperature = thinkingConfig ? 1 : (temperature ?? 0); const sdkParams: MessageCreateParams = { model: modelUsed.id, max_tokens: Math.floor(max_tokens || @@ -175,7 +183,7 @@ export class ClaudeProvider implements IChatProvider { model === 'claude-3-5-sonnet-20241022' || model === 'claude-3-5-sonnet-20240620' ) ? 8192 : this.models().filter(e => (e.name === model || e.aliases?.includes(model)))[0]?.max_tokens || 4096)), //required - temperature: temperature || 0, // required + temperature: resolvedTemperature, // required ...( (system_prompts && system_prompts[0]?.content) ? { system: system_prompts[0]?.content, } : {}), @@ -185,6 +193,7 @@ export class ClaudeProvider implements IChatProvider { }, messages, ...(tools ? { tools } : {}), + ...(thinkingConfig ? 
{ thinking: thinkingConfig } : {}), } as MessageCreateParams; let beta_mode = false; @@ -259,8 +268,10 @@ export class ClaudeProvider implements IChatProvider { for ( const task of file_delete_tasks ) { promises.push((async () => { try { - await this.anthropic.beta.files.delete(task.file_id, - { betas: ['files-api-2025-04-14'] }); + await this.anthropic.beta.files.delete( + task.file_id, + { betas: ['files-api-2025-04-14'] }, + ); } catch (e) { this.errorService.report('claude:file-delete-task', { source: e, @@ -285,6 +296,7 @@ export class ClaudeProvider implements IChatProvider { const usageSum: Record = {}; let message, contentBlock; + let currentContentBlockType: string | null = null; for await ( const event of completion ) { if ( event.type === 'message_delta' ) { @@ -292,8 +304,12 @@ export class ClaudeProvider implements IChatProvider { const meteredData = this.#usageFormatterUtil(usageObject as Usage | BetaUsage); for ( const key in meteredData ) { - if ( ! usageSum[key] ) usageSum[key] = 0; - usageSum[key] += meteredData[key as keyof typeof meteredData]; + // Anthropic message_delta usage counters are cumulative. + // Keep the latest value instead of summing every delta. + usageSum[key] = Math.max( + usageSum[key] ?? 0, + meteredData[key as keyof typeof meteredData], + ); } } @@ -308,6 +324,7 @@ export class ClaudeProvider implements IChatProvider { } if ( event.type === 'content_block_start' ) { + currentContentBlockType = event.content_block.type; if ( event.content_block.type === 'tool_use' ) { contentBlock = message!.contentBlock({ type: event.content_block.type, @@ -316,6 +333,14 @@ export class ClaudeProvider implements IChatProvider { }); continue; } + if ( event.content_block.type === 'thinking' ) { + // We map Anthropic "thinking" blocks to our text stream type, + // then forward deltas through addReasoning(). 
+ contentBlock = message!.contentBlock({ + type: 'text', + }); + continue; + } contentBlock = message!.contentBlock({ type: event.content_block.type, }); @@ -325,6 +350,7 @@ export class ClaudeProvider implements IChatProvider { if ( event.type === 'content_block_stop' ) { contentBlock!.end(); contentBlock = null; + currentContentBlockType = null; continue; } @@ -334,15 +360,35 @@ export class ClaudeProvider implements IChatProvider { continue; } if ( event.delta.type === 'text_delta' ) { - (contentBlock as AIChatTextStream)!.addText(event.delta.text); + if ( currentContentBlockType === 'thinking' ) { + (contentBlock as AIChatTextStream)!.addReasoning(event.delta.text); + } else { + (contentBlock as AIChatTextStream)!.addText(event.delta.text); + } + continue; + } + if ( event.delta.type === 'thinking_delta' ) { + (contentBlock as AIChatTextStream)!.addReasoning(event.delta.thinking); + continue; + } + if ( event.delta.type === 'signature_delta' ) { continue; } } } + // Some usage fields (e.g. thinking_tokens) may only be available + // on the final message usage object. + const finalUsage = await completion.finalMessage() + .then(message => this.#usageFormatterUtil(message.usage as Usage | BetaUsage)) + .catch(() => null); + if ( finalUsage ) { + for ( const [key, value] of Object.entries(finalUsage) ) { + usageSum[key] = value; + } + } + chatStream.end(usageSum); - const costsOverrideFromModel = Object.fromEntries(Object.entries(usageSum).map(([k, v]) => { - return [k, v * (modelUsed.costs[k])]; - })); + const costsOverrideFromModel = this.#buildCostsOverrideFromModel(usageSum, modelUsed); this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${modelUsed.id}`, costsOverrideFromModel); }; @@ -357,15 +403,13 @@ export class ClaudeProvider implements IChatProvider { try { msg = await anthropic.messages.create(sdkParams); } catch (e) { - console.log('FUCK! 
anthropic error: ', e); + console.error('anthropic error:', e); throw e; } await cleanup_files(); const usage = this.#usageFormatterUtil((msg as Message).usage as Usage | BetaUsage); - const costsOverrideFromModel = Object.fromEntries(Object.entries(usage).map(([k, v]) => { - return [k, v * (modelUsed.costs[k])]; - })); + const costsOverrideFromModel = this.#buildCostsOverrideFromModel(usage, modelUsed); this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${modelUsed.id}`, costsOverrideFromModel); // TODO DS: cleanup old usage tracking @@ -383,9 +427,55 @@ ephemeral_1h_input_tokens: usage?.cache_creation?.ephemeral_1h_input_tokens || 0, cache_read_input_tokens: usage?.cache_read_input_tokens || 0, output_tokens: usage?.output_tokens || 0, + thinking_tokens: (usage as any)?.thinking_tokens || (usage as any)?.output_tokens_details?.thinking_tokens || 0, }; }; + #buildThinkingConfig ({ + modelId, + reasoningEffort, + maxTokens, + }: { + modelId: string; + reasoningEffort?: 'low' | 'medium' | 'high'; + maxTokens?: number; + }) { + if ( ! reasoningEffort ) return undefined; + + const requestedBudget = { + low: 1024, + medium: 4096, + high: 8192, + }[reasoningEffort]; + if ( ! requestedBudget ) return undefined; // unknown effort string -> no thinking (upstream only validates it as a string) + + // Keep budget <= max_tokens when it's set. If max_tokens is too low + // to satisfy Anthropic's minimum thinking budget, disable thinking. + if ( typeof maxTokens === 'number' && Number.isFinite(maxTokens) ) { + const maxBudget = Math.floor(maxTokens - 1); + if ( maxBudget < 1024 ) { + return undefined; + } + } + + const budget_tokens = Math.floor(Math.max( + 1024, + Math.min(requestedBudget, (maxTokens ? (maxTokens - 1) : requestedBudget)), + )); + + return { + type: 'enabled' as const, + budget_tokens, + }; + } + + #buildCostsOverrideFromModel (usage: Record, modelUsed: { costs: Record }) { + return Object.fromEntries(Object.entries(usage).map(([k, v]) => { + const modelCost = modelUsed.costs[k] ?? (k === 'thinking_tokens' ? 
modelUsed.costs.output_tokens : 0); + return [k, v * modelCost]; + })); + } + models () { return CLAUDE_MODELS; }