add reasoning to claude models (#2676)

This commit is contained in:
ProgrammerIn-wonderland
2026-03-17 12:26:53 -07:00
committed by GitHub
parent 53f8d67679
commit c7a481118f
2 changed files with 107 additions and 14 deletions
@@ -106,6 +106,10 @@ class AIInterfaceService extends BaseService {
vision: { type: 'flag' },
stream: { type: 'flag' },
response: { type: 'json' },
reasoning: { type: 'json', optional: true },
reasoning_effort: { type: 'string', optional: true },
text: { type: 'json', optional: true },
verbosity: { type: 'string', optional: true },
model: { type: 'string' },
provider: { type: 'string', optional: true },
temperature: { type: 'number' },
@@ -69,7 +69,7 @@ export class ClaudeProvider implements IChatProvider {
return model_names;
}
async complete ({ messages, stream, model, tools, max_tokens, temperature }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
async complete ({ messages, stream, model, tools, max_tokens, temperature, reasoning, reasoning_effort }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
tools = make_claude_tools(tools);
let system_prompts: string | any[];
@@ -168,6 +168,14 @@ export class ClaudeProvider implements IChatProvider {
});
const modelUsed = this.models().find(m => [m.id, ...(m.aliases || [])].includes(model)) || this.models().find(m => m.id === this.getDefaultModel())!;
const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
const thinkingConfig = this.#buildThinkingConfig({
modelId: modelUsed.id,
reasoningEffort: requestedReasoningEffort,
maxTokens: max_tokens,
});
// Anthropic requires temperature=1 whenever thinking is enabled.
const resolvedTemperature = thinkingConfig ? 1 : (temperature ?? 0);
const sdkParams: MessageCreateParams = {
model: modelUsed.id,
max_tokens: Math.floor(max_tokens ||
@@ -175,7 +183,7 @@ export class ClaudeProvider implements IChatProvider {
model === 'claude-3-5-sonnet-20241022'
|| model === 'claude-3-5-sonnet-20240620'
) ? 8192 : this.models().filter(e => (e.name === model || e.aliases?.includes(model)))[0]?.max_tokens || 4096)), //required
temperature: temperature || 0, // required
temperature: resolvedTemperature, // required
...( (system_prompts && system_prompts[0]?.content) ? {
system: system_prompts[0]?.content,
} : {}),
@@ -185,6 +193,7 @@ export class ClaudeProvider implements IChatProvider {
},
messages,
...(tools ? { tools } : {}),
...(thinkingConfig ? { thinking: thinkingConfig } : {}),
} as MessageCreateParams;
let beta_mode = false;
@@ -259,8 +268,10 @@ export class ClaudeProvider implements IChatProvider {
for ( const task of file_delete_tasks ) {
promises.push((async () => {
try {
await this.anthropic.beta.files.delete(task.file_id,
{ betas: ['files-api-2025-04-14'] });
await this.anthropic.beta.files.delete(
task.file_id,
{ betas: ['files-api-2025-04-14'] },
);
} catch (e) {
this.errorService.report('claude:file-delete-task', {
source: e,
@@ -285,6 +296,7 @@ export class ClaudeProvider implements IChatProvider {
const usageSum: Record<string, number> = {};
let message, contentBlock;
let currentContentBlockType: string | null = null;
for await ( const event of completion ) {
if ( event.type === 'message_delta' ) {
@@ -292,8 +304,12 @@ export class ClaudeProvider implements IChatProvider {
const meteredData = this.#usageFormatterUtil(usageObject as Usage | BetaUsage);
for ( const key in meteredData ) {
if ( ! usageSum[key] ) usageSum[key] = 0;
usageSum[key] += meteredData[key as keyof typeof meteredData];
// Anthropic message_delta usage counters are cumulative.
// Keep the latest value instead of summing every delta.
usageSum[key] = Math.max(
usageSum[key] ?? 0,
meteredData[key as keyof typeof meteredData],
);
}
}
@@ -308,6 +324,7 @@ export class ClaudeProvider implements IChatProvider {
}
if ( event.type === 'content_block_start' ) {
currentContentBlockType = event.content_block.type;
if ( event.content_block.type === 'tool_use' ) {
contentBlock = message!.contentBlock({
type: event.content_block.type,
@@ -316,6 +333,14 @@ export class ClaudeProvider implements IChatProvider {
});
continue;
}
if ( event.content_block.type === 'thinking' ) {
// We map Anthropic "thinking" blocks to our text stream type,
// then forward deltas through addReasoning().
contentBlock = message!.contentBlock({
type: 'text',
});
continue;
}
contentBlock = message!.contentBlock({
type: event.content_block.type,
});
@@ -325,6 +350,7 @@ export class ClaudeProvider implements IChatProvider {
if ( event.type === 'content_block_stop' ) {
contentBlock!.end();
contentBlock = null;
currentContentBlockType = null;
continue;
}
@@ -334,15 +360,35 @@ export class ClaudeProvider implements IChatProvider {
continue;
}
if ( event.delta.type === 'text_delta' ) {
(contentBlock as AIChatTextStream)!.addText(event.delta.text);
if ( currentContentBlockType === 'thinking' ) {
(contentBlock as AIChatTextStream)!.addReasoning(event.delta.text);
} else {
(contentBlock as AIChatTextStream)!.addText(event.delta.text);
}
continue;
}
if ( event.delta.type === 'thinking_delta' ) {
(contentBlock as AIChatTextStream)!.addReasoning(event.delta.thinking);
continue;
}
if ( event.delta.type === 'signature_delta' ) {
continue;
}
}
}
// Some usage fields (e.g. thinking_tokens) may only be available
// on the final message usage object.
const finalUsage = await completion.finalMessage()
.then(message => this.#usageFormatterUtil(message.usage as Usage | BetaUsage))
.catch(() => null);
if ( finalUsage ) {
for ( const [key, value] of Object.entries(finalUsage) ) {
usageSum[key] = value;
}
}
chatStream.end(usageSum);
const costsOverrideFromModel = Object.fromEntries(Object.entries(usageSum).map(([k, v]) => {
return [k, v * (modelUsed.costs[k])];
}));
const costsOverrideFromModel = this.#buildCostsOverrideFromModel(usageSum, modelUsed);
this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${modelUsed.id}`, costsOverrideFromModel);
};
@@ -357,15 +403,13 @@ export class ClaudeProvider implements IChatProvider {
try {
msg = await anthropic.messages.create(sdkParams);
} catch (e) {
console.log('FUCK! anthropic error: ', e);
console.error('anthropic error:', e);
throw e;
}
await cleanup_files();
const usage = this.#usageFormatterUtil((msg as Message).usage as Usage | BetaUsage);
const costsOverrideFromModel = Object.fromEntries(Object.entries(usage).map(([k, v]) => {
return [k, v * (modelUsed.costs[k])];
}));
const costsOverrideFromModel = this.#buildCostsOverrideFromModel(usage, modelUsed);
this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${modelUsed.id}`, costsOverrideFromModel);
// TODO DS: cleanup old usage tracking
@@ -383,9 +427,54 @@ export class ClaudeProvider implements IChatProvider {
ephemeral_1h_input_tokens: usage?.cache_creation?.ephemeral_1h_input_tokens || 0,
cache_read_input_tokens: usage?.cache_read_input_tokens || 0,
output_tokens: usage?.output_tokens || 0,
thinking_tokens: (usage as any)?.thinking_tokens || (usage as any)?.output_tokens_details?.thinking_tokens || 0,
};
};
/**
 * Translate a requested reasoning effort into Anthropic's `thinking`
 * request parameter, clamped to fit within `max_tokens`.
 *
 * @param modelId - Resolved Anthropic model id (currently unused; kept so
 *   per-model budget tuning can be added later without changing callers).
 * @param reasoningEffort - Effort level from the request. `undefined` or an
 *   unrecognized value disables thinking.
 * @param maxTokens - Caller-supplied max_tokens cap, if any. Anthropic
 *   requires budget_tokens < max_tokens and budget_tokens >= 1024.
 * @returns `{ type: 'enabled', budget_tokens }`, or `undefined` when
 *   thinking should not be enabled.
 */
#buildThinkingConfig ({
    modelId,
    reasoningEffort,
    maxTokens,
}: {
    modelId: string;
    reasoningEffort?: 'low' | 'medium' | 'high';
    maxTokens?: number;
}) {
    const EFFORT_BUDGETS: Record<string, number> = {
        low: 1024,
        medium: 4096,
        high: 8192,
    };
    // reasoning_effort reaches us over the wire typed only as 'string', so
    // an unknown value (e.g. 'none', 'minimal') must disable thinking here
    // rather than producing a NaN budget Anthropic would reject.
    const requestedBudget = reasoningEffort !== undefined
        ? EFFORT_BUDGETS[reasoningEffort]
        : undefined;
    if ( requestedBudget === undefined ) return undefined;
    // Keep budget <= max_tokens - 1 when max_tokens is set. If max_tokens
    // is too low to satisfy Anthropic's minimum thinking budget (1024),
    // disable thinking entirely.
    if ( typeof maxTokens === 'number' && Number.isFinite(maxTokens) ) {
        const maxBudget = Math.floor(maxTokens - 1);
        if ( maxBudget < 1024 ) {
            return undefined;
        }
    }
    const budget_tokens = Math.floor(Math.max(
        1024,
        Math.min(requestedBudget, (maxTokens ? (maxTokens - 1) : requestedBudget)),
    ));
    return {
        type: 'enabled' as const,
        budget_tokens,
    };
}
/**
 * Price each usage counter with the model's per-token rate.
 *
 * Thinking tokens are billed at the output-token rate when no dedicated
 * rate exists; counters with no known rate cost nothing.
 */
#buildCostsOverrideFromModel (usage: Record<string, number>, modelUsed: { costs: Record<string, number> }) {
    const priced: Record<string, number> = {};
    for ( const [counter, amount] of Object.entries(usage) ) {
        let rate = modelUsed.costs[counter];
        if ( rate === undefined || rate === null ) {
            // Fall back for counters the cost table doesn't list.
            rate = counter === 'thinking_tokens'
                ? modelUsed.costs.output_tokens
                : 0;
        }
        priced[counter] = amount * rate;
    }
    return priced;
}
/**
 * Return the catalog of Claude models this provider serves.
 *
 * Entries carry at least `id`, optional `aliases`, `max_tokens`, and a
 * `costs` table (as consumed by complete() and the usage-pricing helpers).
 * NOTE(review): CLAUDE_MODELS is a module-level constant defined elsewhere
 * in this file; callers receive the shared array, not a copy.
 */
models () {
    return CLAUDE_MODELS;
}