From 02d95db7edfbc6ff84ca8eaa8fae506a801fb8df Mon Sep 17 00:00:00 2001 From: Daniel Salazar Date: Thu, 20 Nov 2025 17:49:05 -0800 Subject: [PATCH] fix: open router cache token metering (#1988) --- package-lock.json | 29 +- .../src/modules/puterai/ClaudeService.js | 94 ++---- .../src/modules/puterai/OllamaService.js | 63 ++-- .../OpenAICompletionService.mjs | 24 +- .../puterai/OpenAiCompletionService/index.mjs | 10 +- .../OpenAiCompletionService/models.mjs | 2 +- .../src/modules/puterai/OpenRouterService.js | 188 +++++------ .../src/modules/puterai/lib/OpenAIUtil.js | 12 +- .../MeteringService/MeteringService.ts | 18 +- .../costMaps/openrouterCostMap.ts | 291 ++++++++++-------- 10 files changed, 361 insertions(+), 370 deletions(-) diff --git a/package-lock.json b/package-lock.json index 90dd3bb5f..e6e84a05c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -904,6 +904,7 @@ "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -3047,6 +3048,7 @@ "resolved": "https://registry.npmjs.org/@jimp/custom/-/custom-0.22.12.tgz", "integrity": "sha512-xcmww1O/JFP2MrlGUMd3Q78S3Qu6W3mYTXYuIqFq33EorgYHV/HqymHfXy9GjiCJ7OI+7lWx6nYFOzU7M4rd1Q==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/core": "^0.22.12" } @@ -3083,6 +3085,7 @@ "resolved": "https://registry.npmjs.org/@jimp/plugin-blit/-/plugin-blit-0.22.12.tgz", "integrity": "sha512-xslz2ZoFZOPLY8EZ4dC29m168BtDx95D6K80TzgUi8gqT7LY6CsajWO0FAxDwHz6h0eomHMfyGX0stspBrTKnQ==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3095,6 +3098,7 @@ "resolved": "https://registry.npmjs.org/@jimp/plugin-blur/-/plugin-blur-0.22.12.tgz", "integrity": "sha512-S0vJADTuh1Q9F+cXAwFPlrKWzDj2F9t/9JAbUvaaDuivpyWuImEKXVz5PUZw2NbpuSHjwssbTpOZ8F13iJX4uw==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3119,6 +3123,7 @@ "resolved": "https://registry.npmjs.org/@jimp/plugin-color/-/plugin-color-0.22.12.tgz", "integrity": "sha512-xImhTE5BpS8xa+mAN6j4sMRWaUgUDLoaGHhJhpC+r7SKKErYDR0WQV4yCE4gP+N0gozD0F3Ka1LUSaMXrn7ZIA==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/utils": "^0.22.12", "tinycolor2": "^1.6.0" @@ -3162,6 +3167,7 @@ "resolved": "https://registry.npmjs.org/@jimp/plugin-crop/-/plugin-crop-0.22.12.tgz", "integrity": "sha512-FNuUN0OVzRCozx8XSgP9MyLGMxNHHJMFt+LJuFjn1mu3k0VQxrzqbN06yIl46TVejhyAhcq5gLzqmSCHvlcBVw==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3285,6 +3291,7 @@ "resolved": "https://registry.npmjs.org/@jimp/plugin-resize/-/plugin-resize-0.22.12.tgz", "integrity": "sha512-3NyTPlPbTnGKDIbaBgQ3HbE6wXbAlFfxHVERmrbqAi8R3r6fQPxpCauA8UVDnieg5eo04D0T8nnnNIX//i/sXg==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3297,6 +3304,7 @@ "resolved": "https://registry.npmjs.org/@jimp/plugin-rotate/-/plugin-rotate-0.22.12.tgz", "integrity": "sha512-9YNEt7BPAFfTls2FGfKBVgwwLUuKqy+E8bDGGEsOqHtbuhbshVGxN2WMZaD4gh5IDWvR+emmmPPWGgaYNYt1gA==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3312,6 +3320,7 @@ "resolved": "https://registry.npmjs.org/@jimp/plugin-scale/-/plugin-scale-0.22.12.tgz", "integrity": "sha512-dghs92qM6MhHj0HrV2qAwKPMklQtjNpoYgAB94ysYpsXslhRTiPisueSIELRwZGEr0J0VUxpUY7HgJwlSIgGZw==", "license": "MIT", + "peer": true, "dependencies": { "@jimp/utils": "^0.22.12" }, @@ -3619,6 +3628,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.4.1.tgz", "integrity": "sha512-O2yRJce1GOc6PAy3QxFM4NzFiWzvScDC1/5ihYBL6BUEVdq0XMWN01sppE+H6bBXbaFYipjwFLEWLg5PaSOThA==", "license": "Apache-2.0", + "peer": true, "engines": { "node": ">=8.0.0" } @@ -3628,6 +3638,7 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.49.1.tgz", "integrity": "sha512-kaNl/T7WzyMUQHQlVq7q0oV4Kev6+0xFwqzofryC66jgGMacd0QH5TwfpbUwSTby+SdAdprAe5UKMvBw4tKS5Q==", "license": "Apache-2.0", + "peer": true, "dependencies": { "@opentelemetry/api": "^1.0.0" }, @@ -7262,6 +7273,7 @@ "integrity": "sha512-6JSSaBZmsKvEkbRUkf7Zj7dru/8ZCrJxAqArcLaVMee5907JdtEbKGsZ7zNiIm/UAkpGUkaSMZEXShnN2D1HZA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.46.1", "@typescript-eslint/types": "8.46.1", @@ -7804,7 +7816,8 @@ "version": "5.5.0", "resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-5.5.0.tgz", "integrity": "sha512-hqJHYaQb5OptNunnyAnkHyM8aCjZ1MEIDTQu1iIbbTD/xops91NB5yq1ZK/dC2JDbVWtF23zUtl9JE2NqwT87A==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/@xtuc/ieee754": { "version": "1.2.0", @@ -7854,6 +7867,7 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -8543,6 +8557,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.9", "caniuse-lite": "^1.0.30001746", @@ -8797,6 +8812,7 @@ "resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz", "integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==", "license": "MIT", + "peer": true, "dependencies": { "assertion-error": "^2.0.1", "check-error": "^2.1.1", @@ -10533,6 +10549,7 @@ "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.37.0.tgz", "integrity": "sha512-XyLmROnACWqSxiGYArdef1fItQd47weqB7iwtfr9JHwRrqIXZdcFMvvEcL9xHCmL0SNsOvF0c42lWyM1U5dgig==", "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -16305,6 +16322,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -17985,6 +18003,7 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -18211,6 +18230,7 @@ "integrity": "sha512-4nVGliEpxmhCL8DslSAUdxlB6+SMrhB0a1v5ijlh1xB1nEPuy1mxaHxysVucLHuWryAxLWg6a5ei+U4TLn/rFg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -18476,6 +18496,7 @@ "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.102.1.tgz", "integrity": "sha512-7h/weGm9d/ywQ6qzJ+Xy+r9n/3qgp/thalBbpOi5i223dPXKi04IBtqPN9nTd+jBc7QKfvDbaBnFipYp4sJAUQ==", "license": "MIT", + "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -18525,6 +18546,7 @@ "integrity": "sha512-pIDJHIEI9LR0yxHXQ+Qh95k2EvXpWzZ5l+d+jIo+RdSm9MiHfzazIxwwni/p7+x4eJZuvG1AJwgC4TNQ7NRgsg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@discoveryjs/json-ext": "^0.5.0", "@webpack-cli/configtest": "^2.1.1", @@ -18756,6 +18778,7 @@ "resolved": "https://registry.npmjs.org/winston/-/winston-3.18.3.tgz", "integrity": "sha512-NoBZauFNNWENgsnC9YpgyYwOVrl2m58PpQ8lNHjV3kosGs7KJ7Npk9pCUE+WJlawVSe8mykWDKWFSVfs3QO9ww==", "license": "MIT", + "peer": true, "dependencies": { "@colors/colors": "^1.6.0", "@dabh/diagnostics": "^2.0.8", @@ -18960,6 +18983,7 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", "license": "MIT", + "peer": true, "engines": { "node": ">=10.0.0" }, @@ -19293,6 +19317,7 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -20068,7 +20093,7 @@ }, "src/puter-js": { "name": "@heyputer/puter.js", - "version": "2.1.2", + "version": "2.1.6", "license": "Apache-2.0", "dependencies": { "@heyputer/kv.js": "^0.2.1", diff --git a/src/backend/src/modules/puterai/ClaudeService.js b/src/backend/src/modules/puterai/ClaudeService.js index 9115b16b8..4d513dbbb 100644 --- a/src/backend/src/modules/puterai/ClaudeService.js +++ b/src/backend/src/modules/puterai/ClaudeService.js @@ -25,6 +25,7 @@ const Messages = require('./lib/Messages'); const FSNodeParam = require('../../api/filesystem/FSNodeParam'); const { LLRead } = require('../../filesystem/ll_operations/ll_read'); const { Context } = require('../../util/context'); +const mime = require('mime-types'); /** * ClaudeService class extends BaseService to provide integration with Anthropic's Claude AI models. @@ -34,10 +35,6 @@ const { Context } = require('../../util/context'); * @extends BaseService */ class ClaudeService extends BaseService { - static MODULES = { - Anthropic: require('@anthropic-ai/sdk'), - }; - /** * @type {import('@anthropic-ai/sdk').Anthropic} */ @@ -53,7 +50,7 @@ class ClaudeService extends BaseService { /** @type {import('../../services/MeteringService/MeteringService').MeteringService} */ #meteringService; - async _init() { + async _init () { this.anthropic = new Anthropic({ apiKey: this.config.apiKey, // 10 minutes is the default; we need to override the timeout to @@ -75,7 +72,7 @@ class ClaudeService extends BaseService { * Returns the default model identifier for Claude API interactions * @returns {string} The default model ID 'claude-3-5-sonnet-latest' */ - get_default_model() { + get_default_model () { return 'claude-3-5-sonnet-latest'; } @@ -87,7 +84,7 @@ class ClaudeService extends BaseService { * * @returns Promise> Array of model details */ - async models() { + async models () { return this.models_(); }, @@ -97,7 +94,7 @@ class ClaudeService extends BaseService { * @description Retrieves all available model IDs and their aliases, * flattening them into a single array of strings that can be used for model selection */ - async list() { + async list () { const models = this.models_(); const model_names = []; for ( const model of models ) { @@ -118,10 +115,10 @@ class ClaudeService extends BaseService { * @returns {Object} Returns either a TypedValue with streaming response or a completion object * @this {ClaudeService} */ - async complete({ messages, stream, model, tools, max_tokens, temperature }) { + async complete ({ messages, stream, model, tools, max_tokens, temperature }) { tools = FunctionCalling.make_claude_tools(tools); // console.log("here are the messages: ", messages) - + let system_prompts; // unsure why system_prompts is an array but it always seems to only have exactly one element, // and the real array of system_prompts seems to be the [0].content -- NS @@ -140,12 +137,12 @@ class ClaudeService extends BaseService { } messages = messages.map(message => { - if (message.cache_control) { + if ( message.cache_control ) { message.content[0].cache_control = message.cache_control; } delete message.cache_control; - return message - }) + return message; + }); const sdk_params = { model: model ?? this.get_default_model(), @@ -153,19 +150,19 @@ class ClaudeService extends BaseService { (( model === 'claude-3-5-sonnet-20241022' || model === 'claude-3-5-sonnet-20240620' - ) ? 8192 : this.models_().filter(e=>e.name === model)[0].max_tokens || 4096), //required + ) ? 8192 : this.models_().filter(e => e.name === model)[0].max_tokens || 4096), //required temperature: temperature || 0, // required ...( (system_prompts && system_prompts[0]?.content) ? { - system: system_prompts[0]?.content + system: system_prompts[0]?.content, } : {}), tool_choice: { - type: "auto", - disable_parallel_tool_use: true + type: 'auto', + disable_parallel_tool_use: true, }, messages, ...(tools ? { tools } : {}), }; - console.log(sdk_params.max_tokens) + console.log(sdk_params.max_tokens); // console.log('\x1B[26;1m ===== SDK PARAMETERS', require('util').inspect(sdk_params, undefined, Infinity)); @@ -201,8 +198,6 @@ class ClaudeService extends BaseService { fsNode: task.node, }); - const require = this.require; - const mime = require('mime-types'); const mimeType = mime.contentType(await task.node.get('name')); beta_mode = true; @@ -228,12 +223,6 @@ class ClaudeService extends BaseService { return 'container_upload'; })(); - // { - // 'application/pdf': 'document', - // 'text/plain': 'document', - // 'image/': 'image' - // }[mimeType]; - delete task.contentPart.puter_path, task.contentPart.type = contentBlockTypeForFileBasedOnMime; task.contentPart.source = { @@ -251,7 +240,7 @@ class ClaudeService extends BaseService { try { await this.anthropic.beta.files.delete(task.file_id, { betas: ['files-api-2025-04-14'] }); - } catch (e) { + } catch (e) { this.errors.report('claude:file-delete-task', { source: e, trace: true, @@ -328,26 +317,7 @@ class ClaudeService extends BaseService { } chatStream.end(); - this.billForUsage(actor, model || this.get_default_model(), usageSum); - - // Log token usage statistics - const totalTokens = usageSum.input_tokens + usageSum.output_tokens; - const cachedTokens = usageSum.ephemeral_5m_input_tokens + usageSum.ephemeral_1h_input_tokens; - const cacheHits = usageSum.cache_read_input_tokens; - const uncachedTokens = usageSum.input_tokens - cacheHits - cachedTokens; - -// console.log(` -// ╔══════════════════════════════════════════════════════════════╗ -// ║ 🎯 Token Usage Statistics 🎯 ║ -// ╠══════════════════════════════════════════════════════════════╣ -// ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║ -// ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║ -// ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║ -// ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║ -// ║ 📥 Input Tokens: ${String(usageSum.input_tokens).padStart(10)} 📥 ║ -// ║ 📤 Output Tokens: ${String(usageSum.output_tokens).padStart(10)} 📤 ║ -// ╚══════════════════════════════════════════════════════════════╝ -// `); + this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${this.models_().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`); }; return { @@ -361,26 +331,7 @@ class ClaudeService extends BaseService { await cleanup_files(); const usage = this.usageFormatterUtil(msg.usage); - this.billForUsage(actor, model || this.get_default_model(), usage); - - // Log token usage statistics - const totalTokens = usage.input_tokens + usage.output_tokens; - const cachedTokens = usage.ephemeral_5m_input_tokens + usage.ephemeral_1h_input_tokens; - const cacheHits = usage.cache_read_input_tokens; - const uncachedTokens = usage.input_tokens - cacheHits - cachedTokens; - -// console.log(` -// ╔══════════════════════════════════════════════════════════════╗ -// ║ 🎯 Token Usage Statistics 🎯 ║ -// ╠══════════════════════════════════════════════════════════════╣ -// ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║ -// ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║ -// ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║ -// ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║ -// ║ 📥 Input Tokens: ${String(usage.input_tokens).padStart(10)} 📥 ║ -// ║ 📤 Output Tokens: ${String(usage.output_tokens).padStart(10)} 📤 ║ -// ╚══════════════════════════════════════════════════════════════╝ -// `); + this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${this.models_().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`); // TODO DS: cleanup old usage tracking return { @@ -394,7 +345,7 @@ class ClaudeService extends BaseService { // TODO DS: get this inside the class as a private method once the methods aren't exported directly /** @type {(usage: import("@anthropic-ai/sdk/resources/messages.js").Usage | import("@anthropic-ai/sdk/resources/beta/messages/messages.js").BetaUsage) => {}}) */ - usageFormatterUtil(usage) { + usageFormatterUtil (usage) { return { input_tokens: usage?.input_tokens || 0, ephemeral_5m_input_tokens: usage?.cache_creation?.ephemeral_5m_input_tokens || usage.cache_creation_input_tokens || 0, // this is because they're api is a bit inconsistent @@ -404,11 +355,6 @@ class ClaudeService extends BaseService { }; }; - // TODO DS: get this inside the class as a private method once the methods aren't exported directly - billForUsage(actor, model, usage) { - this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${this.models_().find(m => [m.id, ...(m.aliases || [])].includes(model)).id}`); - }; - /** * Retrieves available Claude AI models and their specifications * @returns Array of model objects containing: @@ -421,7 +367,7 @@ class ClaudeService extends BaseService { * - max_output: Maximum output tokens * - training_cutoff: Training data cutoff date */ - models_() { + models_ () { return [ { id: 'claude-haiku-4-5-20251001', diff --git a/src/backend/src/modules/puterai/OllamaService.js b/src/backend/src/modules/puterai/OllamaService.js index 2de741264..f2985c6f2 100644 --- a/src/backend/src/modules/puterai/OllamaService.js +++ b/src/backend/src/modules/puterai/OllamaService.js @@ -18,11 +18,12 @@ */ // METADATA // {"ai-commented":{"service":"claude"}} -const APIError = require('../../api/APIError'); const BaseService = require('../../services/BaseService'); const OpenAIUtil = require('./lib/OpenAIUtil'); const { Context } = require('../../util/context'); - +const openai = require('openai'); +const uuidv4 = require('uuid').v4; +const axios = require('axios'); /** * OllamaService class - Provides integration with Ollama's API for chat completions * Extends BaseService to implement the puter-chat-completion interface. @@ -32,17 +33,14 @@ const { Context } = require('../../util/context'); */ class OllamaService extends BaseService { static MODULES = { - openai: require('openai'), kv: globalThis.kv, - uuidv4: require('uuid').v4, - axios: require('axios'), }; /** * Gets the system prompt used for AI interactions * @returns {string} The base system prompt that identifies the AI as running on Puter */ - adapt_model(model) { + adapt_model (model) { return model; } @@ -51,16 +49,16 @@ class OllamaService extends BaseService { * @private * @returns {Promise} Resolves when initialization is complete */ - async _init() { + async _init () { // Ollama typically runs on HTTP, not HTTPS this.api_base_url = this.config?.api_base_url || 'http://localhost:11434'; // OpenAI SDK is used to interact with the Ollama API - this.openai = new this.modules.openai.OpenAI({ - apiKey: "ollama", // Ollama doesn't use an API key, it uses the "ollama" string - baseURL: this.api_base_url + '/v1', + this.openai = new openai.OpenAI({ + apiKey: 'ollama', // Ollama doesn't use an API key, it uses the "ollama" string + baseURL: `${this.api_base_url }/v1`, }); - this.kvkey = this.modules.uuidv4(); + this.kvkey = uuidv4(); const svc_aiChat = this.services.get('ai-chat'); svc_aiChat.register_provider({ @@ -74,7 +72,7 @@ class OllamaService extends BaseService { * Returns the default model identifier for the Ollama service * @returns {string} The default model ID 'gpt-oss:20b' */ - get_default_model() { + get_default_model () { return 'gpt-oss:20b'; } @@ -86,7 +84,7 @@ class OllamaService extends BaseService { * * @returns Promise> Array of model details */ - async models() { + async models () { return await this.models_(); }, /** @@ -95,7 +93,7 @@ class OllamaService extends BaseService { * @description Retrieves all available model IDs and their aliases, * flattening them into a single array of strings that can be used for model selection */ - async list() { + async list () { const models = await this.models_(); const model_names = []; for ( const model of models ) { @@ -108,7 +106,7 @@ class OllamaService extends BaseService { * AI Chat completion method. * See AIChatService for more details. */ - async complete({ messages, stream, model, tools, max_tokens, temperature }) { + async complete ({ messages, stream, model, tools, max_tokens, temperature }) { model = this.adapt_model(model); if ( model.startsWith('ollama:') ) { @@ -128,19 +126,23 @@ class OllamaService extends BaseService { ...(stream ? { stream_options: { include_usage: true }, } : {}), - } + }; const completion = await this.openai.chat.completions.create(sdk_params); - const modelDetails = (await this.models_()).find(m => m.id === 'ollama:' + model); + const modelDetails = (await this.models_()).find(m => m.id === `ollama:${model}`); return OpenAIUtil.handle_completion_output({ usage_calculator: ({ usage }) => { - // custom open router logic because its free + const trackedUsage = { - prompt: 0, - completion: 0, - input_cache_read: 0, + prompt: (usage.prompt_tokens ?? 1 ) - (usage.prompt_tokens_details?.cached_tokens ?? 0), + completion: usage.completion_tokens ?? 1, + input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0, }; + const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => { + return [k, 0]; // override to 0 since local is free + })); + this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, { costOverwrites }); const legacyCostCalculator = OpenAIUtil.create_usage_calculator({ model_details: modelDetails, }); @@ -162,31 +164,30 @@ class OllamaService extends BaseService { * - cost: Pricing information object with currency and rates * @private */ - async models_(rawPriceKeys = false) { - const axios = this.require('axios'); + async models_ (_rawPriceKeys = false) { let models = this.modules.kv.get(`${this.kvkey}:models`); - if ( !models ) { + if ( ! models ) { try { const resp = await axios.request({ method: 'GET', - url: this.api_base_url + '/api/tags', + url: `${this.api_base_url}/api/tags`, }); models = resp.data.models || []; if ( models.length > 0 ) { this.modules.kv.set(`${this.kvkey}:models`, models); } - } catch (error) { + } catch ( error ) { this.log.error('Failed to fetch models from Ollama:', error.message); // Return empty array if Ollama is not available return []; } } - + if ( !models || models.length === 0 ) { return []; } - + const coerced_models = []; for ( const model of models ) { // Ollama API returns models with 'name' property, not 'model' @@ -196,8 +197,8 @@ class OllamaService extends BaseService { output: 0, }; coerced_models.push({ - id: 'ollama:' + modelName, - name: modelName + ' (Ollama)', + id: `ollama:${ modelName}`, + name: `${modelName} (Ollama)`, max_tokens: model.size || model.max_context || 8192, cost: { currency: 'usd-cents', @@ -206,7 +207,7 @@ class OllamaService extends BaseService { }, }); } - console.log("coerced_models", coerced_models); + console.log('coerced_models', coerced_models); return coerced_models; } } diff --git a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs index 159d3ef43..5ca5c32ba 100644 --- a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs +++ b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs @@ -51,7 +51,7 @@ export class OpenAICompletionService { /** @type {import('../../../services/MeteringService/MeteringService.js').MeteringService} */ #meteringService; - constructor({ serviceName, config, globalConfig, aiChatService, meteringService, models = OPEN_AI_MODELS, defaultModel = 'gpt-5-nano' }) { + constructor ({ serviceName, config, globalConfig, aiChatService, meteringService, models = OPEN_AI_MODELS, defaultModel = 'gpt-5-nano' }) { this.#models = models; this.#defaultModel = defaultModel; this.#meteringService = meteringService; @@ -60,7 +60,7 @@ export class OpenAICompletionService { globalConfig?.services?.openai?.apiKey; // Fallback to the old format for backward compatibility - if ( !apiKey ) { + if ( ! apiKey ) { apiKey = config?.openai?.secret_key ?? globalConfig?.openai?.secret_key; @@ -70,7 +70,7 @@ export class OpenAICompletionService { 'Please use `services.openai.apiKey` instead.'); } - if ( !apiKey ) { + if ( ! apiKey ) { throw new Error('OpenAI API key is missing in configuration.'); } @@ -89,11 +89,11 @@ export class OpenAICompletionService { * Each model object includes an ID and cost details (currency, tokens, input/output rates). * @returns {{id: string, cost: {currency: string, tokens: number, input: number, output: number}}[]} */ - models() { + models () { return this.#models; } - list() { + list () { const models = this.models(); const model_names = []; for ( const model of models ) { @@ -105,11 +105,11 @@ export class OpenAICompletionService { return model_names; } - get_default_model(){ + get_default_model () { return this.#defaultModel; } - async complete({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) { + async complete ({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) { return await this.#complete(messages, { model: model, tools, @@ -131,20 +131,20 @@ export class OpenAICompletionService { * @property {boolean} flagged - Whether the content was flagged as inappropriate * @property {Object} results - Raw moderation results from OpenAI API */ - async checkModeration(text) { + async checkModeration (text) { // create moderation const results = await this.#openAi.moderations.create({ - model: "omni-moderation-latest", + model: 'omni-moderation-latest', input: text, }); let flagged = false; for ( const result of results?.results ?? [] ) { - + // OpenAI does a crazy amount of false positives. We filter by their 80% interval const veryFlaggedEntries = Object.entries(result.category_scores).filter(e => e[1] > 0.8); - if (veryFlaggedEntries.length > 0 ) { + if ( veryFlaggedEntries.length > 0 ) { flagged = true; break; } @@ -166,7 +166,7 @@ export class OpenAICompletionService { * @returns {Promise} The completion response containing message and usage info * @throws {Error} If messages are invalid or content is flagged by moderation */ - async #complete(messages, { + async #complete (messages, { stream, moderation, model, tools, temperature, max_tokens, reasoning, text, reasoning_effort, verbosity, diff --git a/src/backend/src/modules/puterai/OpenAiCompletionService/index.mjs b/src/backend/src/modules/puterai/OpenAiCompletionService/index.mjs index 73ab2dc9a..2f6f8c3b2 100644 --- a/src/backend/src/modules/puterai/OpenAiCompletionService/index.mjs +++ b/src/backend/src/modules/puterai/OpenAiCompletionService/index.mjs @@ -26,7 +26,7 @@ export class OpenAICompletionServiceWrapper extends BaseService { /** @type {OpenAICompletionService} */ openAICompletionService; - _init(){ + _init () { this.openAICompletionService = new OpenAICompletionService({ serviceName: this.service_name, config: this.config, @@ -36,12 +36,12 @@ export class OpenAICompletionServiceWrapper extends BaseService { }); } - async check_moderation(text) { + async check_moderation (text) { return await this.openAICompletionService.checkModeration(text); } - get_default_model() { - return this.openAICompletionService.get_default_model(); + get_default_model () { + return this.openAICompletionService.get_default_model(); } static IMPLEMENTS = { @@ -49,7 +49,7 @@ export class OpenAICompletionServiceWrapper extends BaseService { .filter(n => n !== 'constructor') .reduce((acc, fn) => ({ ...acc, - [fn]: async function(...a) { + [fn]: async function (...a) { return await this.openAICompletionService[fn](...a); }, }), {}), diff --git a/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs b/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs index f68bdf313..5de33c4bd 100644 --- a/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs +++ b/src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs @@ -11,7 +11,7 @@ export const OPEN_AI_MODELS = [ }, max_tokens: 128000, }, - { + { id: 'gpt-5.1-codex', cost: { currency: 'usd-cents', diff --git a/src/backend/src/modules/puterai/OpenRouterService.js b/src/backend/src/modules/puterai/OpenRouterService.js index 520eb2692..2128e15b6 100644 --- a/src/backend/src/modules/puterai/OpenRouterService.js +++ b/src/backend/src/modules/puterai/OpenRouterService.js @@ -22,7 +22,9 @@ const APIError = require('../../api/APIError'); const BaseService = require('../../services/BaseService'); const OpenAIUtil = require('./lib/OpenAIUtil'); const { Context } = require('../../util/context'); - +const openai = require('openai'); +const uuidv4 = require('uuid').v4; +const axios = require('axios'); /** * XAIService class - Provides integration with X.AI's API for chat completions * Extends BaseService to implement the puter-chat-completion interface. @@ -32,17 +34,29 @@ const { Context } = require('../../util/context'); */ class OpenRouterService extends BaseService { static MODULES = { - openai: require('openai'), kv: globalThis.kv, - uuidv4: require('uuid').v4, - axios: require('axios'), + }; + + // TODO DS: extract this into driver wrapper like openAiService + static IMPLEMENTS = { + ['puter-chat-completion']: { + async models () { + return await this.models(); + }, + async list () { + return await this.list(); + }, + async complete (...params) { + return await this.complete(...params); + }, + }, }; /** * Gets the system prompt used for AI interactions * @returns {string} The base system prompt that identifies the AI as running on Puter */ - adapt_model(model) { + adapt_model (model) { return model; } @@ -54,13 +68,13 @@ class OpenRouterService extends BaseService { * @private * @returns {Promise} Resolves when initialization is complete */ - async _init() { + async _init () { this.api_base_url = 'https://openrouter.ai/api/v1'; - this.openai = new this.modules.openai.OpenAI({ + this.openai = new openai.OpenAI({ apiKey: this.config.apiKey, baseURL: this.api_base_url, }); - this.kvkey = this.modules.uuidv4(); + this.kvkey = uuidv4(); const svc_aiChat = this.services.get('ai-chat'); svc_aiChat.register_provider({ @@ -74,97 +88,81 @@ class OpenRouterService extends BaseService { * Returns the default model identifier for the XAI service * @returns {string} The default model ID 'grok-beta' */ - get_default_model() { - return 'grok-beta'; + get_default_model () { } - - static IMPLEMENTS = { - ['puter-chat-completion']: { - /** - * Returns a list of available models and their details. - * See AIChatService for more information. - * - * @returns Promise> Array of model details - */ - async models() { - return await this.models_(); - }, - /** + /** * Returns a list of available model names including their aliases * @returns {Promise} Array of model identifiers and their aliases * @description Retrieves all available model IDs and their aliases, * flattening them into a single array of strings that can be used for model selection */ - async list() { - const models = await this.models_(); - const model_names = []; - for ( const model of models ) { - model_names.push(model.id); - } - return model_names; - }, + async list () { + const models = await this.models(); + const model_names = []; + for ( const model of models ) { + model_names.push(model.id); + } + return model_names; + } - /** + /** * AI Chat completion method. * See AIChatService for more details. */ - async complete({ messages, stream, model, tools, max_tokens, temperature }) { - model = this.adapt_model(model); + async complete ({ messages, stream, model, tools, max_tokens, temperature }) { + model = this.adapt_model(model); - if ( model.startsWith('openrouter:') ) { - model = model.slice('openrouter:'.length); - } + if ( model.startsWith('openrouter:') ) { + model = model.slice('openrouter:'.length); + } - if ( model === 'openrouter/auto' ) { - throw APIError.create('field_invalid', null, { - key: 'model', - expected: 'allowed model', - got: 'disallowed model', - }); - } + if ( model === 'openrouter/auto' ) { + throw APIError.create('field_invalid', null, { + key: 'model', + expected: 'allowed model', + got: 'disallowed model', + }); + } - const actor = Context.get('actor'); + const actor = Context.get('actor'); - messages = await OpenAIUtil.process_input_messages(messages); - const sdk_params = { - messages, - model: model ?? this.get_default_model(), - ...(tools ? { tools } : {}), - max_tokens, - temperature: temperature, // default to 1.0 - stream, - ...(stream ? { - stream_options: { include_usage: true }, - } : {}), - } + messages = await OpenAIUtil.process_input_messages(messages); - const completion = await this.openai.chat.completions.create(sdk_params); + const completion = await this.openai.chat.completions.create({ + messages, + model: model ?? this.get_default_model(), + ...(tools ? { tools } : {}), + max_tokens, + temperature: temperature, // default to 1.0 + stream, + ...(stream ? { + stream_options: { include_usage: true }, + } : {}), + }); - const modelDetails = (await this.models_()).find(m => m.id === 'openrouter:' + model); - const rawPriceModelDetails = (await this.models_(true)).find(m => m.id === 'openrouter:' + model); - return OpenAIUtil.handle_completion_output({ - usage_calculator: ({ usage }) => { - // custom open router logic because they're pricing are weird - const trackedUsage = { - prompt: usage.prompt_tokens ?? 0, - completion: usage.completion_tokens ?? 0, - input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0, - }; - const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => { - return [k, rawPriceModelDetails.cost[k] * trackedUsage[k]]; - })); - this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, costOverwrites); - const legacyCostCalculator = OpenAIUtil.create_usage_calculator({ - model_details: modelDetails, - }); - return legacyCostCalculator({ usage }); - }, - stream, - completion, + const modelDetails = (await this.models()).find(m => m.id === `openrouter:${ model}`); + const rawPriceModelDetails = (await this.models(true)).find(m => m.id === `openrouter:${ model}`); + return OpenAIUtil.handle_completion_output({ + usage_calculator: ({ usage }) => { + // custom open router logic because they're pricing are weird + const trackedUsage = { + prompt: (usage.prompt_tokens ?? 0 ) - (usage.prompt_tokens_details?.cached_tokens ?? 0), + completion: usage.completion_tokens ?? 0, + input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0, + }; + const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => { + return [k, rawPriceModelDetails.cost[k] * trackedUsage[k]]; + })); + this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, costOverwrites); + const legacyCostCalculator = OpenAIUtil.create_usage_calculator({ + model_details: modelDetails, }); + return legacyCostCalculator({ usage }); }, - }, - }; + stream, + completion, + }); + } /** * Retrieves available AI models and their specifications @@ -173,19 +171,21 @@ class OpenRouterService extends BaseService { * - name: Human readable model name * - context: Maximum context window size * - cost: Pricing information object with currency and rates - * @private */ - async models_(rawPriceKeys = false) { - const axios = this.require('axios'); - + async models (rawPriceKeys = false) { let models = this.modules.kv.get(`${this.kvkey}:models`); - if ( !models ) { - const resp = await axios.request({ - method: 'GET', - url: this.api_base_url + '/models', - }); - models = resp.data.data; - this.modules.kv.set(`${this.kvkey}:models`, models); + if ( ! models ) { + try { + const resp = await axios.request({ + method: 'GET', + url: `${this.api_base_url}/models`, + }); + + models = resp.data.data; + this.modules.kv.set(`${this.kvkey}:models`, models); + } catch (e) { + console.log(e); + } } const coerced_models = []; for ( const model of models ) { @@ -194,8 +194,8 @@ class OpenRouterService extends BaseService { output: Math.round(model.pricing.completion * 1_000_000 * 100), }; coerced_models.push({ - id: 'openrouter:' + model.id, - name: model.name + ' (OpenRouter)', + id: `openrouter:${ model.id}`, + name: `${model.name } (OpenRouter)`, max_tokens: model.top_provider.max_completion_tokens, cost: { currency: 'usd-cents', diff --git a/src/backend/src/modules/puterai/lib/OpenAIUtil.js b/src/backend/src/modules/puterai/lib/OpenAIUtil.js index 52d830567..a73123d42 100644 --- a/src/backend/src/modules/puterai/lib/OpenAIUtil.js +++ b/src/backend/src/modules/puterai/lib/OpenAIUtil.js @@ -24,7 +24,7 @@ const process_input_messages = async (messages) => { const content_block = content[i]; if ( content_block.type === 'tool_use' ) { - if ( !msg.tool_calls ) { + if ( ! msg.tool_calls ) { msg.tool_calls = []; is_tool_call = true; } @@ -108,12 +108,6 @@ const create_chat_stream_handler = ({ let last_usage = null; for await ( let chunk of completion ) { chunk = deviations.chunk_but_like_actually(chunk); - if ( process.env.DEBUG ) { - const delta = chunk?.choices?.[0]?.delta; - console.log(`AI CHUNK`, - chunk, - delta && JSON.stringify(delta)); - } const chunk_usage = deviations.index_usage_from_stream_chunk(chunk); if ( chunk_usage ) last_usage = chunk_usage; if ( chunk.choices.length < 1 ) continue; @@ -127,7 +121,7 @@ const create_chat_stream_handler = ({ // A: For now, reasoning_content and content never appear together, but I’m not sure if they’ll always be mutually exclusive. } - if ( choice.delta.content ){ + if ( choice.delta.content ) { if ( mode === 'tool' ) { toolblock.end(); mode = 'text'; @@ -205,7 +199,7 @@ const handle_completion_output = async ({ if ( finally_fn ) await finally_fn(); const is_empty = completion.choices?.[0]?.message?.content?.trim() === ''; - if ( is_empty && ! completion.choices?.[0]?.message?.tool_calls ) { + if ( is_empty && !completion.choices?.[0]?.message?.tool_calls ) { // GPT refuses to generate an empty response if you ask it to, // so this will probably only happen on an error condition. throw new Error('an empty response was generated'); diff --git a/src/backend/src/services/MeteringService/MeteringService.ts b/src/backend/src/services/MeteringService/MeteringService.ts index 7a4d832cd..273515bfe 100644 --- a/src/backend/src/services/MeteringService/MeteringService.ts +++ b/src/backend/src/services/MeteringService/MeteringService.ts @@ -353,7 +353,7 @@ export class MeteringService { } async getActorCurrentMonthUsageDetails (actor: Actor) { - if ( !actor.type?.user?.uuid ) { + if ( ! actor.type?.user?.uuid ) { throw new Error('Actor must be a user to get usage details'); } // batch get actor usage, per app usage, and actor app totals for the month @@ -375,7 +375,7 @@ export class MeteringService { filteredAppTotals[appKey] = appUsage; } else { Object.entries(appUsage).forEach(([usageKind, amount]) => { - if ( !othersTotal[usageKind as keyof AppTotals] ) { + if ( ! othersTotal[usageKind as keyof AppTotals] ) { othersTotal[usageKind as keyof AppTotals] = 0; } othersTotal[usageKind as keyof AppTotals] += amount; @@ -398,7 +398,7 @@ export class MeteringService { } async getActorCurrentMonthAppUsageDetails (actor: Actor, appId?: string) { - if ( !actor.type?.user?.uuid ) { + if ( ! actor.type?.user?.uuid ) { throw new Error('Actor must be a user to get usage details'); } appId = appId || actor.type?.app?.uid || GLOBAL_APP_KEY; @@ -453,7 +453,7 @@ export class MeteringService { async getActorSubscription (actor: Actor): Promise<(typeof SUB_POLICIES)[number]> { // TODO DS: maybe allow non-user actors to have subscriptions eventually - if ( !actor.type?.user.uuid ) { + if ( ! actor.type?.user.uuid ) { throw new Error('Actor must be a user to get policy'); } @@ -476,7 +476,7 @@ export class MeteringService { } async getActorAddons (actor: Actor) { - if ( !actor.type?.user?.uuid ) { + if ( ! actor.type?.user?.uuid ) { throw new Error('Actor must be a user to get policy addons'); } const key = `${POLICY_PREFIX}:actor:${actor.type.user?.uuid}:addons`; @@ -487,7 +487,7 @@ export class MeteringService { } async getActorAppUsage (actor: Actor, appId: string) { - if ( !actor.type?.user?.uuid ) { + if ( ! actor.type?.user?.uuid ) { throw new Error('Actor must be a user to get app usage'); } @@ -522,7 +522,7 @@ export class MeteringService { aggregatedUsage.total += total || 0; Object.entries((usage || {}) as Record).forEach(([usageKind, record]) => { - if ( !aggregatedUsage[usageKind] ) { + if ( ! aggregatedUsage[usageKind] ) { aggregatedUsage[usageKind] = { cost: 0, units: 0, count: 0 } as UsageRecord; } const aggregatedRecord = aggregatedUsage[usageKind] as UsageRecord; @@ -535,8 +535,8 @@ export class MeteringService { }); } - async updateAddonCredit (userId:string, tokenAmount: number) { - if ( !userId ) { + async updateAddonCredit (userId: string, tokenAmount: number) { + if ( ! userId ) { throw new Error('User needed to update extra credits'); } const key = `${POLICY_PREFIX}:actor:${userId}:addons`; diff --git a/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts b/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts index 8785cf274..2117ece59 100644 --- a/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts +++ b/src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts @@ -1,15 +1,71 @@ export const OPENROUTER_COST_MAP = { - + 'openrouter:google/gemini-3-pro-preview:prompt': 200, + 'openrouter:google/gemini-3-pro-preview:completion': 1200, + 'openrouter:google/gemini-3-pro-preview:image': 825600, + 'openrouter:google/gemini-3-pro-preview:input_cache_read': 20, + 'openrouter:google/gemini-3-pro-preview:input_cache_write': 238, + 'openrouter:deepcogito/cogito-v2.1-671b:prompt': 125, + 'openrouter:deepcogito/cogito-v2.1-671b:completion': 125, + 'openrouter:openai/gpt-5.1:prompt': 125, + 'openrouter:openai/gpt-5.1:completion': 1000, + 'openrouter:openai/gpt-5.1:web_search': 1000000, + 'openrouter:openai/gpt-5.1:input_cache_read': 12, + 'openrouter:openai/gpt-5.1-chat:prompt': 125, + 'openrouter:openai/gpt-5.1-chat:completion': 1000, + 'openrouter:openai/gpt-5.1-chat:web_search': 1000000, + 'openrouter:openai/gpt-5.1-chat:input_cache_read': 12, + 'openrouter:openai/gpt-5.1-codex:prompt': 125, + 'openrouter:openai/gpt-5.1-codex:completion': 1000, + 'openrouter:openai/gpt-5.1-codex:input_cache_read': 12, + 'openrouter:openai/gpt-5.1-codex-mini:prompt': 25, + 'openrouter:openai/gpt-5.1-codex-mini:completion': 200, + 'openrouter:openai/gpt-5.1-codex-mini:input_cache_read': 3, + 'openrouter:moonshotai/kimi-linear-48b-a3b-instruct:prompt': 50, + 'openrouter:moonshotai/kimi-linear-48b-a3b-instruct:completion': 60, + 'openrouter:moonshotai/kimi-k2-thinking:prompt': 45, + 'openrouter:moonshotai/kimi-k2-thinking:completion': 235, + 'openrouter:amazon/nova-premier-v1:prompt': 250, + 'openrouter:amazon/nova-premier-v1:completion': 1250, + 'openrouter:amazon/nova-premier-v1:input_cache_read': 63, + 'openrouter:perplexity/sonar-pro-search:prompt': 300, + 'openrouter:perplexity/sonar-pro-search:completion': 1500, + 'openrouter:perplexity/sonar-pro-search:request': 1800000, + 'openrouter:mistralai/voxtral-small-24b-2507:prompt': 10, + 'openrouter:mistralai/voxtral-small-24b-2507:completion': 30, + 'openrouter:mistralai/voxtral-small-24b-2507:audio': 10000, + 'openrouter:openai/gpt-oss-safeguard-20b:prompt': 7, + 'openrouter:openai/gpt-oss-safeguard-20b:completion': 30, + 'openrouter:openai/gpt-oss-safeguard-20b:input_cache_read': 4, + 'openrouter:nvidia/nemotron-nano-12b-v2-vl:prompt': 20, + 'openrouter:nvidia/nemotron-nano-12b-v2-vl:completion': 60, + 'openrouter:minimax/minimax-m2:prompt': 26, + 'openrouter:minimax/minimax-m2:completion': 102, + 'openrouter:liquid/lfm2-8b-a1b:prompt': 5, + 'openrouter:liquid/lfm2-8b-a1b:completion': 10, + 'openrouter:liquid/lfm-2.2-6b:prompt': 5, + 'openrouter:liquid/lfm-2.2-6b:completion': 10, + 'openrouter:ibm-granite/granite-4.0-h-micro:prompt': 2, + 'openrouter:ibm-granite/granite-4.0-h-micro:completion': 11, + 'openrouter:deepcogito/cogito-v2-preview-llama-405b:prompt': 350, + 'openrouter:deepcogito/cogito-v2-preview-llama-405b:completion': 350, + 'openrouter:openai/gpt-5-image-mini:prompt': 250, + 'openrouter:openai/gpt-5-image-mini:completion': 200, + 'openrouter:openai/gpt-5-image-mini:image': 250, + 'openrouter:openai/gpt-5-image-mini:web_search': 1000000, + 'openrouter:openai/gpt-5-image-mini:input_cache_read': 25, 'openrouter:anthropic/claude-haiku-4.5:prompt': 100, 'openrouter:anthropic/claude-haiku-4.5:completion': 500, 'openrouter:anthropic/claude-haiku-4.5:input_cache_read': 10, 'openrouter:anthropic/claude-haiku-4.5:input_cache_write': 125, 'openrouter:qwen/qwen3-vl-8b-thinking:prompt': 18, 'openrouter:qwen/qwen3-vl-8b-thinking:completion': 210, - 'openrouter:qwen/qwen3-vl-8b-instruct:prompt': 18, - 'openrouter:qwen/qwen3-vl-8b-instruct:completion': 69, - 'openrouter:inclusionai/ling-1t:prompt': 100, - 'openrouter:inclusionai/ling-1t:completion': 300, + 'openrouter:qwen/qwen3-vl-8b-instruct:prompt': 8, + 'openrouter:qwen/qwen3-vl-8b-instruct:completion': 50, + 'openrouter:openai/gpt-5-image:prompt': 1000, + 'openrouter:openai/gpt-5-image:completion': 1000, + 'openrouter:openai/gpt-5-image:image': 1000, + 'openrouter:openai/gpt-5-image:web_search': 1000000, + 'openrouter:openai/gpt-5-image:input_cache_read': 125, 'openrouter:openai/o3-deep-research:prompt': 1000, 'openrouter:openai/o3-deep-research:completion': 4000, 'openrouter:openai/o3-deep-research:image': 765000, @@ -27,16 +83,21 @@ export const OPENROUTER_COST_MAP = { 'openrouter:google/gemini-2.5-flash-image:prompt': 30, 'openrouter:google/gemini-2.5-flash-image:completion': 250, 'openrouter:google/gemini-2.5-flash-image:image': 123800, - 'openrouter:qwen/qwen3-vl-30b-a3b-thinking:prompt': 29, + 'openrouter:qwen/qwen3-vl-30b-a3b-thinking:prompt': 20, 'openrouter:qwen/qwen3-vl-30b-a3b-thinking:completion': 100, - 'openrouter:qwen/qwen3-vl-30b-a3b-instruct:prompt': 29, - 'openrouter:qwen/qwen3-vl-30b-a3b-instruct:completion': 99, + 'openrouter:qwen/qwen3-vl-30b-a3b-instruct:prompt': 15, + 'openrouter:qwen/qwen3-vl-30b-a3b-instruct:completion': 60, 'openrouter:openai/gpt-5-pro:prompt': 1500, 'openrouter:openai/gpt-5-pro:completion': 12000, - 'openrouter:z-ai/glm-4.6:prompt': 50, + 'openrouter:openai/gpt-5-pro:web_search': 1000000, + 'openrouter:z-ai/glm-4.6:prompt': 40, 'openrouter:z-ai/glm-4.6:completion': 175, + 'openrouter:z-ai/glm-4.6:exacto:prompt': 45, + 'openrouter:z-ai/glm-4.6:exacto:completion': 190, 'openrouter:anthropic/claude-sonnet-4.5:prompt': 300, 'openrouter:anthropic/claude-sonnet-4.5:completion': 1500, + 'openrouter:anthropic/claude-sonnet-4.5:input_cache_read': 30, + 'openrouter:anthropic/claude-sonnet-4.5:input_cache_write': 375, 'openrouter:deepseek/deepseek-v3.2-exp:prompt': 27, 'openrouter:deepseek/deepseek-v3.2-exp:completion': 40, 'openrouter:thedrummer/cydonia-24b-v4.1:prompt': 30, @@ -46,14 +107,15 @@ export const OPENROUTER_COST_MAP = { 'openrouter:google/gemini-2.5-flash-preview-09-2025:prompt': 30, 'openrouter:google/gemini-2.5-flash-preview-09-2025:completion': 250, 'openrouter:google/gemini-2.5-flash-preview-09-2025:image': 123800, + 'openrouter:google/gemini-2.5-flash-preview-09-2025:audio': 100, 'openrouter:google/gemini-2.5-flash-preview-09-2025:input_cache_read': 7, 'openrouter:google/gemini-2.5-flash-preview-09-2025:input_cache_write': 38, 'openrouter:google/gemini-2.5-flash-lite-preview-09-2025:prompt': 10, 'openrouter:google/gemini-2.5-flash-lite-preview-09-2025:completion': 40, - 'openrouter:qwen/qwen3-vl-235b-a22b-thinking:prompt': 45, - 'openrouter:qwen/qwen3-vl-235b-a22b-thinking:completion': 350, - 'openrouter:qwen/qwen3-vl-235b-a22b-instruct:prompt': 30, - 'openrouter:qwen/qwen3-vl-235b-a22b-instruct:completion': 120, + 'openrouter:qwen/qwen3-vl-235b-a22b-thinking:prompt': 30, + 'openrouter:qwen/qwen3-vl-235b-a22b-thinking:completion': 120, + 'openrouter:qwen/qwen3-vl-235b-a22b-instruct:prompt': 21, + 'openrouter:qwen/qwen3-vl-235b-a22b-instruct:completion': 190, 'openrouter:qwen/qwen3-max:prompt': 120, 'openrouter:qwen/qwen3-max:completion': 600, 'openrouter:qwen/qwen3-max:input_cache_read': 24, @@ -65,6 +127,8 @@ export const OPENROUTER_COST_MAP = { 'openrouter:openai/gpt-5-codex:input_cache_read': 12, 'openrouter:deepseek/deepseek-v3.1-terminus:prompt': 23, 'openrouter:deepseek/deepseek-v3.1-terminus:completion': 90, + 'openrouter:deepseek/deepseek-v3.1-terminus:exacto:prompt': 27, + 'openrouter:deepseek/deepseek-v3.1-terminus:exacto:completion': 100, 'openrouter:x-ai/grok-4-fast:prompt': 20, 'openrouter:x-ai/grok-4-fast:completion': 50, 'openrouter:x-ai/grok-4-fast:input_cache_read': 5, @@ -77,7 +141,7 @@ export const OPENROUTER_COST_MAP = { 'openrouter:arcee-ai/afm-4.5b:completion': 15, 'openrouter:opengvlab/internvl3-78b:prompt': 7, 'openrouter:opengvlab/internvl3-78b:completion': 26, - 'openrouter:qwen/qwen3-next-80b-a3b-thinking:prompt': 14, + 'openrouter:qwen/qwen3-next-80b-a3b-thinking:prompt': 15, 'openrouter:qwen/qwen3-next-80b-a3b-thinking:completion': 120, 'openrouter:qwen/qwen3-next-80b-a3b-instruct:prompt': 10, 'openrouter:qwen/qwen3-next-80b-a3b-instruct:completion': 80, @@ -91,14 +155,18 @@ export const OPENROUTER_COST_MAP = { 'openrouter:nvidia/nemotron-nano-9b-v2:completion': 16, 'openrouter:moonshotai/kimi-k2-0905:prompt': 39, 'openrouter:moonshotai/kimi-k2-0905:completion': 190, + 'openrouter:moonshotai/kimi-k2-0905:exacto:prompt': 60, + 'openrouter:moonshotai/kimi-k2-0905:exacto:completion': 250, + 'openrouter:deepcogito/cogito-v2-preview-llama-70b:prompt': 88, + 'openrouter:deepcogito/cogito-v2-preview-llama-70b:completion': 88, 'openrouter:deepcogito/cogito-v2-preview-llama-109b-moe:prompt': 18, 'openrouter:deepcogito/cogito-v2-preview-llama-109b-moe:completion': 59, 'openrouter:deepcogito/cogito-v2-preview-deepseek-671b:prompt': 125, 'openrouter:deepcogito/cogito-v2-preview-deepseek-671b:completion': 125, 'openrouter:stepfun-ai/step3:prompt': 57, 'openrouter:stepfun-ai/step3:completion': 142, - 'openrouter:qwen/qwen3-30b-a3b-thinking-2507:prompt': 8, - 'openrouter:qwen/qwen3-30b-a3b-thinking-2507:completion': 29, + 'openrouter:qwen/qwen3-30b-a3b-thinking-2507:prompt': 5, + 'openrouter:qwen/qwen3-30b-a3b-thinking-2507:completion': 34, 'openrouter:x-ai/grok-code-fast-1:prompt': 20, 'openrouter:x-ai/grok-code-fast-1:completion': 150, 'openrouter:x-ai/grok-code-fast-1:input_cache_read': 2, @@ -129,20 +197,8 @@ export const OPENROUTER_COST_MAP = { 'openrouter:ai21/jamba-large-1.7:completion': 800, 'openrouter:openai/gpt-5-chat:prompt': 125, 'openrouter:openai/gpt-5-chat:completion': 1000, + 'openrouter:openai/gpt-5-chat:web_search': 1000000, 'openrouter:openai/gpt-5-chat:input_cache_read': 12, - 'openrouter:openai/gpt-5.1:prompt': 125, - 'openrouter:openai/gpt-5.1:completion': 1000, - 'openrouter:openai/gpt-5.1:web_search': 1000000, - 'openrouter:openai/gpt-5.1:input_cache_read': 12, - 'openrouter:openai/gpt-5.1-codex:prompt': 125, - 'openrouter:openai/gpt-5.1-codex:completion': 1000, - 'openrouter:openai/gpt-5.1-codex:input_cache_read': 12, - 'openrouter:openai/gpt-5.1-codex-mini:prompt': 25, - 'openrouter:openai/gpt-5.1-codex-mini:completion': 200, - 'openrouter:openai/gpt-5.1-codex-mini:input_cache_read': 3, - 'openrouter:openai/gpt-5.1-chat-latest:prompt': 125, - 'openrouter:openai/gpt-5.1-chat-latest:completion': 1000, - 'openrouter:openai/gpt-5.1-chat-latest:input_cache_read': 12, 'openrouter:openai/gpt-5:prompt': 125, 'openrouter:openai/gpt-5:completion': 1000, 'openrouter:openai/gpt-5:web_search': 1000000, @@ -157,6 +213,8 @@ export const OPENROUTER_COST_MAP = { 'openrouter:openai/gpt-5-nano:input_cache_read': 1, 'openrouter:openai/gpt-oss-120b:prompt': 4, 'openrouter:openai/gpt-oss-120b:completion': 40, + 'openrouter:openai/gpt-oss-120b:exacto:prompt': 5, + 'openrouter:openai/gpt-oss-120b:exacto:completion': 24, 'openrouter:openai/gpt-oss-20b:prompt': 3, 'openrouter:openai/gpt-oss-20b:completion': 14, 'openrouter:anthropic/claude-opus-4.1:prompt': 1500, @@ -171,15 +229,17 @@ export const OPENROUTER_COST_MAP = { 'openrouter:qwen/qwen3-30b-a3b-instruct-2507:prompt': 8, 'openrouter:qwen/qwen3-30b-a3b-instruct-2507:completion': 33, 'openrouter:z-ai/glm-4.5:prompt': 35, - 'openrouter:z-ai/glm-4.5:completion': 155, - 'openrouter:z-ai/glm-4.5-air:prompt': 14, - 'openrouter:z-ai/glm-4.5-air:completion': 86, + 'openrouter:z-ai/glm-4.5:completion': 150, + 'openrouter:z-ai/glm-4.5-air:prompt': 13, + 'openrouter:z-ai/glm-4.5-air:completion': 85, 'openrouter:qwen/qwen3-235b-a22b-thinking-2507:prompt': 11, 'openrouter:qwen/qwen3-235b-a22b-thinking-2507:completion': 60, 'openrouter:z-ai/glm-4-32b:prompt': 10, 'openrouter:z-ai/glm-4-32b:completion': 10, 'openrouter:qwen/qwen3-coder:prompt': 22, 'openrouter:qwen/qwen3-coder:completion': 95, + 'openrouter:qwen/qwen3-coder:exacto:prompt': 38, + 'openrouter:qwen/qwen3-coder:exacto:completion': 153, 'openrouter:bytedance/ui-tars-1.5-7b:prompt': 10, 'openrouter:bytedance/ui-tars-1.5-7b:completion': 20, 'openrouter:google/gemini-2.5-flash-lite:prompt': 10, @@ -190,8 +250,8 @@ export const OPENROUTER_COST_MAP = { 'openrouter:qwen/qwen3-235b-a22b-2507:completion': 55, 'openrouter:switchpoint/router:prompt': 85, 'openrouter:switchpoint/router:completion': 340, - 'openrouter:moonshotai/kimi-k2:prompt': 14, - 'openrouter:moonshotai/kimi-k2:completion': 249, + 'openrouter:moonshotai/kimi-k2:prompt': 50, + 'openrouter:moonshotai/kimi-k2:completion': 240, 'openrouter:thudm/glm-4.1v-9b-thinking:prompt': 4, 'openrouter:thudm/glm-4.1v-9b-thinking:completion': 14, 'openrouter:mistralai/devstral-medium:prompt': 40, @@ -201,8 +261,8 @@ export const OPENROUTER_COST_MAP = { 'openrouter:x-ai/grok-4:prompt': 300, 'openrouter:x-ai/grok-4:completion': 1500, 'openrouter:x-ai/grok-4:input_cache_read': 75, - 'openrouter:tencent/hunyuan-a13b-instruct:prompt': 3, - 'openrouter:tencent/hunyuan-a13b-instruct:completion': 3, + 'openrouter:tencent/hunyuan-a13b-instruct:prompt': 14, + 'openrouter:tencent/hunyuan-a13b-instruct:completion': 57, 'openrouter:tngtech/deepseek-r1t2-chimera:prompt': 30, 'openrouter:tngtech/deepseek-r1t2-chimera:completion': 120, 'openrouter:morph/morph-v3-large:prompt': 90, @@ -221,11 +281,6 @@ export const OPENROUTER_COST_MAP = { 'openrouter:mistralai/mistral-small-3.2-24b-instruct:completion': 18, 'openrouter:minimax/minimax-m1:prompt': 40, 'openrouter:minimax/minimax-m1:completion': 220, - 'openrouter:google/gemini-2.5-flash-lite-preview-06-17:prompt': 10, - 'openrouter:google/gemini-2.5-flash-lite-preview-06-17:completion': 40, - 'openrouter:google/gemini-2.5-flash-lite-preview-06-17:audio': 30, - 'openrouter:google/gemini-2.5-flash-lite-preview-06-17:input_cache_read': 3, - 'openrouter:google/gemini-2.5-flash-lite-preview-06-17:input_cache_write': 18, 'openrouter:google/gemini-2.5-flash:prompt': 30, 'openrouter:google/gemini-2.5-flash:completion': 250, 'openrouter:google/gemini-2.5-flash:image': 123800, @@ -250,19 +305,19 @@ export const OPENROUTER_COST_MAP = { 'openrouter:x-ai/grok-3:input_cache_read': 75, 'openrouter:mistralai/magistral-small-2506:prompt': 50, 'openrouter:mistralai/magistral-small-2506:completion': 150, - 'openrouter:mistralai/magistral-medium-2506:prompt': 200, - 'openrouter:mistralai/magistral-medium-2506:completion': 500, 'openrouter:mistralai/magistral-medium-2506:thinking:prompt': 200, 'openrouter:mistralai/magistral-medium-2506:thinking:completion': 500, + 'openrouter:mistralai/magistral-medium-2506:prompt': 200, + 'openrouter:mistralai/magistral-medium-2506:completion': 500, 'openrouter:google/gemini-2.5-pro-preview:prompt': 125, 'openrouter:google/gemini-2.5-pro-preview:completion': 1000, 'openrouter:google/gemini-2.5-pro-preview:image': 516000, 'openrouter:google/gemini-2.5-pro-preview:input_cache_read': 31, 'openrouter:google/gemini-2.5-pro-preview:input_cache_write': 163, - 'openrouter:deepseek/deepseek-r1-0528-qwen3-8b:prompt': 3, - 'openrouter:deepseek/deepseek-r1-0528-qwen3-8b:completion': 11, - 'openrouter:deepseek/deepseek-r1-0528:prompt': 40, - 'openrouter:deepseek/deepseek-r1-0528:completion': 175, + 'openrouter:deepseek/deepseek-r1-0528-qwen3-8b:prompt': 2, + 'openrouter:deepseek/deepseek-r1-0528-qwen3-8b:completion': 10, + 'openrouter:deepseek/deepseek-r1-0528:prompt': 20, + 'openrouter:deepseek/deepseek-r1-0528:completion': 450, 'openrouter:anthropic/claude-opus-4:prompt': 1500, 'openrouter:anthropic/claude-opus-4:completion': 7500, 'openrouter:anthropic/claude-opus-4:image': 2400000, @@ -273,8 +328,8 @@ export const OPENROUTER_COST_MAP = { 'openrouter:anthropic/claude-sonnet-4:image': 480000, 'openrouter:anthropic/claude-sonnet-4:input_cache_read': 30, 'openrouter:anthropic/claude-sonnet-4:input_cache_write': 375, - 'openrouter:mistralai/devstral-small-2505:prompt': 5, - 'openrouter:mistralai/devstral-small-2505:completion': 22, + 'openrouter:mistralai/devstral-small-2505:prompt': 6, + 'openrouter:mistralai/devstral-small-2505:completion': 12, 'openrouter:google/gemma-3n-e4b-it:prompt': 2, 'openrouter:google/gemma-3n-e4b-it:completion': 4, 'openrouter:openai/codex-mini:prompt': 150, @@ -319,8 +374,6 @@ export const OPENROUTER_COST_MAP = { 'openrouter:tngtech/deepseek-r1t-chimera:completion': 120, 'openrouter:microsoft/mai-ds-r1:prompt': 30, 'openrouter:microsoft/mai-ds-r1:completion': 120, - 'openrouter:thudm/glm-z1-32b:prompt': 5, - 'openrouter:thudm/glm-z1-32b:completion': 22, 'openrouter:openai/o4-mini-high:prompt': 110, 'openrouter:openai/o4-mini-high:completion': 440, 'openrouter:openai/o4-mini-high:image': 84150, @@ -336,8 +389,6 @@ export const OPENROUTER_COST_MAP = { 'openrouter:openai/o4-mini:image': 84150, 'openrouter:openai/o4-mini:web_search': 1000000, 'openrouter:openai/o4-mini:input_cache_read': 28, - 'openrouter:shisa-ai/shisa-v2-llama3.3-70b:prompt': 5, - 'openrouter:shisa-ai/shisa-v2-llama3.3-70b:completion': 22, 'openrouter:qwen/qwen2.5-coder-7b-instruct:prompt': 3, 'openrouter:qwen/qwen2.5-coder-7b-instruct:completion': 9, 'openrouter:openai/gpt-4.1:prompt': 200, @@ -358,8 +409,6 @@ export const OPENROUTER_COST_MAP = { 'openrouter:alfredpros/codellama-7b-instruct-solidity:completion': 120, 'openrouter:arliai/qwq-32b-arliai-rpr-v1:prompt': 3, 'openrouter:arliai/qwq-32b-arliai-rpr-v1:completion': 11, - 'openrouter:agentica-org/deepcoder-14b-preview:prompt': 1, - 'openrouter:agentica-org/deepcoder-14b-preview:completion': 1, 'openrouter:x-ai/grok-3-mini-beta:prompt': 30, 'openrouter:x-ai/grok-3-mini-beta:completion': 50, 'openrouter:x-ai/grok-3-mini-beta:input_cache_read': 7, @@ -374,8 +423,6 @@ export const OPENROUTER_COST_MAP = { 'openrouter:meta-llama/llama-4-scout:prompt': 8, 'openrouter:meta-llama/llama-4-scout:completion': 30, 'openrouter:meta-llama/llama-4-scout:image': 33420, - 'openrouter:allenai/molmo-7b-d:prompt': 10, - 'openrouter:allenai/molmo-7b-d:completion': 20, 'openrouter:qwen/qwen2.5-vl-32b-instruct:prompt': 5, 'openrouter:qwen/qwen2.5-vl-32b-instruct:completion': 22, 'openrouter:deepseek/deepseek-chat-v3-0324:prompt': 24, @@ -384,7 +431,7 @@ export const OPENROUTER_COST_MAP = { 'openrouter:openai/o1-pro:completion': 60000, 'openrouter:openai/o1-pro:image': 21675000, 'openrouter:mistralai/mistral-small-3.1-24b-instruct:prompt': 5, - 'openrouter:mistralai/mistral-small-3.1-24b-instruct:completion': 10, + 'openrouter:mistralai/mistral-small-3.1-24b-instruct:completion': 22, 'openrouter:allenai/olmo-2-0325-32b-instruct:prompt': 20, 'openrouter:allenai/olmo-2-0325-32b-instruct:completion': 35, 'openrouter:google/gemma-3-4b-it:prompt': 2, @@ -401,11 +448,10 @@ export const OPENROUTER_COST_MAP = { 'openrouter:openai/gpt-4o-search-preview:completion': 1000, 'openrouter:openai/gpt-4o-search-preview:request': 3500000, 'openrouter:openai/gpt-4o-search-preview:image': 361300, - 'openrouter:google/gemma-3-27b-it:prompt': 9, - 'openrouter:google/gemma-3-27b-it:completion': 16, - 'openrouter:google/gemma-3-27b-it:image': 2560, - 'openrouter:thedrummer/skyfall-36b-v2:prompt': 8, - 'openrouter:thedrummer/skyfall-36b-v2:completion': 33, + 'openrouter:google/gemma-3-27b-it:prompt': 7, + 'openrouter:google/gemma-3-27b-it:completion': 50, + 'openrouter:thedrummer/skyfall-36b-v2:prompt': 50, + 'openrouter:thedrummer/skyfall-36b-v2:completion': 80, 'openrouter:microsoft/phi-4-multimodal-instruct:prompt': 5, 'openrouter:microsoft/phi-4-multimodal-instruct:completion': 10, 'openrouter:microsoft/phi-4-multimodal-instruct:image': 17685, @@ -421,24 +467,20 @@ export const OPENROUTER_COST_MAP = { 'openrouter:perplexity/sonar-deep-research:internal_reasoning': 300, 'openrouter:qwen/qwq-32b:prompt': 15, 'openrouter:qwen/qwq-32b:completion': 40, - 'openrouter:nousresearch/deephermes-3-llama-3-8b-preview:prompt': 3, - 'openrouter:nousresearch/deephermes-3-llama-3-8b-preview:completion': 11, 'openrouter:google/gemini-2.0-flash-lite-001:prompt': 7, 'openrouter:google/gemini-2.0-flash-lite-001:completion': 30, - 'openrouter:anthropic/claude-3.7-sonnet:prompt': 300, - 'openrouter:anthropic/claude-3.7-sonnet:completion': 1500, - 'openrouter:anthropic/claude-3.7-sonnet:image': 480000, - 'openrouter:anthropic/claude-3.7-sonnet:input_cache_read': 30, - 'openrouter:anthropic/claude-3.7-sonnet:input_cache_write': 375, 'openrouter:anthropic/claude-3.7-sonnet:thinking:prompt': 300, 'openrouter:anthropic/claude-3.7-sonnet:thinking:completion': 1500, 'openrouter:anthropic/claude-3.7-sonnet:thinking:image': 480000, 'openrouter:anthropic/claude-3.7-sonnet:thinking:input_cache_read': 30, 'openrouter:anthropic/claude-3.7-sonnet:thinking:input_cache_write': 375, + 'openrouter:anthropic/claude-3.7-sonnet:prompt': 300, + 'openrouter:anthropic/claude-3.7-sonnet:completion': 1500, + 'openrouter:anthropic/claude-3.7-sonnet:image': 480000, + 'openrouter:anthropic/claude-3.7-sonnet:input_cache_read': 30, + 'openrouter:anthropic/claude-3.7-sonnet:input_cache_write': 375, 'openrouter:mistralai/mistral-saba:prompt': 20, 'openrouter:mistralai/mistral-saba:completion': 60, - 'openrouter:cognitivecomputations/dolphin3.0-mistral-24b:prompt': 4, - 'openrouter:cognitivecomputations/dolphin3.0-mistral-24b:completion': 17, 'openrouter:meta-llama/llama-guard-3-8b:prompt': 2, 'openrouter:meta-llama/llama-guard-3-8b:completion': 6, 'openrouter:openai/o3-mini-high:prompt': 110, @@ -488,14 +530,10 @@ export const OPENROUTER_COST_MAP = { 'openrouter:perplexity/sonar:prompt': 100, 'openrouter:perplexity/sonar:completion': 100, 'openrouter:perplexity/sonar:request': 500000, - 'openrouter:liquid/lfm-7b:prompt': 1, - 'openrouter:liquid/lfm-7b:completion': 1, - 'openrouter:liquid/lfm-3b:prompt': 2, - 'openrouter:liquid/lfm-3b:completion': 2, 'openrouter:deepseek/deepseek-r1-distill-llama-70b:prompt': 3, 'openrouter:deepseek/deepseek-r1-distill-llama-70b:completion': 13, - 'openrouter:deepseek/deepseek-r1:prompt': 40, - 'openrouter:deepseek/deepseek-r1:completion': 200, + 'openrouter:deepseek/deepseek-r1:prompt': 30, + 'openrouter:deepseek/deepseek-r1:completion': 120, 'openrouter:minimax/minimax-01:prompt': 20, 'openrouter:minimax/minimax-01:completion': 110, 'openrouter:mistralai/codestral-2501:prompt': 30, @@ -505,7 +543,7 @@ export const OPENROUTER_COST_MAP = { 'openrouter:sao10k/l3.1-70b-hanami-x1:prompt': 300, 'openrouter:sao10k/l3.1-70b-hanami-x1:completion': 300, 'openrouter:deepseek/deepseek-chat:prompt': 30, - 'openrouter:deepseek/deepseek-chat:completion': 85, + 'openrouter:deepseek/deepseek-chat:completion': 120, 'openrouter:sao10k/l3.3-euryale-70b:prompt': 65, 'openrouter:sao10k/l3.3-euryale-70b:completion': 75, 'openrouter:openai/o1:prompt': 1500, @@ -515,7 +553,7 @@ export const OPENROUTER_COST_MAP = { 'openrouter:cohere/command-r7b-12-2024:prompt': 4, 'openrouter:cohere/command-r7b-12-2024:completion': 15, 'openrouter:meta-llama/llama-3.3-70b-instruct:prompt': 13, - 'openrouter:meta-llama/llama-3.3-70b-instruct:completion': 39, + 'openrouter:meta-llama/llama-3.3-70b-instruct:completion': 38, 'openrouter:amazon/nova-lite-v1:prompt': 6, 'openrouter:amazon/nova-lite-v1:completion': 24, 'openrouter:amazon/nova-lite-v1:image': 9000, @@ -541,58 +579,50 @@ export const OPENROUTER_COST_MAP = { 'openrouter:raifle/sorcererlm-8x22b:completion': 450, 'openrouter:thedrummer/unslopnemo-12b:prompt': 40, 'openrouter:thedrummer/unslopnemo-12b:completion': 40, + 'openrouter:anthropic/claude-3.5-haiku-20241022:prompt': 80, + 'openrouter:anthropic/claude-3.5-haiku-20241022:completion': 400, + 'openrouter:anthropic/claude-3.5-haiku-20241022:input_cache_read': 8, + 'openrouter:anthropic/claude-3.5-haiku-20241022:input_cache_write': 100, 'openrouter:anthropic/claude-3.5-haiku:prompt': 80, 'openrouter:anthropic/claude-3.5-haiku:completion': 400, 'openrouter:anthropic/claude-3.5-haiku:web_search': 1000000, 'openrouter:anthropic/claude-3.5-haiku:input_cache_read': 8, 'openrouter:anthropic/claude-3.5-haiku:input_cache_write': 100, - 'openrouter:anthropic/claude-3.5-haiku-20241022:prompt': 80, - 'openrouter:anthropic/claude-3.5-haiku-20241022:completion': 400, - 'openrouter:anthropic/claude-3.5-haiku-20241022:input_cache_read': 8, - 'openrouter:anthropic/claude-3.5-haiku-20241022:input_cache_write': 100, - 'openrouter:anthracite-org/magnum-v4-72b:prompt': 250, + 'openrouter:anthracite-org/magnum-v4-72b:prompt': 300, 'openrouter:anthracite-org/magnum-v4-72b:completion': 500, 'openrouter:anthropic/claude-3.5-sonnet:prompt': 300, 'openrouter:anthropic/claude-3.5-sonnet:completion': 1500, 'openrouter:anthropic/claude-3.5-sonnet:image': 480000, 'openrouter:anthropic/claude-3.5-sonnet:input_cache_read': 30, 'openrouter:anthropic/claude-3.5-sonnet:input_cache_write': 375, - 'openrouter:mistralai/ministral-3b:prompt': 4, - 'openrouter:mistralai/ministral-3b:completion': 4, 'openrouter:mistralai/ministral-8b:prompt': 10, 'openrouter:mistralai/ministral-8b:completion': 10, + 'openrouter:mistralai/ministral-3b:prompt': 4, + 'openrouter:mistralai/ministral-3b:completion': 4, 'openrouter:qwen/qwen-2.5-7b-instruct:prompt': 4, 'openrouter:qwen/qwen-2.5-7b-instruct:completion': 10, - 'openrouter:nvidia/llama-3.1-nemotron-70b-instruct:prompt': 60, - 'openrouter:nvidia/llama-3.1-nemotron-70b-instruct:completion': 60, - 'openrouter:inflection/inflection-3-productivity:prompt': 250, - 'openrouter:inflection/inflection-3-productivity:completion': 1000, + 'openrouter:nvidia/llama-3.1-nemotron-70b-instruct:prompt': 120, + 'openrouter:nvidia/llama-3.1-nemotron-70b-instruct:completion': 120, 'openrouter:inflection/inflection-3-pi:prompt': 250, 'openrouter:inflection/inflection-3-pi:completion': 1000, + 'openrouter:inflection/inflection-3-productivity:prompt': 250, + 'openrouter:inflection/inflection-3-productivity:completion': 1000, 'openrouter:thedrummer/rocinante-12b:prompt': 17, 'openrouter:thedrummer/rocinante-12b:completion': 43, - 'openrouter:anthracite-org/magnum-v2-72b:prompt': 300, - 'openrouter:anthracite-org/magnum-v2-72b:completion': 300, 'openrouter:meta-llama/llama-3.2-3b-instruct:prompt': 2, 'openrouter:meta-llama/llama-3.2-3b-instruct:completion': 2, - 'openrouter:meta-llama/llama-3.2-1b-instruct:prompt': 1, - 'openrouter:meta-llama/llama-3.2-1b-instruct:completion': 1, - 'openrouter:meta-llama/llama-3.2-11b-vision-instruct:prompt': 5, - 'openrouter:meta-llama/llama-3.2-11b-vision-instruct:completion': 5, - 'openrouter:meta-llama/llama-3.2-11b-vision-instruct:image': 7948, + 'openrouter:meta-llama/llama-3.2-1b-instruct:prompt': 3, + 'openrouter:meta-llama/llama-3.2-1b-instruct:completion': 20, 'openrouter:meta-llama/llama-3.2-90b-vision-instruct:prompt': 35, 'openrouter:meta-llama/llama-3.2-90b-vision-instruct:completion': 40, 'openrouter:meta-llama/llama-3.2-90b-vision-instruct:image': 50580, + 'openrouter:meta-llama/llama-3.2-11b-vision-instruct:prompt': 5, + 'openrouter:meta-llama/llama-3.2-11b-vision-instruct:completion': 5, + 'openrouter:meta-llama/llama-3.2-11b-vision-instruct:image': 7948, 'openrouter:qwen/qwen-2.5-72b-instruct:prompt': 7, 'openrouter:qwen/qwen-2.5-72b-instruct:completion': 26, 'openrouter:neversleep/llama-3.1-lumimaid-8b:prompt': 9, 'openrouter:neversleep/llama-3.1-lumimaid-8b:completion': 60, - 'openrouter:openai/o1-mini-2024-09-12:prompt': 110, - 'openrouter:openai/o1-mini-2024-09-12:completion': 440, - 'openrouter:openai/o1-mini-2024-09-12:input_cache_read': 55, - 'openrouter:openai/o1-mini:prompt': 110, - 'openrouter:openai/o1-mini:completion': 440, - 'openrouter:openai/o1-mini:input_cache_read': 55, 'openrouter:mistralai/pixtral-12b:prompt': 10, 'openrouter:mistralai/pixtral-12b:completion': 10, 'openrouter:mistralai/pixtral-12b:image': 14450, @@ -624,10 +654,10 @@ export const OPENROUTER_COST_MAP = { 'openrouter:meta-llama/llama-3.1-405b:completion': 400, 'openrouter:meta-llama/llama-3.1-8b-instruct:prompt': 2, 'openrouter:meta-llama/llama-3.1-8b-instruct:completion': 3, + 'openrouter:meta-llama/llama-3.1-405b-instruct:prompt': 350, + 'openrouter:meta-llama/llama-3.1-405b-instruct:completion': 350, 'openrouter:meta-llama/llama-3.1-70b-instruct:prompt': 40, 'openrouter:meta-llama/llama-3.1-70b-instruct:completion': 40, - 'openrouter:meta-llama/llama-3.1-405b-instruct:prompt': 80, - 'openrouter:meta-llama/llama-3.1-405b-instruct:completion': 80, 'openrouter:mistralai/mistral-nemo:prompt': 2, 'openrouter:mistralai/mistral-nemo:completion': 4, 'openrouter:openai/gpt-4o-mini-2024-07-18:prompt': 15, @@ -640,25 +670,22 @@ export const OPENROUTER_COST_MAP = { 'openrouter:openai/gpt-4o-mini:input_cache_read': 7, 'openrouter:google/gemma-2-27b-it:prompt': 65, 'openrouter:google/gemma-2-27b-it:completion': 65, - 'openrouter:google/gemma-2-9b-it:prompt': 1, - 'openrouter:google/gemma-2-9b-it:completion': 3, - 'openrouter:anthropic/claude-3.5-sonnet-20240620:prompt': 300, - 'openrouter:anthropic/claude-3.5-sonnet-20240620:completion': 1500, - 'openrouter:anthropic/claude-3.5-sonnet-20240620:image': 480000, - 'openrouter:anthropic/claude-3.5-sonnet-20240620:input_cache_read': 30, - 'openrouter:anthropic/claude-3.5-sonnet-20240620:input_cache_write': 375, + 'openrouter:google/gemma-2-9b-it:prompt': 3, + 'openrouter:google/gemma-2-9b-it:completion': 9, 'openrouter:sao10k/l3-euryale-70b:prompt': 148, 'openrouter:sao10k/l3-euryale-70b:completion': 148, - 'openrouter:mistralai/mistral-7b-instruct-v0.3:prompt': 3, - 'openrouter:mistralai/mistral-7b-instruct-v0.3:completion': 5, - 'openrouter:mistralai/mistral-7b-instruct:prompt': 3, - 'openrouter:mistralai/mistral-7b-instruct:completion': 5, 'openrouter:nousresearch/hermes-2-pro-llama-3-8b:prompt': 3, 'openrouter:nousresearch/hermes-2-pro-llama-3-8b:completion': 8, + 'openrouter:mistralai/mistral-7b-instruct:prompt': 3, + 'openrouter:mistralai/mistral-7b-instruct:completion': 5, + 'openrouter:mistralai/mistral-7b-instruct-v0.3:prompt': 20, + 'openrouter:mistralai/mistral-7b-instruct-v0.3:completion': 20, 'openrouter:microsoft/phi-3-mini-128k-instruct:prompt': 10, 'openrouter:microsoft/phi-3-mini-128k-instruct:completion': 10, 'openrouter:microsoft/phi-3-medium-128k-instruct:prompt': 100, 'openrouter:microsoft/phi-3-medium-128k-instruct:completion': 100, + 'openrouter:meta-llama/llama-guard-2-8b:prompt': 20, + 'openrouter:meta-llama/llama-guard-2-8b:completion': 20, 'openrouter:openai/gpt-4o-2024-05-13:prompt': 500, 'openrouter:openai/gpt-4o-2024-05-13:completion': 1500, 'openrouter:openai/gpt-4o-2024-05-13:image': 722500, @@ -669,14 +696,12 @@ export const OPENROUTER_COST_MAP = { 'openrouter:openai/gpt-4o:extended:prompt': 600, 'openrouter:openai/gpt-4o:extended:completion': 1800, 'openrouter:openai/gpt-4o:extended:image': 722500, - 'openrouter:meta-llama/llama-guard-2-8b:prompt': 20, - 'openrouter:meta-llama/llama-guard-2-8b:completion': 20, - 'openrouter:meta-llama/llama-3-8b-instruct:prompt': 3, - 'openrouter:meta-llama/llama-3-8b-instruct:completion': 6, 'openrouter:meta-llama/llama-3-70b-instruct:prompt': 30, 'openrouter:meta-llama/llama-3-70b-instruct:completion': 40, - 'openrouter:mistralai/mixtral-8x22b-instruct:prompt': 90, - 'openrouter:mistralai/mixtral-8x22b-instruct:completion': 90, + 'openrouter:meta-llama/llama-3-8b-instruct:prompt': 3, + 'openrouter:meta-llama/llama-3-8b-instruct:completion': 6, + 'openrouter:mistralai/mixtral-8x22b-instruct:prompt': 200, + 'openrouter:mistralai/mixtral-8x22b-instruct:completion': 600, 'openrouter:microsoft/wizardlm-2-8x22b:prompt': 48, 'openrouter:microsoft/wizardlm-2-8x22b:completion': 48, 'openrouter:openai/gpt-4-turbo:prompt': 1000, @@ -694,10 +719,10 @@ export const OPENROUTER_COST_MAP = { 'openrouter:anthropic/claude-3-opus:input_cache_write': 1875, 'openrouter:mistralai/mistral-large:prompt': 200, 'openrouter:mistralai/mistral-large:completion': 600, - 'openrouter:openai/gpt-4-turbo-preview:prompt': 1000, - 'openrouter:openai/gpt-4-turbo-preview:completion': 3000, 'openrouter:openai/gpt-3.5-turbo-0613:prompt': 100, 'openrouter:openai/gpt-3.5-turbo-0613:completion': 200, + 'openrouter:openai/gpt-4-turbo-preview:prompt': 1000, + 'openrouter:openai/gpt-4-turbo-preview:completion': 3000, 'openrouter:mistralai/mistral-small:prompt': 20, 'openrouter:mistralai/mistral-small:completion': 60, 'openrouter:mistralai/mistral-tiny:prompt': 25, @@ -708,28 +733,28 @@ export const OPENROUTER_COST_MAP = { 'openrouter:mistralai/mixtral-8x7b-instruct:completion': 54, 'openrouter:neversleep/noromaid-20b:prompt': 100, 'openrouter:neversleep/noromaid-20b:completion': 175, - 'openrouter:alpindale/goliath-120b:prompt': 400, - 'openrouter:alpindale/goliath-120b:completion': 550, + 'openrouter:alpindale/goliath-120b:prompt': 600, + 'openrouter:alpindale/goliath-120b:completion': 800, 'openrouter:openrouter/auto:prompt': -100000000, 'openrouter:openrouter/auto:completion': -100000000, 'openrouter:openai/gpt-4-1106-preview:prompt': 1000, 'openrouter:openai/gpt-4-1106-preview:completion': 3000, - 'openrouter:mistralai/mistral-7b-instruct-v0.1:prompt': 11, - 'openrouter:mistralai/mistral-7b-instruct-v0.1:completion': 19, 'openrouter:openai/gpt-3.5-turbo-instruct:prompt': 150, 'openrouter:openai/gpt-3.5-turbo-instruct:completion': 200, + 'openrouter:mistralai/mistral-7b-instruct-v0.1:prompt': 11, + 'openrouter:mistralai/mistral-7b-instruct-v0.1:completion': 19, 'openrouter:openai/gpt-3.5-turbo-16k:prompt': 300, 'openrouter:openai/gpt-3.5-turbo-16k:completion': 400, 'openrouter:mancer/weaver:prompt': 113, 'openrouter:mancer/weaver:completion': 113, 'openrouter:undi95/remm-slerp-l2-13b:prompt': 45, 'openrouter:undi95/remm-slerp-l2-13b:completion': 65, - 'openrouter:gryphe/mythomax-l2-13b:prompt': 5, - 'openrouter:gryphe/mythomax-l2-13b:completion': 9, + 'openrouter:gryphe/mythomax-l2-13b:prompt': 6, + 'openrouter:gryphe/mythomax-l2-13b:completion': 6, + 'openrouter:openai/gpt-4-0314:prompt': 3000, + 'openrouter:openai/gpt-4-0314:completion': 6000, 'openrouter:openai/gpt-4:prompt': 3000, 'openrouter:openai/gpt-4:completion': 6000, 'openrouter:openai/gpt-3.5-turbo:prompt': 50, 'openrouter:openai/gpt-3.5-turbo:completion': 150, - 'openrouter:openai/gpt-4-0314:prompt': 3000, - 'openrouter:openai/gpt-4-0314:completion': 6000, };