diff --git a/src/backend/src/modules/puterai/GeminiService.js b/src/backend/src/modules/puterai/GeminiService.js
deleted file mode 100644
index 92e16ee40..000000000
--- a/src/backend/src/modules/puterai/GeminiService.js
+++ /dev/null
@@ -1,204 +0,0 @@
-const BaseService = require('../../services/BaseService');
-const { GoogleGenerativeAI } = require('@google/generative-ai');
-const GeminiSquareHole = require('./lib/GeminiSquareHole');
-const FunctionCalling = require('./lib/FunctionCalling');
-const { Context } = require('../../util/context');
-
-class GeminiService extends BaseService {
-    /**
-     * @type {import('../../services/MeteringService/MeteringService').MeteringService}
-     */
-    meteringService = undefined;
-
-    async _init () {
-        const svc_aiChat = this.services.get('ai-chat');
-        svc_aiChat.register_provider({
-            service_name: this.service_name,
-            alias: true,
-        });
-        this.meteringService = this.services.get('meteringService').meteringService;
-    }
-
-    static IMPLEMENTS = {
-        ['puter-chat-completion']: {
-            async models () {
-                return await this.models_();
-            },
-            async list () {
-                const models = await this.models_();
-                const model_names = [];
-                for ( const model of models ) {
-                    model_names.push(model.id);
-                    if ( model.aliases ) {
-                        model_names.push(...model.aliases);
-                    }
-                }
-                return model_names;
-            },
-
-            async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
-                tools = FunctionCalling.make_gemini_tools(tools);
-
-                model = model ?? 'gemini-2.0-flash';
-                const genAI = new GoogleGenerativeAI(this.config.apiKey);
-                const genModel = genAI.getGenerativeModel({
-                    model,
-                    tools,
-                    generationConfig: {
-                        temperature: temperature, // Set temperature (0.0 to 1.0). Defaults to 0.7
-                        maxOutputTokens: max_tokens, // Note: it's maxOutputTokens, not max_tokens
-                    },
-                });
-
-                messages = await GeminiSquareHole.process_input_messages(messages);
-
-                // History is separate, so the last message gets special treatment.
-                const last_message = messages.pop();
-                const last_message_parts = last_message.parts.map(part => typeof part === 'string' ? part :
-                    typeof part.text === 'string' ? part.text :
-                        part);
-
-                const chat = genModel.startChat({
-                    history: messages,
-                });
-
-                const usage_calculator = GeminiSquareHole.create_usage_calculator({
-                    model_details: (await this.models_()).find(m => m.id === model),
-                });
-
-                // Metering integration
-                const actor = Context.get('actor');
-                const meteringPrefix = `gemini:${model}`;
-                if ( stream ) {
-                    const genResult = await chat.sendMessageStream(last_message_parts);
-                    const stream = genResult.stream;
-
-                    return {
-                        stream: true,
-                        init_chat_stream:
-                            GeminiSquareHole.create_chat_stream_handler({
-                                stream,
-                                usageCallback: (usageMetadata) => {
-                                    // TODO DS: dedup this logic
-                                    const trackedUsage = {
-                                        prompt_tokens: usageMetadata.promptTokenCount - (usageMetadata.cachedContentTokenCount || 0),
-                                        completion_tokens: usageMetadata.candidatesTokenCount,
-                                        cached_tokens: usageMetadata.cachedContentTokenCount || 0,
-                                    };
-                                    this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
-                                },
-                            }),
-                    };
-                } else {
-                    const genResult = await chat.sendMessage(last_message_parts);
-
-                    const message = genResult.response.candidates[0];
-                    message.content = message.content.parts;
-                    message.role = 'assistant';
-
-                    const result = { message };
-                    result.usage = usage_calculator(genResult.response);
-                    // TODO DS: dedup this logic
-                    const trackedUsage = {
-                        prompt_tokens: genResult.response.usageMetadata.promptTokenCount - (genResult.cachedContentTokenCount || 0),
-                        completion_tokens: genResult.response.usageMetadata.candidatesTokenCount,
-                        cached_tokens: genResult.response.usageMetadata.cachedContentTokenCount || 0,
-                    };
-                    this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
-                    return result;
-                }
-            },
-        },
-    };
-
-    async models_ () {
-        return [
-            {
-                id: 'gemini-1.5-flash',
-                name: 'Gemini 1.5 Flash',
-                context: 131072,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 7.5,
-                    output: 30,
-                },
-                max_tokens: 8192,
-            },
-            {
-                id: 'gemini-2.0-flash',
-                name: 'Gemini 2.0 Flash',
-                context: 131072,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 10,
-                    output: 40,
-                },
-                max_tokens: 8192,
-            },
-            {
-                id: 'gemini-2.0-flash-lite',
-                name: 'Gemini 2.0 Flash-Lite',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 8,
-                    output: 32,
-                },
-                max_tokens: 8192,
-            },
-            {
-                id: 'gemini-2.5-flash',
-                name: 'Gemini 2.5 Flash',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 12,
-                    output: 48,
-                },
-                max_tokens: 65536,
-            },
-            {
-                id: 'gemini-2.5-flash-lite',
-                name: 'Gemini 2.5 Flash-Lite',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 10,
-                    output: 40,
-                },
-                max_tokens: 65536,
-            },
-            {
-                id: 'gemini-2.5-pro',
-                name: 'Gemini 2.5 Pro',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 15,
-                    output: 60,
-                },
-                max_tokens: 65536,
-            },
-            {
-                id: 'gemini-3-pro-preview',
-                name: 'Gemini 3 Pro',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 25,
-                    output: 100,
-                },
-                max_tokens: 65536,
-            },
-        ];
-    }
-}
-
-module.exports = { GeminiService };
\ No newline at end of file
diff --git a/src/backend/src/modules/puterai/GeminiService/GeminiService.mjs b/src/backend/src/modules/puterai/GeminiService/GeminiService.mjs
new file mode 100644
index 000000000..179019c80
--- /dev/null
+++ b/src/backend/src/modules/puterai/GeminiService/GeminiService.mjs
@@ -0,0 +1,114 @@
+// Preamble: Before this we used Gemini's SDK directly and, as we found out,
+// it's actually kind of terrible. So we use the OpenAI SDK now.
+import BaseService from '../../../services/BaseService.js';
+import openai from 'openai';
+import OpenAIUtil from '../lib/OpenAIUtil.js';
+import { Context } from '../../../util/context.js';
+import { models } from './models.mjs';
+
+
+export class GeminiService extends BaseService {
+    /**
+     * @type {import('../../../services/MeteringService/MeteringService').MeteringService}
+     */
+    meteringService = undefined;
+
+    defaultModel = 'gemini-2.5-flash';
+
+    static IMPLEMENTS = {
+        ['puter-chat-completion']: {
+            async models () {
+                return await this.models();
+            },
+            async complete (...args) {
+                return await this.complete(...args);
+            },
+            async list () {
+                return await this.list();
+            },
+        },
+    };
+
+    async _init () {
+        this.openai = new openai.OpenAI({
+            apiKey: this.config.apiKey,
+            baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
+        });
+
+        const svc_aiChat = this.services.get('ai-chat');
+        svc_aiChat.register_provider({
+            service_name: this.service_name,
+            alias: true,
+        });
+        this.meteringService = this.services.get('meteringService').meteringService;
+    }
+
+    get_default_model () {
+        return this.defaultModel;
+    }
+
+    async models () {
+        return models;
+    }
+    async list () {
+        const model_names = [];
+        for ( const model of models ) {
+            model_names.push(model.id);
+            if ( model.aliases ) {
+                model_names.push(...model.aliases);
+            }
+        }
+        return model_names;
+    }
+    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
+        const actor = Context.get('actor');
+        messages = await OpenAIUtil.process_input_messages(messages);
+
+        // delete cache_control
+        messages = messages.map(m => {
+            delete m.cache_control;
+            return m;
+        });
+
+        const sdk_params = {
+            messages: messages,
+            model: model,
+            ...(tools ? { tools } : {}),
+            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
+            ...(temperature ? { temperature } : {}),
+            stream,
+            ...(stream ? {
+                stream_options: { include_usage: true },
+            } : {}),
+        };
+
+        let completion;
+        try {
+            completion = await this.openai.chat.completions.create(sdk_params);
+        } catch (e) {
+            console.error('Gemini completion error: ', e);
+            throw e;
+        }
+
+        const modelDetails = (await this.models()).find(m => m.id === model);
+        return OpenAIUtil.handle_completion_output({
+            usage_calculator: ({ usage }) => {
+                const trackedUsage = {
+                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
+                    completion_tokens: usage.completion_tokens ?? 0,
+                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
+                };
+
+                this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelDetails.id}`);
+                const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
+                    model_details: modelDetails,
+                });
+
+                return legacyCostCalculator({ usage });
+            },
+            stream,
+            completion,
+        });
+
+    }
+}
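For context on the preamble above: outside of Puter, the approach `_init` takes looks like this. A minimal sketch, assuming the `openai` npm package and a hypothetical `GEMINI_API_KEY` environment variable; the `baseURL` is the same one the service configures:

```js
import OpenAI from 'openai';

// Point the OpenAI SDK at Gemini's OpenAI-compatible endpoint,
// exactly as GeminiService._init does above.
const client = new OpenAI({
    apiKey: process.env.GEMINI_API_KEY, // hypothetical env var for this sketch
    baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
});

const completion = await client.chat.completions.create({
    model: 'gemini-2.5-flash', // the service's defaultModel
    messages: [{ role: 'user', content: 'Say hello.' }],
});
console.log(completion.choices[0].message.content);
```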
diff --git a/src/backend/src/modules/puterai/GeminiService/models.mjs b/src/backend/src/modules/puterai/GeminiService/models.mjs
new file mode 100644
index 000000000..a918a23ad
--- /dev/null
+++ b/src/backend/src/modules/puterai/GeminiService/models.mjs
@@ -0,0 +1,86 @@
+export const models = [
+    {
+        id: 'gemini-1.5-flash',
+        name: 'Gemini 1.5 Flash',
+        context: 131072,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 7.5,
+            output: 30,
+        },
+        max_tokens: 8192,
+    },
+    {
+        id: 'gemini-2.0-flash',
+        name: 'Gemini 2.0 Flash',
+        context: 131072,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 10,
+            output: 40,
+        },
+        max_tokens: 8192,
+    },
+    {
+        id: 'gemini-2.0-flash-lite',
+        name: 'Gemini 2.0 Flash-Lite',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 8,
+            output: 32,
+        },
+        max_tokens: 8192,
+    },
+    {
+        id: 'gemini-2.5-flash',
+        name: 'Gemini 2.5 Flash',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 12,
+            output: 48,
+        },
+        max_tokens: 65536,
+    },
+    {
+        id: 'gemini-2.5-flash-lite',
+        name: 'Gemini 2.5 Flash-Lite',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 10,
+            output: 40,
+        },
+        max_tokens: 65536,
+    },
+    {
+        id: 'gemini-2.5-pro',
+        name: 'Gemini 2.5 Pro',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 15,
+            output: 60,
+        },
+        max_tokens: 65536,
+    },
+    {
+        id: 'gemini-3-pro-preview',
+        name: 'Gemini 3 Pro',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 25,
+            output: 100,
+        },
+        max_tokens: 65536,
+    },
+];
\ No newline at end of file
diff --git a/src/backend/src/modules/puterai/PuterAIModule.js b/src/backend/src/modules/puterai/PuterAIModule.js
index 9ac034723..e0dc89742 100644
--- a/src/backend/src/modules/puterai/PuterAIModule.js
+++ b/src/backend/src/modules/puterai/PuterAIModule.js
@@ -116,7 +116,7 @@ class PuterAIModule extends AdvancedBase {
             services.registerService('deepseek', DeepSeekService);
         }
         if ( config?.services?.['gemini'] ) {
-            const { GeminiService } = require('./GeminiService');
+            const { GeminiService } = require('./GeminiService/GeminiService.mjs');
             const { GeminiImageGenerationService } = require('./GeminiImageGenerationService');
 
             services.registerService('gemini', GeminiService);
@@ -129,13 +129,13 @@ class PuterAIModule extends AdvancedBase {
 
         // Autodiscover Ollama service and then check if its disabled in the config
         // if config.services.ollama.enabled is undefined, it means the user hasn't set it, so we should default to true
-        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(data => {
+        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
             const ollama_enabled = config?.services?.['ollama']?.enabled;
             if ( ollama_enabled === undefined ) {
                 return true;
             }
             return ollama_enabled;
-        }).catch(err => {
+        }).catch(_err => {
            return false;
        });
         // User can disable ollama in the config, but by default it should be enabled if discovery is successful
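For reference, the hunk above only registers the service when a `gemini` entry exists under `config.services`; a minimal sketch of such an entry (the `apiKey` key is what `_init` reads; the value is a placeholder):

```json
{
    "services": {
        "gemini": { "apiKey": "<gemini-api-key>" }
    }
}
```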
diff --git a/src/backend/src/modules/puterai/lib/FunctionCalling.js b/src/backend/src/modules/puterai/lib/FunctionCalling.js
index ef2ad1217..f71801f6b 100644
--- a/src/backend/src/modules/puterai/lib/FunctionCalling.js
+++ b/src/backend/src/modules/puterai/lib/FunctionCalling.js
@@ -119,19 +119,4 @@ module.exports = class FunctionCalling {
             };
         });
     }
-
-    static make_gemini_tools (tools) {
-        if ( Array.isArray(tools) ) {
-            return [
-                {
-                    function_declarations: tools.map(t => {
-                        const tool = t.function;
-                        delete tool.parameters.additionalProperties;
-                        return tool;
-                    }),
-                },
-            ];
-        };
-
-    }
 };
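The deleted `make_gemini_tools` helper existed to translate OpenAI-style tool definitions into the `function_declarations` wrapper that Gemini's native SDK expects; the OpenAI-compatible endpoint accepts OpenAI-style `tools` unchanged, so the translation step disappears. A sketch with a hypothetical `get_weather` tool:

```js
// OpenAI-style tool definition, as passed through by the chat interface.
const tools = [{
    type: 'function',
    function: {
        name: 'get_weather', // hypothetical tool for illustration
        description: 'Get the weather for a city',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
    },
}];

// make_gemini_tools used to rewrap this for the native SDK roughly as:
// [{ function_declarations: [{ name: 'get_weather', description: '...', parameters: { ... } }] }]
// (also stripping parameters.additionalProperties, presumably because the
// native API rejected it). With the OpenAI-compatible endpoint, `tools` is
// sent as-is and no mapping is needed.
```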
-     *
-     * @param {Object} params - Configuration object
-     * @param {Object} params.stream - Gemini GenerateContentStreamResult stream
-     * @param {Function} params.usageCallback - Callback function to handle usage metadata
-     * @returns {Function} Async function that processes the chat stream and manages content blocks
-     */
-    static create_chat_stream_handler = ({
-        stream, // GenerateContentStreamResult:stream
-        usageCallback,
-    }) => async ({ chatStream }) => {
-        const message = chatStream.message();
-
-        let textblock = message.contentBlock({ type: 'text' });
-        let toolblock = null;
-        let mode = 'text';
-
-        let last_usage = null;
-        for await ( const chunk of stream ) {
-            // This is spread across several lines so that the stack trace
-            // is more helpful if we get an exception because of an
-            // inconsistent response from the model.
-            const candidate = chunk.candidates[0];
-            const content = candidate.content;
-            const parts = content.parts;
-            for ( const part of parts ) {
-                if ( part.functionCall ) {
-                    if ( mode === 'text' ) {
-                        mode = 'tool';
-                        textblock.end();
-                    }
-
-                    toolblock = message.contentBlock({
-                        type: 'tool_use',
-                        id: part.functionCall.name,
-                        name: part.functionCall.name,
-                    });
-                    toolblock.addPartialJSON(JSON.stringify(part.functionCall.args));
-
-                    continue;
-                }
-
-                if ( mode === 'tool' ) {
-                    mode = 'text';
-                    toolblock.end();
-                    textblock = message.contentBlock({ type: 'text' });
-                }
-
-                // assume text as default
-                const text = part.text;
-                if ( text ) {
-                    textblock.addText(text);
-                }
-            }
-
-            last_usage = chunk.usageMetadata;
-        }
-
-        usageCallback(last_usage);
-
-        if ( mode === 'text' ) textblock.end();
-        if ( mode === 'tool' ) toolblock.end();
-        message.end();
-        chatStream.end();
-    };
-};
diff --git a/src/backend/src/modules/puterai/lib/OpenAIUtil.js b/src/backend/src/modules/puterai/lib/OpenAIUtil.js
index a73123d42..24942ff40 100644
--- a/src/backend/src/modules/puterai/lib/OpenAIUtil.js
+++ b/src/backend/src/modules/puterai/lib/OpenAIUtil.js
@@ -35,6 +35,7 @@ const process_input_messages = async (messages) => {
                     name: content_block.name,
                     arguments: JSON.stringify(content_block.input),
                 },
+                ...(content_block.extra_content ? { extra_content: content_block.extra_content } : {}),
             });
             content.splice(i, 1);
         }
@@ -131,6 +132,14 @@ const create_chat_stream_handler = ({
                 continue;
             }
 
+            if ( choice.delta.extra_content ) {
+                // Gemini-specific metadata; we effectively append it onto the current message.
+                // Apps have to choose to handle extra_content themselves: there doesn't seem to be
+                // a backwards-compatible way to do it for them, since most streaming apps build chat
+                // history by continuously updating content, leaving no place to attach an extra
+                // object for Gemini's chat-continuation features.
+                textblock.addExtraContent(choice.delta.extra_content);
+            }
+
             const tool_calls = deviations.index_tool_calls_from_stream_choice(choice);
             if ( tool_calls ) {
                 if ( mode === 'text' ) {
@@ -143,6 +152,7 @@ const create_chat_stream_handler = ({
                         type: 'tool_use',
                         id: tool_call.id,
                         name: tool_call.function.name,
+                        ...(tool_call.extra_content ? { extra_content: tool_call.extra_content } : {}),
                     });
                     tool_call_blocks[tool_call.index] = toolblock;
                 } else {
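To make the new frame type concrete: `Streaming.js` below writes one JSON object per line to the chat stream, so a client consuming a Gemini response might now see frames like these (illustrative values only; the shape of `extra_content` is whatever the model returns):

```
{"type":"text","text":"Hello"}
{"type":"extra_content","extra_content":{"google":{"…":"…"}}}
{"type":"text","text":" world"}
```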
diff --git a/src/backend/src/modules/puterai/lib/Streaming.js b/src/backend/src/modules/puterai/lib/Streaming.js
index e13c45fa4..a0ecaa444 100644
--- a/src/backend/src/modules/puterai/lib/Streaming.js
+++ b/src/backend/src/modules/puterai/lib/Streaming.js
@@ -29,9 +29,10 @@ class AIChatConstructStream {
 }
 
 class AIChatTextStream extends AIChatConstructStream {
-    addText (text) {
+    addText (text, extra_content) {
         const json = JSON.stringify({
             type: 'text', text,
+            ...(extra_content ? { extra_content } : {}),
         });
         this.chatStream.stream.write(`${json }\n`);
     }
@@ -42,6 +43,14 @@ class AIChatTextStream extends AIChatConstructStream {
         });
         this.chatStream.stream.write(`${json }\n`);
     }
+
+    addExtraContent (extra_content) {
+        const json = JSON.stringify({
+            type: 'extra_content',
+            extra_content,
+        });
+        this.chatStream.stream.write(`${json }\n`);
+    }
 }
 
 class AIChatToolUseStream extends AIChatConstructStream {
diff --git a/src/backend/src/services/MeteringService/costMaps/geminiCostMap.ts b/src/backend/src/services/MeteringService/costMaps/geminiCostMap.ts
index 781fad19e..4c45e8a41 100644
--- a/src/backend/src/services/MeteringService/costMaps/geminiCostMap.ts
+++ b/src/backend/src/services/MeteringService/costMaps/geminiCostMap.ts
@@ -9,9 +9,19 @@
  */
 export const GEMINI_COST_MAP = {
     // Gemini api usage types (costs per token in microcents)
+    'gemini:gemini-1.5-flash:promptTokenCount': 7.5,
+    'gemini:gemini-1.5-flash:candidatesTokenCount': 30,
     'gemini:gemini-2.0-flash:promptTokenCount': 10,
     'gemini:gemini-2.0-flash:candidatesTokenCount': 40,
-    'gemini:gemini-1.5-flash:promptTokenCount': 3,
-    'gemini:gemini-1.5-flash:candidatesTokenCount': 2,
+    'gemini:gemini-2.0-flash-lite:promptTokenCount': 8,
+    'gemini:gemini-2.0-flash-lite:candidatesTokenCount': 32,
+    'gemini:gemini-2.5-flash:promptTokenCount': 12,
+    'gemini:gemini-2.5-flash:candidatesTokenCount': 48,
+    'gemini:gemini-2.5-flash-lite:promptTokenCount': 10,
+    'gemini:gemini-2.5-flash-lite:candidatesTokenCount': 40,
+    'gemini:gemini-2.5-pro:promptTokenCount': 15,
+    'gemini:gemini-2.5-pro:candidatesTokenCount': 60,
+    'gemini:gemini-3-pro-preview:promptTokenCount': 25,
+    'gemini:gemini-3-pro-preview:candidatesTokenCount': 100,
     'gemini:gemini-2.5-flash-image-preview:1024x1024': 3_900_000,
 };
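A quick consistency check on the two pricing tables: `models.mjs` prices in usd-cents per 1,000,000 tokens, while `geminiCostMap.ts` prices in microcents per token (1 cent = 1,000,000 microcents), so the same number should appear in both places. Worked through for gemini-2.5-flash input:

```js
// geminiCostMap.ts: 'gemini:gemini-2.5-flash:promptTokenCount': 12 (microcents per token)
// models.mjs:       cost: { currency: 'usd-cents', tokens: 1_000_000, input: 12 }
const microcentsPerToken = 12;
const centsPerMillionTokens = (microcentsPerToken * 1_000_000) / 1_000_000; // = 12
// 12 usd-cents per million input tokens -- the two tables agree.
```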