Gemini to OpenAI SDK refactor (#2014)

* WIP Gemini OpenAI refactor

* refactor: gemini open ai service + geminiCostMap

* Gemini Service fixes

* Cleaning up old Gemini cruft

---------

Co-authored-by: Daniel Salazar <daniel.salazar@puter.com>
Neal Shah authored on 2025-11-24 09:55:19 +04:00, committed by GitHub
parent 7e1d50d0d1
commit ed2d1368ca
9 changed files with 235 additions and 384 deletions
@@ -1,204 +0,0 @@
const BaseService = require('../../services/BaseService');
const { GoogleGenerativeAI } = require('@google/generative-ai');
const GeminiSquareHole = require('./lib/GeminiSquareHole');
const FunctionCalling = require('./lib/FunctionCalling');
const { Context } = require('../../util/context');
class GeminiService extends BaseService {
/**
* @type {import('../../services/MeteringService/MeteringService').MeteringService}
*/
meteringService = undefined;
async _init () {
const svc_aiChat = this.services.get('ai-chat');
svc_aiChat.register_provider({
service_name: this.service_name,
alias: true,
});
this.meteringService = this.services.get('meteringService').meteringService;
}
static IMPLEMENTS = {
['puter-chat-completion']: {
async models () {
return await this.models_();
},
async list () {
const models = await this.models_();
const model_names = [];
for ( const model of models ) {
model_names.push(model.id);
if ( model.aliases ) {
model_names.push(...model.aliases);
}
}
return model_names;
},
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
tools = FunctionCalling.make_gemini_tools(tools);
model = model ?? 'gemini-2.0-flash';
const genAI = new GoogleGenerativeAI(this.config.apiKey);
const genModel = genAI.getGenerativeModel({
model,
tools,
generationConfig: {
temperature: temperature, // sampling temperature
maxOutputTokens: max_tokens, // note: the Gemini SDK expects maxOutputTokens, not max_tokens
},
});
messages = await GeminiSquareHole.process_input_messages(messages);
// History is separate, so the last message gets special treatment.
const last_message = messages.pop();
const last_message_parts = last_message.parts.map(part => typeof part === 'string' ? part :
typeof part.text === 'string' ? part.text :
part);
const chat = genModel.startChat({
history: messages,
});
const usage_calculator = GeminiSquareHole.create_usage_calculator({
model_details: (await this.models_()).find(m => m.id === model),
});
// Metering integration
const actor = Context.get('actor');
const meteringPrefix = `gemini:${model}`;
if ( stream ) {
const genResult = await chat.sendMessageStream(last_message_parts);
const stream = genResult.stream;
return {
stream: true,
init_chat_stream:
GeminiSquareHole.create_chat_stream_handler({
stream,
usageCallback: (usageMetadata) => {
// TODO DS: dedup this logic
const trackedUsage = {
prompt_tokens: usageMetadata.promptTokenCount - (usageMetadata.cachedContentTokenCount || 0),
completion_tokens: usageMetadata.candidatesTokenCount,
cached_tokens: usageMetadata.cachedContentTokenCount || 0,
};
this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
},
}),
};
} else {
const genResult = await chat.sendMessage(last_message_parts);
const message = genResult.response.candidates[0];
message.content = message.content.parts;
message.role = 'assistant';
const result = { message };
result.usage = usage_calculator(genResult.response);
// TODO DS: dedup this logic
const trackedUsage = {
prompt_tokens: genResult.response.usageMetadata.promptTokenCount - (genResult.response.usageMetadata.cachedContentTokenCount || 0),
completion_tokens: genResult.response.usageMetadata.candidatesTokenCount,
cached_tokens: genResult.response.usageMetadata.cachedContentTokenCount || 0,
};
this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
return result;
}
},
},
};
async models_ () {
return [
{
id: 'gemini-1.5-flash',
name: 'Gemini 1.5 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 7.5,
output: 30,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash',
name: 'Gemini 2.0 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash-lite',
name: 'Gemini 2.0 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 8,
output: 32,
},
max_tokens: 8192,
},
{
id: 'gemini-2.5-flash',
name: 'Gemini 2.5 Flash',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 12,
output: 48,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-flash-lite',
name: 'Gemini 2.5 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-pro',
name: 'Gemini 2.5 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 15,
output: 60,
},
max_tokens: 65536,
},
{
id: 'gemini-3-pro-preview',
name: 'Gemini 3 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 25,
output: 100,
},
max_tokens: 65536,
},
];
}
}
module.exports = { GeminiService };
@@ -0,0 +1,114 @@
// Preamble: Before this we used Gemini's SDK directly and, as we found out,
// it's actually kind of terrible. So we use the OpenAI SDK now.
import BaseService from '../../../services/BaseService.js';
import openai from 'openai';
import OpenAIUtil from '../lib/OpenAIUtil.js';
import { Context } from '../../../util/context.js';
import { models } from './models.mjs';
export class GeminiService extends BaseService {
/**
* @type {import('../../services/MeteringService/MeteringService').MeteringService}
*/
meteringService = undefined;
defaultModel = 'gemini-2.5-flash';
static IMPLEMENTS = {
['puter-chat-completion']: {
async models () {
return await this.models();
},
async complete (...args) {
return await this.complete(...args);
},
async list () {
return await this.list();
},
},
};
async _init () {
this.openai = new openai.OpenAI({
apiKey: this.config.apiKey,
baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
});
const svc_aiChat = this.services.get('ai-chat');
svc_aiChat.register_provider({
service_name: this.service_name,
alias: true,
});
this.meteringService = this.services.get('meteringService').meteringService;
}
get_default_model () {
return this.defaultModel;
}
async models () {
return models;
}
async list () {
const model_names = [];
for ( const model of models ) {
model_names.push(model.id);
if ( model.aliases ) {
model_names.push(...model.aliases);
}
}
return model_names;
}
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
const actor = Context.get('actor');
messages = await OpenAIUtil.process_input_messages(messages);
// delete cache_control
messages = messages.map(m => {
delete m.cache_control;
return m;
});
const sdk_params = {
messages: messages,
model: model,
...(tools ? { tools } : {}),
...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
...(temperature ? { temperature } : {}),
stream,
...(stream ? {
stream_options: { include_usage: true },
} : {}),
};
let completion;
try {
completion = await this.openai.chat.completions.create(sdk_params);
} catch (e) {
console.error('Gemini completion error: ', e);
throw e;
}
const modelDetails = (await this.models()).find(m => m.id === model);
return OpenAIUtil.handle_completion_output({
usage_calculator: ({ usage }) => {
const trackedUsage = {
prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
completion_tokens: usage.completion_tokens ?? 0,
cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
};
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelDetails.id}`);
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
model_details: modelDetails,
});
return legacyCostCalculator({ usage });
},
stream,
completion,
});
}
}
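The core of the refactor is visible in _init above: Google exposes an OpenAI-compatible endpoint, so the stock openai client works against Gemini with only an apiKey and baseURL swap. A minimal standalone sketch of the same idea (the env var name, model, and prompt are illustrative):

import OpenAI from 'openai';

// Same endpoint the service configures above; only the key is swapped in.
const client = new OpenAI({
    apiKey: process.env.GEMINI_API_KEY,
    baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
});

const completion = await client.chat.completions.create({
    model: 'gemini-2.5-flash',
    messages: [{ role: 'user', content: 'Say hello in one word.' }],
});
console.log(completion.choices[0].message.content);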
@@ -0,0 +1,86 @@
export const models = [
{
id: 'gemini-1.5-flash',
name: 'Gemini 1.5 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 7.5,
output: 30,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash',
name: 'Gemini 2.0 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash-lite',
name: 'Gemini 2.0 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 8,
output: 32,
},
max_tokens: 8192,
},
{
id: 'gemini-2.5-flash',
name: 'Gemini 2.5 Flash',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 12,
output: 48,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-flash-lite',
name: 'Gemini 2.5 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-pro',
name: 'Gemini 2.5 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 15,
output: 60,
},
max_tokens: 65536,
},
{
id: 'gemini-3-pro-preview',
name: 'Gemini 3 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 25,
output: 100,
},
max_tokens: 65536,
},
];
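The cost fields above are USD cents per cost.tokens (one million) tokens. A hedged helper showing how those units combine into a price (the helper itself is illustrative, not part of this PR):

// Illustrative only: price a request in USD cents from a models.mjs entry.
const costInCents = (model, promptTokens, completionTokens) =>
    (model.cost.input * promptTokens + model.cost.output * completionTokens) / model.cost.tokens;

// e.g. 10k prompt + 2k completion tokens on gemini-2.5-flash:
// (12 * 10_000 + 48 * 2_000) / 1_000_000 = 0.216 cents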
@@ -116,7 +116,7 @@ class PuterAIModule extends AdvancedBase {
services.registerService('deepseek', DeepSeekService);
}
if ( config?.services?.['gemini'] ) {
const { GeminiService } = require('./GeminiService');
const { GeminiService } = require('./GeminiService/GeminiService.mjs');
const { GeminiImageGenerationService } = require('./GeminiImageGenerationService');
services.registerService('gemini', GeminiService);
@@ -129,13 +129,13 @@ class PuterAIModule extends AdvancedBase {
// Autodiscover the Ollama service, then check whether it's disabled in the config.
// If config.services.ollama.enabled is undefined, the user hasn't set it, so default to true.
const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(data => {
const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
const ollama_enabled = config?.services?.['ollama']?.enabled;
if ( ollama_enabled === undefined ) {
return true;
}
return ollama_enabled;
}).catch(err => {
}).catch(_err => {
return false;
});
// User can disable ollama in the config, but by default it should be enabled if discovery is successful
@@ -119,19 +119,4 @@ module.exports = class FunctionCalling {
};
});
}
static make_gemini_tools (tools) {
if ( Array.isArray(tools) ) {
return [
{
function_declarations: tools.map(t => {
const tool = t.function;
delete tool.parameters.additionalProperties;
return tool;
}),
},
];
}
}
};
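make_gemini_tools existed to repackage OpenAI-style tool definitions into Gemini's function_declarations shape (and to strip additionalProperties, which the Gemini SDK apparently did not accept). With the OpenAI-compatible endpoint, tools now pass through in the standard OpenAI schema. A generic example of that pass-through shape (not code from this PR):

const tools = [{
    type: 'function',
    function: {
        name: 'get_weather',
        description: 'Get the current weather for a city',
        parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
        },
    },
}];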
@@ -1,159 +0,0 @@
/**
* Technically this should be called "GeminiUtil",
* but Google's AI API defies all the established conventions
* so it made sense to defy them here as well.
*/
/**
* Utility class for handling Google Gemini API message transformations and streaming.
*/
module.exports = class GeminiSquareHole {
/**
* Transforms messages from standard format to Gemini API format.
* Converts 'content' to 'parts', 'assistant' role to 'model', and transforms
* tool_use/tool_result/text parts into Gemini's expected structure.
*
* @param {Array} messages - Array of message objects to transform
* @returns {Promise<Array>} Transformed messages compatible with Gemini API
*/
static process_input_messages = async (messages) => {
messages = messages.slice();
for ( const msg of messages ) {
msg.parts = msg.content;
delete msg.content;
if ( msg.role === 'assistant' ) {
msg.role = 'model';
}
for ( let i = 0 ; i < msg.parts.length ; i++ ) {
const part = msg.parts[i];
if ( part.type === 'tool_use' ) {
msg.parts[i] = {
functionCall: {
name: part.id,
args: part.input,
},
};
}
if ( part.type === 'tool_result' ) {
msg.parts[i] = {
functionResponse: {
name: part.tool_use_id,
response: {
name: part.tool_use_id,
content: part.content,
},
},
};
}
if ( part.type === 'text' ) {
msg.parts[i] = {
text: part.text,
};
}
}
}
return messages;
};
/**
* Creates a function that calculates token usage and associated costs from Gemini API response metadata.
*
* @param {Object} params - Configuration object
* @param {Object} params.model_details - Model details including id and cost structure
* @returns {Function} Function that takes usageMetadata and returns an array of token usage objects with costs
*/
static create_usage_calculator = ({ model_details }) => {
return ({ usageMetadata }) => {
const tokens = [];
tokens.push({
type: 'prompt',
model: model_details.id,
amount: usageMetadata.promptTokenCount,
cost: model_details.cost.input * usageMetadata.promptTokenCount,
});
tokens.push({
type: 'completion',
model: model_details.id,
amount: usageMetadata.candidatesTokenCount,
cost: model_details.cost.output * usageMetadata.candidatesTokenCount,
});
return tokens;
};
};
/**
* Creates a handler function for processing Gemini API streaming chat responses.
* The handler processes chunks from the stream, managing text and tool call content blocks,
* and resolves usage metadata when streaming completes.
*
* @param {Object} params - Configuration object
* @param {Object} params.stream - Gemini GenerateContentStreamResult stream
* @param {Function} params.usageCallback - Callback function to handle usage metadata
* @returns {Function} Async function that processes the chat stream and manages content blocks
*/
static create_chat_stream_handler = ({
stream, // GenerateContentStreamResult:stream
usageCallback,
}) => async ({ chatStream }) => {
const message = chatStream.message();
let textblock = message.contentBlock({ type: 'text' });
let toolblock = null;
let mode = 'text';
let last_usage = null;
for await ( const chunk of stream ) {
// This is spread across several lines so that the stack trace
// is more helpful if we get an exception because of an
// inconsistent response from the model.
const candidate = chunk.candidates[0];
const content = candidate.content;
const parts = content.parts;
for ( const part of parts ) {
if ( part.functionCall ) {
if ( mode === 'text' ) {
mode = 'tool';
textblock.end();
}
toolblock = message.contentBlock({
type: 'tool_use',
id: part.functionCall.name,
name: part.functionCall.name,
});
toolblock.addPartialJSON(JSON.stringify(part.functionCall.args));
continue;
}
if ( mode === 'tool' ) {
mode = 'text';
toolblock.end();
textblock = message.contentBlock({ type: 'text' });
}
// assume text as default
const text = part.text;
if ( text ) {
textblock.addText(text);
}
}
last_usage = chunk.usageMetadata;
}
usageCallback(last_usage);
if ( mode === 'text' ) textblock.end();
if ( mode === 'tool' ) toolblock.end();
message.end();
chatStream.end();
};
};
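All of this translation is what the OpenAI-compatible endpoint now absorbs server-side. For reference, the mapping process_input_messages performed, sketched from the code above:

// role: 'assistant'                              →  role: 'model'
// { type: 'text', text }                         →  { text }
// { type: 'tool_use', id, input }                →  { functionCall: { name: id, args: input } }
// { type: 'tool_result', tool_use_id, content }  →  { functionResponse: { name: tool_use_id,
//                                                      response: { name: tool_use_id, content } } }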
@@ -35,6 +35,7 @@ const process_input_messages = async (messages) => {
name: content_block.name,
arguments: JSON.stringify(content_block.input),
},
...(content_block.extra_content ? { extra_content: content_block.extra_content } : {}),
});
content.splice(i, 1);
}
@@ -131,6 +132,14 @@ const create_chat_stream_handler = ({
continue;
}
if ( choice.delta.extra_content ) {
    // Gemini-specific metadata. We append it onto the current message by slightly abusing the
    // text stream. Apps have to opt in to handling extra_content themselves; there doesn't seem
    // to be a backwards-compatible way to do it for them, since most streaming apps maintain
    // chat history by continuously updating `content` themselves, which leaves us no opening to
    // attach an extra object for Gemini's chat-continuation features.
    textblock.addExtraContent(choice.delta.extra_content);
}
const tool_calls = deviations.index_tool_calls_from_stream_choice(choice);
if ( tool_calls ) {
if ( mode === 'text' ) {
@@ -143,6 +152,7 @@ const create_chat_stream_handler = ({
type: 'tool_use',
id: tool_call.id,
name: tool_call.function.name,
...(tool_call.extra_content ? { extra_content: tool_call.extra_content } : {}),
});
tool_call_blocks[tool_call.index] = toolblock;
} else {
@@ -29,9 +29,10 @@ class AIChatConstructStream {
}
class AIChatTextStream extends AIChatConstructStream {
addText (text) {
addText (text, extra_content) {
const json = JSON.stringify({
type: 'text', text,
...(extra_content ? { extra_content } : {}),
});
this.chatStream.stream.write(`${json }\n`);
}
@@ -42,6 +43,14 @@ class AIChatTextStream extends AIChatConstructStream {
});
this.chatStream.stream.write(`${json }\n`);
}
addExtraContent (extra_content) {
    const json = JSON.stringify({
        type: 'extra_content',
        extra_content,
    });
    this.chatStream.stream.write(`${json }\n`);
}
}
class AIChatToolUseStream extends AIChatConstructStream {
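With addExtraContent in place, a streaming client can now see a third event type interleaved with the existing text and tool_use events in the newline-delimited JSON stream. A sketch of what arrives on the wire (the payload values are invented for illustration):

{"type":"text","text":"Hello"}
{"type":"extra_content","extra_content":{"some":"provider metadata"}}
{"type":"text","text":" world"}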
@@ -9,9 +9,19 @@
*/
export const GEMINI_COST_MAP = {
// Gemini api usage types (costs per token in microcents)
'gemini:gemini-1.5-flash:promptTokenCount': 7.5,
'gemini:gemini-1.5-flash:candidatesTokenCount': 30,
'gemini:gemini-2.0-flash:promptTokenCount': 10,
'gemini:gemini-2.0-flash:candidatesTokenCount': 40,
'gemini:gemini-2.0-flash-lite:promptTokenCount': 8,
'gemini:gemini-2.0-flash-lite:candidatesTokenCount': 32,
'gemini:gemini-2.5-flash:promptTokenCount': 12,
'gemini:gemini-2.5-flash:candidatesTokenCount': 48,
'gemini:gemini-2.5-flash-lite:promptTokenCount': 10,
'gemini:gemini-2.5-flash-lite:candidatesTokenCount': 40,
'gemini:gemini-2.5-pro:promptTokenCount': 15,
'gemini:gemini-2.5-pro:candidatesTokenCount': 60,
'gemini:gemini-3-pro-preview:promptTokenCount': 25,
'gemini:gemini-3-pro-preview:candidatesTokenCount': 100,
'gemini:gemini-2.5-flash-image-preview:1024x1024': 3_900_000,
};
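GeminiService records usage with a gemini:<model> prefix (see the utilRecordUsageObject calls above), and a map like this resolves <prefix>:<usage-key> to a per-token price in microcents (millionths of a cent). A hypothetical lookup, purely to illustrate the units — the real MeteringService API may differ:

// Hypothetical: price a usage object against GEMINI_COST_MAP (result in microcents).
function priceUsageMicrocents(costMap, prefix, usage) {
    let total = 0;
    for (const [kind, amount] of Object.entries(usage)) {
        const rate = costMap[`${prefix}:${kind}`];
        if (rate !== undefined) total += rate * amount;
    }
    return total;
}

// priceUsageMicrocents(GEMINI_COST_MAP, 'gemini:gemini-2.5-flash',
//     { promptTokenCount: 10_000, candidatesTokenCount: 2_000 })
// → 12 * 10_000 + 48 * 2_000 = 216_000 microcents ≈ 0.22 cents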