Gemini to OpenAI SDK refactor (#2014)

* WIP Gemini OpenAI refactor

* refactor: gemini open ai service + geminiCostMap

* Gemini Service fixes

* Cleaning up old Gemini cruft

---------

Co-authored-by: Daniel Salazar <daniel.salazar@puter.com>
Neal Shah authored on 2025-11-24 09:55:19 +04:00, committed by GitHub
parent 7e1d50d0d1
commit ed2d1368ca
9 changed files with 235 additions and 384 deletions
@@ -1,204 +0,0 @@
const BaseService = require('../../services/BaseService');
const { GoogleGenerativeAI } = require('@google/generative-ai');
const GeminiSquareHole = require('./lib/GeminiSquareHole');
const FunctionCalling = require('./lib/FunctionCalling');
const { Context } = require('../../util/context');
class GeminiService extends BaseService {
/**
* @type {import('../../services/MeteringService/MeteringService').MeteringService}
*/
meteringService = undefined;
async _init () {
const svc_aiChat = this.services.get('ai-chat');
svc_aiChat.register_provider({
service_name: this.service_name,
alias: true,
});
this.meteringService = this.services.get('meteringService').meteringService;
}
static IMPLEMENTS = {
['puter-chat-completion']: {
async models () {
return await this.models_();
},
async list () {
const models = await this.models_();
const model_names = [];
for ( const model of models ) {
model_names.push(model.id);
if ( model.aliases ) {
model_names.push(...model.aliases);
}
}
return model_names;
},
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
tools = FunctionCalling.make_gemini_tools(tools);
model = model ?? 'gemini-2.0-flash';
const genAI = new GoogleGenerativeAI(this.config.apiKey);
const genModel = genAI.getGenerativeModel({
model,
tools,
generationConfig: {
temperature: temperature, // sampling temperature
maxOutputTokens: max_tokens, // note: the Gemini SDK expects maxOutputTokens, not max_tokens
},
});
messages = await GeminiSquareHole.process_input_messages(messages);
// History is separate, so the last message gets special treatment.
const last_message = messages.pop();
const last_message_parts = last_message.parts.map(part => typeof part === 'string' ? part :
typeof part.text === 'string' ? part.text :
part);
const chat = genModel.startChat({
history: messages,
});
const usage_calculator = GeminiSquareHole.create_usage_calculator({
model_details: (await this.models_()).find(m => m.id === model),
});
// Metering integration
const actor = Context.get('actor');
const meteringPrefix = `gemini:${model}`;
if ( stream ) {
const genResult = await chat.sendMessageStream(last_message_parts);
const stream = genResult.stream;
return {
stream: true,
init_chat_stream:
GeminiSquareHole.create_chat_stream_handler({
stream,
usageCallback: (usageMetadata) => {
// TODO DS: dedup this logic
const trackedUsage = {
prompt_tokens: usageMetadata.promptTokenCount - (usageMetadata.cachedContentTokenCount || 0),
completion_tokens: usageMetadata.candidatesTokenCount,
cached_tokens: usageMetadata.cachedContentTokenCount || 0,
};
this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
},
}),
};
} else {
const genResult = await chat.sendMessage(last_message_parts);
const message = genResult.response.candidates[0];
message.content = message.content.parts;
message.role = 'assistant';
const result = { message };
result.usage = usage_calculator(genResult.response);
// TODO DS: dedup this logic
const trackedUsage = {
prompt_tokens: genResult.response.usageMetadata.promptTokenCount - (genResult.response.usageMetadata.cachedContentTokenCount || 0),
completion_tokens: genResult.response.usageMetadata.candidatesTokenCount,
cached_tokens: genResult.response.usageMetadata.cachedContentTokenCount || 0,
};
this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
return result;
}
},
},
};
async models_ () {
return [
{
id: 'gemini-1.5-flash',
name: 'Gemini 1.5 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 7.5,
output: 30,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash',
name: 'Gemini 2.0 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash-lite',
name: 'Gemini 2.0 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 8,
output: 32,
},
max_tokens: 8192,
},
{
id: 'gemini-2.5-flash',
name: 'Gemini 2.5 Flash',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 12,
output: 48,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-flash-lite',
name: 'Gemini 2.5 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-pro',
name: 'Gemini 2.5 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 15,
output: 60,
},
max_tokens: 65536,
},
{
id: 'gemini-3-pro-preview',
name: 'Gemini 3 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 25,
output: 100,
},
max_tokens: 65536,
},
];
}
}
module.exports = { GeminiService };
@@ -0,0 +1,114 @@
// Preamble: Before this we used Gemini's SDK directly and, as we found out,
// it's actually kind of terrible. So we use the OpenAI SDK now.
import BaseService from '../../../services/BaseService.js';
import openai from 'openai';
import OpenAIUtil from '../lib/OpenAIUtil.js';
import { Context } from '../../../util/context.js';
import { models } from './models.mjs';
export class GeminiService extends BaseService {
/**
* @type {import('../../services/MeteringService/MeteringService').MeteringService}
*/
meteringService = undefined;
defaultModel = 'gemini-2.5-flash';
static IMPLEMENTS = {
['puter-chat-completion']: {
async models () {
return await this.models();
},
async complete (...args) {
return await this.complete(...args);
},
async list () {
return await this.list();
},
},
};
async _init () {
this.openai = new openai.OpenAI({
apiKey: this.config.apiKey,
baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
});
const svc_aiChat = this.services.get('ai-chat');
svc_aiChat.register_provider({
service_name: this.service_name,
alias: true,
});
this.meteringService = this.services.get('meteringService').meteringService;
}
get_default_model () {
return this.defaultModel;
}
async models () {
return models;
}
async list () {
const model_names = [];
for ( const model of models ) {
model_names.push(model.id);
if ( model.aliases ) {
model_names.push(...model.aliases);
}
}
return model_names;
}
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
const actor = Context.get('actor');
messages = await OpenAIUtil.process_input_messages(messages);
// delete cache_control
messages = messages.map(m => {
delete m.cache_control;
return m;
});
const sdk_params = {
messages: messages,
model: model,
...(tools ? { tools } : {}),
...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
...(temperature ? { temperature } : {}),
stream,
...(stream ? {
stream_options: { include_usage: true },
} : {}),
};
let completion;
try {
completion = await this.openai.chat.completions.create(sdk_params);
} catch (e) {
console.error('Gemini completion error: ', e);
throw e;
}
const modelDetails = (await this.models()).find(m => m.id === model);
return OpenAIUtil.handle_completion_output({
usage_calculator: ({ usage }) => {
const trackedUsage = {
prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
completion_tokens: usage.completion_tokens ?? 0,
cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
};
this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelDetails.id}`);
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
model_details: modelDetails,
});
return legacyCostCalculator({ usage });
},
stream,
completion,
});
}
}
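The core of the refactor is visible in _init above: Google exposes an OpenAI-compatible endpoint, so the stock openai client works against Gemini with only an apiKey and baseURL swap. A minimal standalone sketch of the same idea (the env var name, model, and prompt are illustrative):

import OpenAI from 'openai';

// Same endpoint the service configures above; only the key is swapped in.
const client = new OpenAI({
    apiKey: process.env.GEMINI_API_KEY,
    baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
});

const completion = await client.chat.completions.create({
    model: 'gemini-2.5-flash',
    messages: [{ role: 'user', content: 'Say hello in one word.' }],
});
console.log(completion.choices[0].message.content);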
@@ -0,0 +1,86 @@
export const models = [
{
id: 'gemini-1.5-flash',
name: 'Gemini 1.5 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 7.5,
output: 30,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash',
name: 'Gemini 2.0 Flash',
context: 131072,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 8192,
},
{
id: 'gemini-2.0-flash-lite',
name: 'Gemini 2.0 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 8,
output: 32,
},
max_tokens: 8192,
},
{
id: 'gemini-2.5-flash',
name: 'Gemini 2.5 Flash',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 12,
output: 48,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-flash-lite',
name: 'Gemini 2.5 Flash-Lite',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 10,
output: 40,
},
max_tokens: 65536,
},
{
id: 'gemini-2.5-pro',
name: 'Gemini 2.5 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 15,
output: 60,
},
max_tokens: 65536,
},
{
id: 'gemini-3-pro-preview',
name: 'Gemini 3 Pro',
context: 1_048_576,
cost: {
currency: 'usd-cents',
tokens: 1_000_000,
input: 25,
output: 100,
},
max_tokens: 65536,
},
];
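The cost fields above are USD cents per cost.tokens (one million) tokens. A hedged helper showing how those units combine into a price (the helper itself is illustrative, not part of this PR):

// Illustrative only: price a request in USD cents from a models.mjs entry.
const costInCents = (model, promptTokens, completionTokens) =>
    (model.cost.input * promptTokens + model.cost.output * completionTokens) / model.cost.tokens;

// e.g. 10k prompt + 2k completion tokens on gemini-2.5-flash:
// (12 * 10_000 + 48 * 2_000) / 1_000_000 = 0.216 cents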
@@ -116,7 +116,7 @@ class PuterAIModule extends AdvancedBase {
services.registerService('deepseek', DeepSeekService);
}
if ( config?.services?.['gemini'] ) {
const { GeminiService } = require('./GeminiService');
const { GeminiService } = require('./GeminiService/GeminiService.mjs');
const { GeminiImageGenerationService } = require('./GeminiImageGenerationService');
services.registerService('gemini', GeminiService);
@@ -129,13 +129,13 @@ class PuterAIModule extends AdvancedBase {
// Autodiscover the Ollama service, then check whether it's disabled in the config.
// If config.services.ollama.enabled is undefined, the user hasn't set it, so default to true.
const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(data => {
const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
const ollama_enabled = config?.services?.['ollama']?.enabled;
if ( ollama_enabled === undefined ) {
return true;
}
return ollama_enabled;
}).catch(err => {
}).catch(_err => {
return false;
});
// User can disable ollama in the config, but by default it should be enabled if discovery is successful
@@ -119,19 +119,4 @@ module.exports = class FunctionCalling {
};
});
}
static make_gemini_tools (tools) {
if ( Array.isArray(tools) ) {
return [
{
function_declarations: tools.map(t => {
const tool = t.function;
delete tool.parameters.additionalProperties;
return tool;
}),
},
];
}
}
};
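make_gemini_tools existed to repackage OpenAI-style tool definitions into Gemini's function_declarations shape (and to strip additionalProperties, which the Gemini SDK apparently did not accept). With the OpenAI-compatible endpoint, tools now pass through in the standard OpenAI schema. A generic example of that pass-through shape (not code from this PR):

const tools = [{
    type: 'function',
    function: {
        name: 'get_weather',
        description: 'Get the current weather for a city',
        parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
        },
    },
}];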
@@ -1,159 +0,0 @@
/**
* Technically this should be called "GeminiUtil",
* but Google's AI API defies all the established conventions
* so it made sense to defy them here as well.
*/
/**
* Utility class for handling Google Gemini API message transformations and streaming.
*/
module.exports = class GeminiSquareHole {
/**
* Transforms messages from standard format to Gemini API format.
* Converts 'content' to 'parts', 'assistant' role to 'model', and transforms
* tool_use/tool_result/text parts into Gemini's expected structure.
*
* @param {Array} messages - Array of message objects to transform
* @returns {Promise<Array>} Transformed messages compatible with Gemini API
*/
static process_input_messages = async (messages) => {
messages = messages.slice();
for ( const msg of messages ) {
msg.parts = msg.content;
delete msg.content;
if ( msg.role === 'assistant' ) {
msg.role = 'model';
}
for ( let i = 0 ; i < msg.parts.length ; i++ ) {
const part = msg.parts[i];
if ( part.type === 'tool_use' ) {
msg.parts[i] = {
functionCall: {
name: part.id,
args: part.input,
},
};
}
if ( part.type === 'tool_result' ) {
msg.parts[i] = {
functionResponse: {
name: part.tool_use_id,
response: {
name: part.tool_use_id,
content: part.content,
},
},
};
}
if ( part.type === 'text' ) {
msg.parts[i] = {
text: part.text,
};
}
}
}
return messages;
};
/**
* Creates a function that calculates token usage and associated costs from Gemini API response metadata.
*
* @param {Object} params - Configuration object
* @param {Object} params.model_details - Model details including id and cost structure
* @returns {Function} Function that takes usageMetadata and returns an array of token usage objects with costs
*/
static create_usage_calculator = ({ model_details }) => {
return ({ usageMetadata }) => {
const tokens = [];
tokens.push({
type: 'prompt',
model: model_details.id,
amount: usageMetadata.promptTokenCount,
cost: model_details.cost.input * usageMetadata.promptTokenCount,
});
tokens.push({
type: 'completion',
model: model_details.id,
amount: usageMetadata.candidatesTokenCount,
cost: model_details.cost.output * usageMetadata.candidatesTokenCount,
});
return tokens;
};
};
/**
* Creates a handler function for processing Gemini API streaming chat responses.
* The handler processes chunks from the stream, managing text and tool call content blocks,
* and resolves usage metadata when streaming completes.
*
* @param {Object} params - Configuration object
* @param {Object} params.stream - Gemini GenerateContentStreamResult stream
* @param {Function} params.usageCallback - Callback function to handle usage metadata
* @returns {Function} Async function that processes the chat stream and manages content blocks
*/
static create_chat_stream_handler = ({
stream, // GenerateContentStreamResult:stream
usageCallback,
}) => async ({ chatStream }) => {
const message = chatStream.message();
let textblock = message.contentBlock({ type: 'text' });
let toolblock = null;
let mode = 'text';
let last_usage = null;
for await ( const chunk of stream ) {
// This is spread across several lines so that the stack trace
// is more helpful if we get an exception because of an
// inconsistent response from the model.
const candidate = chunk.candidates[0];
const content = candidate.content;
const parts = content.parts;
for ( const part of parts ) {
if ( part.functionCall ) {
if ( mode === 'text' ) {
mode = 'tool';
textblock.end();
}
toolblock = message.contentBlock({
type: 'tool_use',
id: part.functionCall.name,
name: part.functionCall.name,
});
toolblock.addPartialJSON(JSON.stringify(part.functionCall.args));
continue;
}
if ( mode === 'tool' ) {
mode = 'text';
toolblock.end();
textblock = message.contentBlock({ type: 'text' });
}
// assume text as default
const text = part.text;
if ( text ) {
textblock.addText(text);
}
}
last_usage = chunk.usageMetadata;
}
usageCallback(last_usage);
if ( mode === 'text' ) textblock.end();
if ( mode === 'tool' ) toolblock.end();
message.end();
chatStream.end();
};
};
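All of this translation is what the OpenAI-compatible endpoint now absorbs server-side. For reference, the mapping process_input_messages performed, sketched from the code above:

// role: 'assistant'                              →  role: 'model'
// { type: 'text', text }                         →  { text }
// { type: 'tool_use', id, input }                →  { functionCall: { name: id, args: input } }
// { type: 'tool_result', tool_use_id, content }  →  { functionResponse: { name: tool_use_id,
//                                                      response: { name: tool_use_id, content } } }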
@@ -35,6 +35,7 @@ const process_input_messages = async (messages) => {
name: content_block.name,
arguments: JSON.stringify(content_block.input),
},
...(content_block.extra_content ? { extra_content: content_block.extra_content } : {}),
});
content.splice(i, 1);
}
@@ -131,6 +132,14 @@ const create_chat_stream_handler = ({
continue;
}
if ( choice.delta.extra_content ) {
    // Gemini-specific metadata. We append it onto the current message by slightly abusing the
    // text stream. Apps have to opt in to handling extra_content themselves; there doesn't seem
    // to be a backwards-compatible way to do it for them, since most streaming apps maintain
    // chat history by continuously updating `content` themselves, which leaves us no opening to
    // attach an extra object for Gemini's chat-continuation features.
    textblock.addExtraContent(choice.delta.extra_content);
}
const tool_calls = deviations.index_tool_calls_from_stream_choice(choice);
if ( tool_calls ) {
if ( mode === 'text' ) {
@@ -143,6 +152,7 @@ const create_chat_stream_handler = ({
type: 'tool_use',
id: tool_call.id,
name: tool_call.function.name,
...(tool_call.extra_content ? { extra_content: tool_call.extra_content } : {}),
});
tool_call_blocks[tool_call.index] = toolblock;
} else {
@@ -29,9 +29,10 @@ class AIChatConstructStream {
}
class AIChatTextStream extends AIChatConstructStream {
addText (text) {
addText (text, extra_content) {
const json = JSON.stringify({
type: 'text', text,
...(extra_content ? { extra_content } : {}),
});
this.chatStream.stream.write(`${json }\n`);
}
@@ -42,6 +43,14 @@ class AIChatTextStream extends AIChatConstructStream {
});
this.chatStream.stream.write(`${json }\n`);
}
addExtraContent (extra_content) {
    const json = JSON.stringify({
        type: 'extra_content',
        extra_content,
    });
    this.chatStream.stream.write(`${json }\n`);
}
}
class AIChatToolUseStream extends AIChatConstructStream {
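With addExtraContent in place, a streaming client can now see a third event type interleaved with the existing text and tool_use events in the newline-delimited JSON stream. A sketch of what arrives on the wire (the payload values are invented for illustration):

{"type":"text","text":"Hello"}
{"type":"extra_content","extra_content":{"some":"provider metadata"}}
{"type":"text","text":" world"}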
@@ -9,9 +9,19 @@
*/
export const GEMINI_COST_MAP = {
// Gemini api usage types (costs per token in microcents)
'gemini:gemini-1.5-flash:promptTokenCount': 7.5,
'gemini:gemini-1.5-flash:candidatesTokenCount': 30,
'gemini:gemini-2.0-flash:promptTokenCount': 10,
'gemini:gemini-2.0-flash:candidatesTokenCount': 40,
'gemini:gemini-2.0-flash-lite:promptTokenCount': 8,
'gemini:gemini-2.0-flash-lite:candidatesTokenCount': 32,
'gemini:gemini-2.5-flash:promptTokenCount': 12,
'gemini:gemini-2.5-flash:candidatesTokenCount': 48,
'gemini:gemini-2.5-flash-lite:promptTokenCount': 10,
'gemini:gemini-2.5-flash-lite:candidatesTokenCount': 40,
'gemini:gemini-2.5-pro:promptTokenCount': 15,
'gemini:gemini-2.5-pro:candidatesTokenCount': 60,
'gemini:gemini-3-pro-preview:promptTokenCount': 25,
'gemini:gemini-3-pro-preview:candidatesTokenCount': 100,
'gemini:gemini-2.5-flash-image-preview:1024x1024': 3_900_000,
};
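GeminiService records usage with a gemini:<model> prefix (see the utilRecordUsageObject calls above), and a map like this resolves <prefix>:<usage-key> to a per-token price in microcents (millionths of a cent). A hypothetical lookup, purely to illustrate the units — the real MeteringService API may differ:

// Hypothetical: price a usage object against GEMINI_COST_MAP (result in microcents).
function priceUsageMicrocents(costMap, prefix, usage) {
    let total = 0;
    for (const [kind, amount] of Object.entries(usage)) {
        const rate = costMap[`${prefix}:${kind}`];
        if (rate !== undefined) total += rate * amount;
    }
    return total;
}

// priceUsageMicrocents(GEMINI_COST_MAP, 'gemini:gemini-2.5-flash',
//     { promptTokenCount: 10_000, candidatesTokenCount: 2_000 })
// → 12 * 10_000 + 48 * 2_000 = 216_000 microcents ≈ 0.22 cents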