Mirror of https://github.com/HeyPuter/puter.git (synced 2026-05-04 00:20:45 +00:00)
Gemini to OpenAI SDK refactor (#2014)
* WIP Gemini OpenAI refactor
* refactor: gemini open ai service + geminiCostMap
* Gemini Service fixes
* Cleaning up old Gemini cruft

Co-authored-by: Daniel Salazar <daniel.salazar@puter.com>
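The gist of the refactor: instead of driving `@google/generative-ai` directly, the service now points the `openai` SDK at Google's OpenAI-compatible endpoint and speaks plain chat completions. A minimal sketch of that idea (the model choice and prompt are illustrative, not from the diff):

// Sketch only: Gemini via the OpenAI SDK, as the new service below does.
import OpenAI from 'openai';

const client = new OpenAI({
    apiKey: process.env.GEMINI_API_KEY, // assumption: key supplied via the environment
    baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
});

const completion = await client.chat.completions.create({
    model: 'gemini-2.5-flash',
    messages: [{ role: 'user', content: 'Hello!' }],
});
console.log(completion.choices[0].message.content);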
@@ -1,204 +0,0 @@
-const BaseService = require('../../services/BaseService');
-const { GoogleGenerativeAI } = require('@google/generative-ai');
-const GeminiSquareHole = require('./lib/GeminiSquareHole');
-const FunctionCalling = require('./lib/FunctionCalling');
-const { Context } = require('../../util/context');
-
-class GeminiService extends BaseService {
-    /**
-     * @type {import('../../services/MeteringService/MeteringService').MeteringService}
-     */
-    meteringService = undefined;
-
-    async _init () {
-        const svc_aiChat = this.services.get('ai-chat');
-        svc_aiChat.register_provider({
-            service_name: this.service_name,
-            alias: true,
-        });
-        this.meteringService = this.services.get('meteringService').meteringService;
-    }
-
-    static IMPLEMENTS = {
-        ['puter-chat-completion']: {
-            async models () {
-                return await this.models_();
-            },
-            async list () {
-                const models = await this.models_();
-                const model_names = [];
-                for ( const model of models ) {
-                    model_names.push(model.id);
-                    if ( model.aliases ) {
-                        model_names.push(...model.aliases);
-                    }
-                }
-                return model_names;
-            },
-
-            async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
-                tools = FunctionCalling.make_gemini_tools(tools);
-
-                model = model ?? 'gemini-2.0-flash';
-                const genAI = new GoogleGenerativeAI(this.config.apiKey);
-                const genModel = genAI.getGenerativeModel({
-                    model,
-                    tools,
-                    generationConfig: {
-                        temperature: temperature, // Set temperature (0.0 to 1.0). Defaults to 0.7
-                        maxOutputTokens: max_tokens, // Note: it's maxOutputTokens, not max_tokens
-                    },
-                });
-
-                messages = await GeminiSquareHole.process_input_messages(messages);
-
-                // History is separate, so the last message gets special treatment.
-                const last_message = messages.pop();
-                const last_message_parts = last_message.parts.map(part => typeof part === 'string' ? part :
-                    typeof part.text === 'string' ? part.text :
-                    part);
-
-                const chat = genModel.startChat({
-                    history: messages,
-                });
-
-                const usage_calculator = GeminiSquareHole.create_usage_calculator({
-                    model_details: (await this.models_()).find(m => m.id === model),
-                });
-
-                // Metering integration
-                const actor = Context.get('actor');
-                const meteringPrefix = `gemini:${model}`;
-                if ( stream ) {
-                    const genResult = await chat.sendMessageStream(last_message_parts);
-                    const stream = genResult.stream;
-
-                    return {
-                        stream: true,
-                        init_chat_stream:
-                            GeminiSquareHole.create_chat_stream_handler({
-                                stream,
-                                usageCallback: (usageMetadata) => {
-                                    // TODO DS: dedup this logic
-                                    const trackedUsage = {
-                                        prompt_tokens: usageMetadata.promptTokenCount - (usageMetadata.cachedContentTokenCount || 0),
-                                        completion_tokens: usageMetadata.candidatesTokenCount,
-                                        cached_tokens: usageMetadata.cachedContentTokenCount || 0,
-                                    };
-                                    this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
-                                },
-                            }),
-                    };
-                } else {
-                    const genResult = await chat.sendMessage(last_message_parts);
-
-                    const message = genResult.response.candidates[0];
-                    message.content = message.content.parts;
-                    message.role = 'assistant';
-
-                    const result = { message };
-                    result.usage = usage_calculator(genResult.response);
-                    // TODO DS: dedup this logic
-                    const trackedUsage = {
-                        prompt_tokens: genResult.response.usageMetadata.promptTokenCount - (genResult.cachedContentTokenCount || 0),
-                        completion_tokens: genResult.response.usageMetadata.candidatesTokenCount,
-                        cached_tokens: genResult.response.usageMetadata.cachedContentTokenCount || 0,
-                    };
-                    this.meteringService.utilRecordUsageObject(trackedUsage, actor, meteringPrefix);
-                    return result;
-                }
-            },
-        },
-    };
-
-    async models_ () {
-        return [
-            {
-                id: 'gemini-1.5-flash',
-                name: 'Gemini 1.5 Flash',
-                context: 131072,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 7.5,
-                    output: 30,
-                },
-                max_tokens: 8192,
-            },
-            {
-                id: 'gemini-2.0-flash',
-                name: 'Gemini 2.0 Flash',
-                context: 131072,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 10,
-                    output: 40,
-                },
-                max_tokens: 8192,
-            },
-            {
-                id: 'gemini-2.0-flash-lite',
-                name: 'Gemini 2.0 Flash-Lite',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 8,
-                    output: 32,
-                },
-                max_tokens: 8192,
-            },
-            {
-                id: 'gemini-2.5-flash',
-                name: 'Gemini 2.5 Flash',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 12,
-                    output: 48,
-                },
-                max_tokens: 65536,
-            },
-            {
-                id: 'gemini-2.5-flash-lite',
-                name: 'Gemini 2.5 Flash-Lite',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 10,
-                    output: 40,
-                },
-                max_tokens: 65536,
-            },
-            {
-                id: 'gemini-2.5-pro',
-                name: 'Gemini 2.5 Pro',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 15,
-                    output: 60,
-                },
-                max_tokens: 65536,
-            },
-            {
-                id: 'gemini-3-pro-preview',
-                name: 'Gemini 3 Pro',
-                context: 1_048_576,
-                cost: {
-                    currency: 'usd-cents',
-                    tokens: 1_000_000,
-                    input: 25,
-                    output: 100,
-                },
-                max_tokens: 65536,
-            },
-        ];
-    }
-}
-
-module.exports = { GeminiService };
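For contrast with the file removed above: the old SDK forces conversation history and the newest message apart (startChat takes the history; sendMessage takes only the latest parts), which is why complete() popped the last message off. Roughly, with hypothetical variables:

// Old @google/generative-ai call shape (sketch, not the removed code verbatim):
const { GoogleGenerativeAI } = require('@google/generative-ai');

const genAI = new GoogleGenerativeAI(apiKey);
const genModel = genAI.getGenerativeModel({ model: 'gemini-2.0-flash' });
const chat = genModel.startChat({ history: earlierMessages }); // everything except the last message
const result = await chat.sendMessage('the last user message'); // the last message goes here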
@@ -0,0 +1,114 @@
+// Preamble: Before this we used Gemini's SDK directly and, as we found out,
+// it's actually kind of terrible. So we use the openai sdk now.
+import BaseService from '../../../services/BaseService.js';
+import openai from 'openai';
+import OpenAIUtil from '../lib/OpenAIUtil.js';
+import { Context } from '../../../util/context.js';
+import { models } from './models.mjs';
+
+
+export class GeminiService extends BaseService {
+    /**
+     * @type {import('../../services/MeteringService/MeteringService').MeteringService}
+     */
+    meteringService = undefined;
+
+    defaultModel = 'gemini-2.5-flash';
+
+    static IMPLEMENTS = {
+        ['puter-chat-completion']: {
+            async models () {
+                return await this.models();
+            },
+            async complete (...args) {
+                return await this.complete(...args);
+            },
+            async list () {
+                return await this.list();
+            },
+        },
+    };
+
+    async _init () {
+        this.openai = new openai.OpenAI({
+            apiKey: this.config.apiKey,
+            baseURL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
+        });
+
+        const svc_aiChat = this.services.get('ai-chat');
+        svc_aiChat.register_provider({
+            service_name: this.service_name,
+            alias: true,
+        });
+        this.meteringService = this.services.get('meteringService').meteringService;
+    }
+
+    get_default_model () {
+        return this.defaultModel;
+    }
+
+    async models () {
+        return models;
+    }
+    async list () {
+        const model_names = [];
+        for ( const model of models ) {
+            model_names.push(model.id);
+            if ( model.aliases ) {
+                model_names.push(...model.aliases);
+            }
+        }
+        return model_names;
+    }
+    async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
+        const actor = Context.get('actor');
+        messages = await OpenAIUtil.process_input_messages(messages);
+
+        // delete cache_control
+        messages = messages.map(m => {
+            delete m.cache_control;
+            return m;
+        });
+
+        const sdk_params = {
+            messages: messages,
+            model: model,
+            ...(tools ? { tools } : {}),
+            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
+            ...(temperature ? { temperature } : {}),
+            stream,
+            ...(stream ? {
+                stream_options: { include_usage: true },
+            } : {}),
+        };
+
+        let completion;
+        try {
+            completion = await this.openai.chat.completions.create(sdk_params);
+        } catch (e) {
+            console.error('Gemini completion error: ', e);
+            throw e;
+        }
+
+        const modelDetails = (await this.models()).find(m => m.id === model);
+        return OpenAIUtil.handle_completion_output({
+            usage_calculator: ({ usage }) => {
+                const trackedUsage = {
+                    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
+                    completion_tokens: usage.completion_tokens ?? 0,
+                    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
+                };
+
+                this.meteringService.utilRecordUsageObject(trackedUsage, actor, `gemini:${modelDetails.id}`);
+                const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
+                    model_details: modelDetails,
+                });
+
+                return legacyCostCalculator({ usage });
+            },
+            stream,
+            completion,
+        });
+
+    }
+}
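One subtlety in the new complete() above: cached prompt tokens are metered separately, so they are subtracted from prompt_tokens before recording. A worked example with made-up numbers:

// Hypothetical usage payload from the OpenAI-compatible API:
const usage = {
    prompt_tokens: 1200,
    completion_tokens: 300,
    prompt_tokens_details: { cached_tokens: 200 },
};

// Same arithmetic as the usage_calculator above:
const trackedUsage = {
    prompt_tokens: (usage.prompt_tokens ?? 0) - (usage.prompt_tokens_details?.cached_tokens ?? 0), // 1000
    completion_tokens: usage.completion_tokens ?? 0, // 300
    cached_tokens: usage.prompt_tokens_details?.cached_tokens ?? 0, // 200
};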
@@ -0,0 +1,86 @@
+export const models = [
+    {
+        id: 'gemini-1.5-flash',
+        name: 'Gemini 1.5 Flash',
+        context: 131072,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 7.5,
+            output: 30,
+        },
+        max_tokens: 8192,
+    },
+    {
+        id: 'gemini-2.0-flash',
+        name: 'Gemini 2.0 Flash',
+        context: 131072,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 10,
+            output: 40,
+        },
+        max_tokens: 8192,
+    },
+    {
+        id: 'gemini-2.0-flash-lite',
+        name: 'Gemini 2.0 Flash-Lite',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 8,
+            output: 32,
+        },
+        max_tokens: 8192,
+    },
+    {
+        id: 'gemini-2.5-flash',
+        name: 'Gemini 2.5 Flash',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 12,
+            output: 48,
+        },
+        max_tokens: 65536,
+    },
+    {
+        id: 'gemini-2.5-flash-lite',
+        name: 'Gemini 2.5 Flash-Lite',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 10,
+            output: 40,
+        },
+        max_tokens: 65536,
+    },
+    {
+        id: 'gemini-2.5-pro',
+        name: 'Gemini 2.5 Pro',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 15,
+            output: 60,
+        },
+        max_tokens: 65536,
+    },
+    {
+        id: 'gemini-3-pro-preview',
+        name: 'Gemini 3 Pro',
+        context: 1_048_576,
+        cost: {
+            currency: 'usd-cents',
+            tokens: 1_000_000,
+            input: 25,
+            output: 100,
+        },
+        max_tokens: 65536,
+    },
+];
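Each entry above prices tokens in usd-cents per cost.tokens (one million) tokens, so costing a call is a straight proportion. For example, with gemini-2.5-flash and illustrative token counts:

import { models } from './models.mjs'; // path as added in this commit

// Hypothetical call: 10,000 input tokens and 2,000 output tokens.
const model = models.find(m => m.id === 'gemini-2.5-flash');
const inputCents = model.cost.input * (10_000 / model.cost.tokens);  // 12 * 0.01  = 0.12 usd-cents
const outputCents = model.cost.output * (2_000 / model.cost.tokens); // 48 * 0.002 = 0.096 usd-cents
const totalCents = inputCents + outputCents;                         // ~0.216 usd-cents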
@@ -116,7 +116,7 @@ class PuterAIModule extends AdvancedBase {
             services.registerService('deepseek', DeepSeekService);
         }
         if ( config?.services?.['gemini'] ) {
-            const { GeminiService } = require('./GeminiService');
+            const { GeminiService } = require('./GeminiService/GeminiService.mjs');
             const { GeminiImageGenerationService } = require('./GeminiImageGenerationService');
 
             services.registerService('gemini', GeminiService);
@@ -129,13 +129,13 @@ class PuterAIModule extends AdvancedBase {
 
         // Autodiscover Ollama service and then check if its disabled in the config
         // if config.services.ollama.enabled is undefined, it means the user hasn't set it, so we should default to true
-        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(data => {
+        const ollama_available = await fetch('http://localhost:11434/api/tags').then(resp => resp.json()).then(_data => {
            const ollama_enabled = config?.services?.['ollama']?.enabled;
            if ( ollama_enabled === undefined ) {
                return true;
            }
            return ollama_enabled;
-        }).catch(err => {
+        }).catch(_err => {
            return false;
        });
        // User can disable ollama in the config, but by default it should be enabled if discovery is successful

@@ -119,19 +119,4 @@ module.exports = class FunctionCalling {
             };
         });
     }
-
-    static make_gemini_tools (tools) {
-        if ( Array.isArray(tools) ) {
-            return [
-                {
-                    function_declarations: tools.map(t => {
-                        const tool = t.function;
-                        delete tool.parameters.additionalProperties;
-                        return tool;
-                    }),
-                },
-            ];
-        };
-
-    }
 };

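make_gemini_tools existed to reshape OpenAI-style tool definitions into Gemini's native function_declarations format; the OpenAI-compatible endpoint accepts the OpenAI shape as-is, so the adapter goes away. What the conversion looked like, on a hypothetical tool:

// OpenAI-style tool definition (hypothetical):
const tools = [{
    type: 'function',
    function: {
        name: 'get_weather',
        description: 'Look up current weather',
        parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            additionalProperties: false, // make_gemini_tools deleted this key
        },
    },
}];

// What make_gemini_tools produced: one wrapper object holding
// function_declarations, with additionalProperties stripped.
const gemini_tools = [{
    function_declarations: [{
        name: 'get_weather',
        description: 'Look up current weather',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
    }],
}];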
@@ -1,159 +0,0 @@
-/**
- * Technically this should be called "GeminiUtil",
- * but Google's AI API defies all the established conventions
- * so it made sense to defy them here as well.
- */
-
-/**
- * Utility class for handling Google Gemini API message transformations and streaming.
- */
-module.exports = class GeminiSquareHole {
-    /**
-     * Transforms messages from standard format to Gemini API format.
-     * Converts 'content' to 'parts', 'assistant' role to 'model', and transforms
-     * tool_use/tool_result/text parts into Gemini's expected structure.
-     *
-     * @param {Array} messages - Array of message objects to transform
-     * @returns {Promise<Array>} Transformed messages compatible with Gemini API
-     */
-    static process_input_messages = async (messages) => {
-        messages = messages.slice();
-
-        for ( const msg of messages ) {
-            msg.parts = msg.content;
-            delete msg.content;
-
-            if ( msg.role === 'assistant' ) {
-                msg.role = 'model';
-            }
-
-            for ( let i = 0 ; i < msg.parts.length ; i++ ) {
-                const part = msg.parts[i];
-                if ( part.type === 'tool_use' ) {
-                    msg.parts[i] = {
-                        functionCall: {
-                            name: part.id,
-                            args: part.input,
-                        },
-                    };
-                }
-                if ( part.type === 'tool_result' ) {
-                    msg.parts[i] = {
-                        functionResponse: {
-                            name: part.tool_use_id,
-                            response: {
-                                name: part.tool_use_id,
-                                content: part.content,
-                            },
-                        },
-                    };
-                }
-                if ( part.type === 'text' ) {
-                    msg.parts[i] = {
-                        text: part.text,
-                    };
-                }
-            }
-        }
-
-        return messages;
-    };
-
-    /**
-     * Creates a function that calculates token usage and associated costs from Gemini API response metadata.
-     *
-     * @param {Object} params - Configuration object
-     * @param {Object} params.model_details - Model details including id and cost structure
-     * @returns {Function} Function that takes usageMetadata and returns an array of token usage objects with costs
-     */
-    static create_usage_calculator = ({ model_details }) => {
-        return ({ usageMetadata }) => {
-            const tokens = [];
-
-            tokens.push({
-                type: 'prompt',
-                model: model_details.id,
-                amount: usageMetadata.promptTokenCount,
-                cost: model_details.cost.input * usageMetadata.promptTokenCount,
-            });
-
-            tokens.push({
-                type: 'completion',
-                model: model_details.id,
-                amount: usageMetadata.candidatesTokenCount,
-                cost: model_details.cost.output * usageMetadata.candidatesTokenCount,
-            });
-
-            return tokens;
-        };
-    };
-
-    /**
-     * Creates a handler function for processing Gemini API streaming chat responses.
-     * The handler processes chunks from the stream, managing text and tool call content blocks,
-     * and resolves usage metadata when streaming completes.
-     *
-     * @param {Object} params - Configuration object
-     * @param {Object} params.stream - Gemini GenerateContentStreamResult stream
-     * @param {Function} params.usageCallback - Callback function to handle usage metadata
-     * @returns {Function} Async function that processes the chat stream and manages content blocks
-     */
-    static create_chat_stream_handler = ({
-        stream, // GenerateContentStreamResult:stream
-        usageCallback,
-    }) => async ({ chatStream }) => {
-        const message = chatStream.message();
-
-        let textblock = message.contentBlock({ type: 'text' });
-        let toolblock = null;
-        let mode = 'text';
-
-        let last_usage = null;
-        for await ( const chunk of stream ) {
-            // This is spread across several lines so that the stack trace
-            // is more helpful if we get an exception because of an
-            // inconsistent response from the model.
-            const candidate = chunk.candidates[0];
-            const content = candidate.content;
-            const parts = content.parts;
-            for ( const part of parts ) {
-                if ( part.functionCall ) {
-                    if ( mode === 'text' ) {
-                        mode = 'tool';
-                        textblock.end();
-                    }
-
-                    toolblock = message.contentBlock({
-                        type: 'tool_use',
-                        id: part.functionCall.name,
-                        name: part.functionCall.name,
-                    });
-                    toolblock.addPartialJSON(JSON.stringify(part.functionCall.args));
-
-                    continue;
-                }
-
-                if ( mode === 'tool' ) {
-                    mode = 'text';
-                    toolblock.end();
-                    textblock = message.contentBlock({ type: 'text' });
-                }
-
-                // assume text as default
-                const text = part.text;
-                if ( text ) {
-                    textblock.addText(text);
-                }
-            }
-
-            last_usage = chunk.usageMetadata;
-        }
-
-        usageCallback(last_usage);
-
-        if ( mode === 'text' ) textblock.end();
-        if ( mode === 'tool' ) toolblock.end();
-        message.end();
-        chatStream.end();
-    };
-};
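With the service now speaking the OpenAI dialect, OpenAIUtil.process_input_messages takes over and the reshaping above becomes dead code. What it used to do to a message, on a hypothetical example:

// Standard message in:
const before = { role: 'assistant', content: [{ type: 'text', text: 'Hi!' }] };

// Gemini-native message out: 'content' renamed to 'parts', the 'assistant'
// role mapped to 'model', and typed parts collapsed to Gemini part objects.
const after = { role: 'model', parts: [{ text: 'Hi!' }] };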
@@ -35,6 +35,7 @@ const process_input_messages = async (messages) => {
                     name: content_block.name,
                     arguments: JSON.stringify(content_block.input),
                 },
+                ...(content_block.extra_content?{extra_content: content_block.extra_content}:{})
             });
             content.splice(i, 1);
         }
@@ -131,6 +132,14 @@ const create_chat_stream_handler = ({
                 continue;
             }
 
+            if (choice.delta.extra_content) {
+                // Gemini-specific thing for metadata; we will basically be appending onto the current message by abusing .addText a little.
+                // Apps have to choose to handle extra_content themselves; it doesn't seem like there's a way we can do it in a backwards-
+                // compatible fashion, since most streaming apps will handle chat history by continuously updating content themselves.
+                // This doesn't give us a chance to add in an extra object for Gemini's chat-continuing features.
+                textblock.addExtraContent(choice.delta.extra_content);
+            }
+
             const tool_calls = deviations.index_tool_calls_from_stream_choice(choice);
             if ( tool_calls ) {
                 if ( mode === 'text' ) {
@@ -143,6 +152,7 @@ const create_chat_stream_handler = ({
                     type: 'tool_use',
                     id: tool_call.id,
                     name: tool_call.function.name,
+                    ...(tool_call.extra_content ? {extra_content: tool_call.extra_content}: {})
                 });
                 tool_call_blocks[tool_call.index] = toolblock;
             } else {

@@ -29,9 +29,10 @@ class AIChatConstructStream {
 }
 
 class AIChatTextStream extends AIChatConstructStream {
-    addText (text) {
+    addText (text, extra_content) {
         const json = JSON.stringify({
             type: 'text', text,
+            ...(extra_content?{extra_content}:{})
         });
         this.chatStream.stream.write(`${json }\n`);
     }
@@ -42,6 +43,14 @@ class AIChatTextStream extends AIChatConstructStream {
         });
         this.chatStream.stream.write(`${json }\n`);
     }
+
+    addExtraContent (extra_content) {
+        const json = JSON.stringify({
+            type: 'extra_content',
+            extra_content,
+        });
+        this.chatStream.stream.write(`${json }\n`);
+    }
 }
 
 class AIChatToolUseStream extends AIChatConstructStream {

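addText and the new addExtraContent each write one standalone JSON object per line to the chat stream. Per the comment in OpenAIUtil above, clients must opt in to extra_content; a consumer that does might dispatch on the line's shape like this (sketch; chunkText, appendText, and handleExtraContent are hypothetical):

// Each line of the stream is standalone JSON, e.g.:
//   {"type":"text","text":"Hello"}
//   {"type":"text","text":" world","extra_content":{...}}
//   {"type":"extra_content","extra_content":{...}}
for ( const line of chunkText.split('\n').filter(Boolean) ) {
    const part = JSON.parse(line);
    if ( part.type === 'text' ) appendText(part.text);
    if ( part.extra_content ) handleExtraContent(part.extra_content);
}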
@@ -9,9 +9,19 @@
  */
 export const GEMINI_COST_MAP = {
     // Gemini api usage types (costs per token in microcents)
     'gemini:gemini-1.5-flash:promptTokenCount': 7.5,
     'gemini:gemini-1.5-flash:candidatesTokenCount': 30,
+    'gemini:gemini-2.0-flash:promptTokenCount': 10,
+    'gemini:gemini-2.0-flash:candidatesTokenCount': 40,
-    'gemini:gemini-1.5-flash:promptTokenCount': 3,
-    'gemini:gemini-1.5-flash:candidatesTokenCount': 2,
+    'gemini:gemini-2.0-flash-lite:promptTokenCount': 8,
+    'gemini:gemini-2.0-flash-lite:candidatesTokenCount': 32,
+    'gemini:gemini-2.5-flash:promptTokenCount': 12,
+    'gemini:gemini-2.5-flash:candidatesTokenCount': 48,
+    'gemini:gemini-2.5-flash-lite:promptTokenCount': 10,
+    'gemini:gemini-2.5-flash-lite:candidatesTokenCount': 40,
+    'gemini:gemini-2.5-pro:promptTokenCount': 15,
+    'gemini:gemini-2.5-pro:candidatesTokenCount': 60,
+    'gemini:gemini-3-pro-preview:promptTokenCount': 25,
+    'gemini:gemini-3-pro-preview:candidatesTokenCount': 100,
     'gemini:gemini-2.5-flash-image-preview:1024x1024': 3_900_000,
 };

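The map's unit (microcents per token) lines up with models.mjs, which prices in usd-cents per million tokens: X cents per 1,000,000 tokens is X microcents per token, assuming 1 microcent = 10^-6 cents. The removed lines also appear to show an old quirk: duplicate keys in a JS object literal resolve to the last occurrence, so the old map was effectively pricing gemini-1.5-flash at 3 and 2 rather than 7.5 and 30. Checking gemini-2.5-flash input pricing:

// models.mjs: input costs 12 usd-cents per 1,000,000 tokens.
const centsPerToken = 12 / 1_000_000;           // 0.000012 cents per token
const microcentsPerToken = centsPerToken * 1e6; // 12 -- matches the map entry above
// e.g. a 10,000-token prompt: 10_000 * 12 = 120,000 microcents = 0.12 cents.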