mirror of
https://github.com/HeyPuter/puter.git
synced 2026-05-29 21:01:27 +00:00
dev: include cost information in usage reports
Docker Image CI / build-and-push-image (push) Waiting to run
Maintain Release Merge PR / update-release-pr (push) Waiting to run
release-please / release-please (push) Waiting to run
test / test (18.x) (push) Waiting to run
test / test (20.x) (push) Waiting to run
test / test (22.x) (push) Waiting to run
Docker Image CI / build-and-push-image (push) Waiting to run
Maintain Release Merge PR / update-release-pr (push) Waiting to run
release-please / release-please (push) Waiting to run
test / test (18.x) (push) Waiting to run
test / test (20.x) (push) Waiting to run
test / test (22.x) (push) Waiting to run
This adds an incremental migration from models reporting only input and output tokens to models reporting full token information. This means AIChatService doesn't need to look up the model details or know about the different types of tokens supported by every model.
This commit is contained in:
@@ -85,36 +85,47 @@ class AIChatService extends BaseService {
|
||||
app_id: details.actor?.type?.app?.id ?? null,
|
||||
service_name: details.service_used,
|
||||
model_name: details.model_used,
|
||||
value_uint_1: details.usage?.input_tokens,
|
||||
value_uint_2: details.usage?.output_tokens,
|
||||
};
|
||||
|
||||
let model_details = this.detail_model_map[details.model_used];
|
||||
if ( Array.isArray(model_details) ) {
|
||||
for ( const model of model_details ) {
|
||||
if ( model.provider === details.service_used ) {
|
||||
model_details = model;
|
||||
break;
|
||||
// New format
|
||||
if ( Array.isArray(details.usage) ) {
|
||||
values.cost = details.usage.reduce((acc, u) => {
|
||||
return acc + u.cost;
|
||||
}, 0);
|
||||
} else {
|
||||
values.value_uint_1 = details.usage?.input_tokens;
|
||||
values.value_uint_2 = details.usage?.output_tokens;
|
||||
|
||||
let model_details = this.detail_model_map[details.model_used];
|
||||
if ( Array.isArray(model_details) ) {
|
||||
for ( const model of model_details ) {
|
||||
if ( model.provider === details.service_used ) {
|
||||
model_details = model;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( Array.isArray(model_details) ) {
|
||||
model_details = model_details[0];
|
||||
}
|
||||
if ( model_details ) {
|
||||
values.cost = 0 + // for formatting
|
||||
if ( Array.isArray(model_details) ) {
|
||||
model_details = model_details[0];
|
||||
}
|
||||
if ( model_details ) {
|
||||
values.cost = 0 + // for formatting
|
||||
|
||||
model_details.cost.input * details.usage.input_tokens
|
||||
// cents/MTok tokens
|
||||
+
|
||||
model_details.cost.input * details.usage.input_tokens
|
||||
// cents/MTok tokens
|
||||
+
|
||||
|
||||
model_details.cost.output * details.usage.output_tokens
|
||||
// cents/MTok tokens
|
||||
;
|
||||
} else {
|
||||
this.log.error('could not find model details', { details });
|
||||
model_details.cost.output * details.usage.output_tokens
|
||||
// cents/MTok tokens
|
||||
;
|
||||
} else {
|
||||
this.log.error('could not find model details', { details });
|
||||
}
|
||||
}
|
||||
|
||||
this.log.noticeme('COST INFO', values);
|
||||
|
||||
|
||||
await this.db.insert('ai_usage', values);
|
||||
|
||||
// USD cost from microcents
|
||||
|
||||
@@ -260,6 +260,9 @@ class OpenAICompletionService extends BaseService {
|
||||
});
|
||||
|
||||
return OpenAIUtil.handle_completion_output({
|
||||
usage_calculator: OpenAIUtil.create_usage_calculator({
|
||||
model_details: (await this.models_()).find(m => m.id === model),
|
||||
}),
|
||||
stream, completion,
|
||||
moderate: moderation && this.check_moderation.bind(this),
|
||||
});
|
||||
|
||||
@@ -57,6 +57,28 @@ module.exports = class OpenAIUtil {
|
||||
return messages;
|
||||
}
|
||||
|
||||
static create_usage_calculator = ({ model_details }) => {
|
||||
return ({ usage }) => {
|
||||
const tokens = [];
|
||||
|
||||
tokens.push({
|
||||
type: 'prompt',
|
||||
model: model_details.id,
|
||||
amount: usage.prompt_tokens,
|
||||
cost: model_details.cost.input * usage.prompt_tokens,
|
||||
});
|
||||
|
||||
tokens.push({
|
||||
type: 'completion',
|
||||
model: model_details.id,
|
||||
amount: usage.completion_tokens,
|
||||
cost: model_details.cost.output * usage.completion_tokens,
|
||||
});
|
||||
|
||||
return tokens;
|
||||
};
|
||||
};
|
||||
|
||||
static create_chat_stream_handler = ({
|
||||
completion, usage_promise,
|
||||
}) => async ({ chatStream }) => {
|
||||
@@ -106,10 +128,7 @@ module.exports = class OpenAIUtil {
|
||||
}
|
||||
}
|
||||
}
|
||||
usage_promise.resolve({
|
||||
input_tokens: last_usage.prompt_tokens,
|
||||
output_tokens: last_usage.completion_tokens,
|
||||
});
|
||||
usage_promise.resolve(last_usage);
|
||||
|
||||
if ( mode === 'text' ) textblock.end();
|
||||
if ( mode === 'tool' ) toolblock.end();
|
||||
@@ -118,7 +137,8 @@ module.exports = class OpenAIUtil {
|
||||
};
|
||||
|
||||
static async handle_completion_output ({
|
||||
stream, completion, moderate
|
||||
stream, completion, moderate,
|
||||
usage_calculator,
|
||||
}) {
|
||||
if ( stream ) {
|
||||
let usage_promise = new putility.libs.promise.TeePromise();
|
||||
@@ -127,12 +147,18 @@ module.exports = class OpenAIUtil {
|
||||
OpenAIUtil.create_chat_stream_handler({
|
||||
completion,
|
||||
usage_promise,
|
||||
usage_calculator,
|
||||
});
|
||||
|
||||
return new TypedValue({ $: 'ai-chat-intermediate' }, {
|
||||
stream: true,
|
||||
init_chat_stream,
|
||||
usage_promise: usage_promise,
|
||||
usage_promise: usage_promise.then(usage => {
|
||||
return usage_calculator ? usage_calculator({ usage }) : {
|
||||
input_tokens: usage.prompt_tokens,
|
||||
output_tokens: usage.completion_tokens,
|
||||
};
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -153,7 +179,7 @@ module.exports = class OpenAIUtil {
|
||||
}
|
||||
|
||||
const ret = completion.choices[0];
|
||||
ret.usage = {
|
||||
ret.usage = usage_calculator ? usage_calculator(completion) : {
|
||||
input_tokens: completion.usage.prompt_tokens,
|
||||
output_tokens: completion.usage.completion_tokens,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user