fix: usage-limited error handling (#2423)

This commit is contained in:
Daniel Salazar
2026-02-05 15:15:27 -08:00
committed by GitHub
parent da0907d3c5
commit 5fdfae6087
3 changed files with 11 additions and 153 deletions
+2 -2
View File
@@ -80,7 +80,7 @@ class ChatAPIService extends BaseService {
});
// Return the list of models
res.json({ models: models.filter(e => !['costly', 'fake', 'abuse', 'usage-limited', 'model-fallback-test-1'].includes(e)) });
res.json({ models: models.filter(e => !['costly', 'fake', 'abuse', 'model-fallback-test-1'].includes(e)) });
} catch ( error ) {
this.log.error('Error fetching models:', error);
throw APIError.create('internal_server_error');
@@ -103,7 +103,7 @@ class ChatAPIService extends BaseService {
});
// Return the detailed list of models
res.json({ models: models.filter((e) => !['costly', 'fake', 'abuse', 'usage-limited', 'model-fallback-test-1'].includes(e.id)) });
res.json({ models: models.filter((e) => !['costly', 'fake', 'abuse', 'model-fallback-test-1'].includes(e.id)) });
} catch ( error ) {
this.log.error('Error fetching model details:', error);
throw APIError.create('internal_server_error');
@@ -44,7 +44,6 @@ import { OpenAiResponsesChatProvider } from './providers/OpenAiProvider/OpenAiCh
import { OpenRouterProvider } from './providers/OpenRouterProvider/OpenRouterProvider.js';
import { TogetherAIProvider } from './providers/TogetherAiProvider/TogetherAIProvider.js';
import { IChatModel, IChatProvider, ICompleteArguments } from './providers/types.js';
import { UsageLimitedChatProvider } from './providers/UsageLimitedChatProvider.js';
import { XAIProvider } from './providers/XAIProvider/XAIProvider.js';
import { redisClient } from '../../../clients/redis/redisSingleton.js';
@@ -179,9 +178,8 @@ export class AIChatService extends BaseService {
this.#providers['ollama'] = new OllamaChatProvider(ollamaConfig, this.meteringService);
}
// fake and usage-limited providers last
// fake providers last
this.#providers['fake-chat'] = new FakeChatProvider();
this.#providers['usage-limited-chat'] = new UsageLimitedChatProvider();
// emit event for extensions to add providers
const extensionProviders = {} as Record<string, IChatProvider>;
@@ -310,7 +308,6 @@ export class AIChatService extends BaseService {
}
let model = this.getModel({ modelId: parameters.model, provider: intendedProvider }) || await this.getFallbackModel(parameters.model, [], []);
const abuseModel = this.getModel({ modelId: 'abuse' });
const usageLimitedModel = this.getModel({ modelId: 'usage-limited' });
const completionId = cuid2();
const event = {
@@ -377,7 +374,10 @@ export class AIChatService extends BaseService {
// Handle usage limits reached case
if ( ! usageAllowed ) {
model = usageLimitedModel;
throw APIError.create('insufficient_funds', new Error('No usage left for request.'), {
delegate: 'usage-limited-chat',
message: 'No usage left for request.',
});
}
// block non subscriber only models for non-subscribers
@@ -494,7 +494,10 @@ export class AIChatService extends BaseService {
const fallbackUsageAllowed = await this.meteringService.hasEnoughCredits(actor, 1); // we checked earlier, assume same costs
if ( ! fallbackUsageAllowed ) {
fallBackModel = usageLimitedModel;
throw APIError.create('insufficient_funds', new Error('No usage left for request.'), {
delegate: 'usage-limited-chat',
message: 'No usage left for request.',
});
}
const provider = this.#providers[fallBackModel.provider!];
@@ -519,11 +522,6 @@ export class AIChatService extends BaseService {
resMetadata.service_used = model.provider; // legacy field
resMetadata.providerUsed = model.id;
// Add flag if we're using the usage-limited service
if ( model.provider === 'usage-limited-chat' ) {
resMetadata.usage_limited = true;
}
const username = actor.type?.user?.username;
if ( ! res! ) {
@@ -1,140 +0,0 @@
/*
* Copyright (C) 2024-present Puter Technologies Inc.
*
* This file is part of Puter.
*
* Puter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import dedent from 'dedent';
import { PassThrough } from 'stream';
import Streaming from '../../utils/Streaming.js';
import { IChatProvider, ICompleteArguments } from './types.js';
/**
 * UsageLimitedChatProvider - a stub chat provider that always replies with a
 * "usage limit reached" message instead of contacting a real AI backend.
 * It mirrors the response shape of genuine providers for both streaming and
 * non-streaming requests, so callers can swap it in transparently once an
 * account has exhausted its credits.
 */
export class UsageLimitedChatProvider implements IChatProvider {
    /** Describe the single pseudo-model this provider exposes. */
    models (): ReturnType<IChatProvider['models']> {
        const usageLimitedModel = {
            id: 'usage-limited',
            name: 'Usage Limited',
            context: 16384,
            costs_currency: 'usd-cents',
            input_cost_key: 'input',
            output_cost_key: 'output',
            max_tokens: 16384,
            costs: {
                tokens: 1_000_000,
                input: 0,
                output: 0,
            },
        };
        return [usageLimitedModel];
    }

    /** Model ids offered by this provider. */
    list () {
        return ['usage-limited'];
    }

    /**
     * Produce the limit-reached reply. When `stream` is set, the message is
     * pushed through an AIChatStream shortly after return; otherwise a fully
     * formed assistant message is returned in the standard provider shape.
     */
    async complete ({ stream, customLimitMessage }: ICompleteArguments): ReturnType<IChatProvider['complete']> {
        const replyText = customLimitMessage || dedent(`
            You have reached your AI usage limit for this account.
        `);

        if ( ! stream ) {
            // Non-streaming: hand back one complete assistant message.
            return {
                message: {
                    id: '00000000-0000-0000-0000-000000000000',
                    type: 'message',
                    role: 'assistant',
                    model: 'usage-limited',
                    content: [
                        {
                            'type': 'text',
                            'text': replyText,
                        },
                    ],
                    stop_reason: 'end_turn',
                    stop_sequence: null,
                    usage: {
                        'input_tokens': 0,
                        'output_tokens': 1,
                    },
                },
                usage: {
                    'input_tokens': 0,
                    'output_tokens': 1,
                },
                finish_reason: 'stop',
            };
        }

        // Streaming: back the chat stream with a PassThrough we control.
        const passThrough = new PassThrough();
        const sourceStream = new Streaming.AIChatStream({
            stream: passThrough,
        });

        // Defer the writes so the caller has a chance to attach its stream
        // (via init_chat_stream) before any events flow.
        setTimeout(() => {
            sourceStream.write({
                type: 'content_block_start',
                index: 0,
            });
            sourceStream.write({
                type: 'content_block_delta',
                index: 0,
                delta: {
                    type: 'text',
                    text: replyText,
                },
            });
            sourceStream.write({
                type: 'content_block_stop',
                index: 0,
            });
            sourceStream.write({
                type: 'message_stop',
                stop_reason: 'end_turn',
            });
            sourceStream.end();
        }, 10);

        return {
            stream: true,
            init_chat_stream: async ({ chatStream: cs }) => {
                // Forward everything written above into the caller's stream.
                sourceStream.stream.pipe(cs.stream);
            },
            finally_fn: async () => {
                // Nothing to clean up for this stub provider.
            },
        };
    }

    /** Moderation is not supported by this stub provider. */
    checkModeration (_text: string): ReturnType<IChatProvider['checkModeration']> {
        throw new Error('Method not implemented.');
    }

    /** Default (and only) model id. */
    getDefaultModel () {
        return 'usage-limited';
    }
}