From 7a1a885e890c7a41cefcf249c569495fff4f2c31 Mon Sep 17 00:00:00 2001 From: Devendra Reddy Pennabadi Date: Wed, 6 May 2026 23:23:52 +0530 Subject: [PATCH] fix: inline http(s) image URLs for Moonshot vision models (#2929) (#2931) Co-authored-by: devareddy05 --- .../providers/moonshot/MoonshotProvider.ts | 8 + .../providers/moonshot/imageHandling.test.ts | 208 ++++++++++++++++++ .../providers/moonshot/imageHandling.ts | 112 ++++++++++ 3 files changed, 328 insertions(+) create mode 100644 src/backend/drivers/ai-chat/providers/moonshot/imageHandling.test.ts create mode 100644 src/backend/drivers/ai-chat/providers/moonshot/imageHandling.ts diff --git a/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts b/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts index eaddc35cf..079a74601 100644 --- a/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts +++ b/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts @@ -27,6 +27,7 @@ import type { ICompleteArguments, IChatCompleteResult, } from '../../types.js'; +import { inlineHttpImageUrls } from './imageHandling.js'; import { MOONSHOT_MODELS } from './models.js'; export class MoonshotProvider implements IChatProvider { @@ -74,6 +75,13 @@ export class MoonshotProvider implements IChatProvider { availableModels.find((m) => [m.id, ...(m.aliases || [])].includes(model), ) || availableModels.find((m) => m.id === this.getDefaultModel())!; + + // Moonshot's vision API doesn't fetch http(s) URLs; inline them + // so callers can pass plain links like other vision providers. 
+ if (modelUsed.modalities?.input?.includes('image')) { + await inlineHttpImageUrls(messages); + } + messages = await OpenAIUtil.process_input_messages(messages); let completion; try { diff --git a/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.test.ts b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.test.ts new file mode 100644 index 000000000..988ee02bf --- /dev/null +++ b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.test.ts @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import { afterEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../../util/secureHttp.js', () => ({ + secureFetch: vi.fn(), +})); + +import { secureFetch } from '../../../../util/secureHttp.js'; +import { inlineHttpImageUrls, MAX_IMAGE_BYTES } from './imageHandling.js'; + +const mockedSecureFetch = vi.mocked(secureFetch); + +const buildResponse = ( + body: Buffer | ArrayBuffer, + { + status = 200, + contentType = 'image/png', + contentLength, + }: { + status?: number; + contentType?: string | null; + contentLength?: string; + } = {}, +): Response => { + const buf = Buffer.isBuffer(body) ? 
body : Buffer.from(body); + const headers = new Headers(); + if (contentType) headers.set('content-type', contentType); + if (contentLength) headers.set('content-length', contentLength); + return { + ok: status >= 200 && status < 300, + status, + headers, + arrayBuffer: async () => + buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength), + } as unknown as Response; +}; + +describe('inlineHttpImageUrls', () => { + afterEach(() => { + mockedSecureFetch.mockReset(); + }); + + it('rewrites http(s) image URLs to base64 data URIs', async () => { + const png = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]); + mockedSecureFetch.mockResolvedValueOnce( + buildResponse(png, { contentType: 'image/png' }), + ); + + const messages = [ + { + role: 'user', + content: [ + { type: 'text', text: 'what is this' }, + { image_url: { url: 'https://example.com/cat.png' } }, + ], + }, + ]; + + await inlineHttpImageUrls(messages); + + expect(mockedSecureFetch).toHaveBeenCalledWith( + 'https://example.com/cat.png', + ); + const part = messages[0].content[1] as { + type?: string; + image_url?: { url?: string }; + }; + expect(part.type).toBe('image_url'); + expect(part.image_url?.url).toBe( + `data:image/png;base64,${png.toString('base64')}`, + ); + }); + + it('leaves data URIs untouched and skips fetching', async () => { + const messages = [ + { + role: 'user', + content: [ + { + type: 'image_url', + image_url: { + url: 'data:image/png;base64,AAAA', + }, + }, + ], + }, + ]; + + await inlineHttpImageUrls(messages); + + expect(mockedSecureFetch).not.toHaveBeenCalled(); + const part = messages[0].content[0] as { image_url?: { url?: string } }; + expect(part.image_url?.url).toBe('data:image/png;base64,AAAA'); + }); + + it('replaces oversized images with a text error block', async () => { + const oversize = Buffer.alloc(10); + mockedSecureFetch.mockResolvedValueOnce( + buildResponse(oversize, { + contentType: 'image/jpeg', + contentLength: String(MAX_IMAGE_BYTES + 1), + 
}), + ); + + const messages = [ + { + role: 'user', + content: [ + { image_url: { url: 'https://example.com/huge.jpg' } }, + ], + }, + ]; + + await inlineHttpImageUrls(messages); + + const part = messages[0].content[0] as { + type?: string; + text?: string; + image_url?: unknown; + }; + expect(part.type).toBe('text'); + expect(part.image_url).toBeUndefined(); + expect(part.text).toContain('exceeds maximum'); + }); + + it('replaces non-image responses with a text error block', async () => { + mockedSecureFetch.mockResolvedValueOnce( + buildResponse(Buffer.from(''), { + contentType: 'text/html', + }), + ); + + const messages = [ + { + role: 'user', + content: [ + { image_url: { url: 'https://example.com/page' } }, + ], + }, + ]; + + await inlineHttpImageUrls(messages); + + const part = messages[0].content[0] as { + type?: string; + text?: string; + }; + expect(part.type).toBe('text'); + expect(part.text).toContain('expected an image'); + }); + + it('replaces fetch failures with a text error block', async () => { + mockedSecureFetch.mockRejectedValueOnce(new Error('boom')); + + const messages = [ + { + role: 'user', + content: [ + { image_url: { url: 'https://example.com/x.png' } }, + ], + }, + ]; + + await inlineHttpImageUrls(messages); + + const part = messages[0].content[0] as { + type?: string; + text?: string; + }; + expect(part.type).toBe('text'); + expect(part.text).toContain('boom'); + }); + + it('ignores non-image-url parts and string content', async () => { + const messages = [ + { role: 'user', content: 'plain text' }, + { + role: 'user', + content: [ + { type: 'text', text: 'still here' }, + { type: 'tool_use', id: 't', name: 'x', input: {} }, + ], + }, + ]; + + await inlineHttpImageUrls(messages); + + expect(mockedSecureFetch).not.toHaveBeenCalled(); + }); +}); diff --git a/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.ts b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.ts new file mode 100644 index 000000000..36b80f2af --- 
/dev/null +++ b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.ts @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2024-present Puter Technologies Inc. + * + * This file is part of Puter. + * + * Puter is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import { secureFetch } from '../../../../util/secureHttp.js'; + +// Matches the OpenAI Chat-Completions inline-upload cap. +export const MAX_IMAGE_BYTES = 5 * 1_000_000; + +interface ImageContentPart { + type?: string; + text?: string; + image_url?: { url?: string }; +} + +interface MessageWithContent { + content?: unknown; +} + +// Moonshot's vision API rejects http(s) image URLs and only accepts base64 +// data URIs or file-id refs, so any web URL must be fetched and inlined. +// Failures become inline text-error parts (same shape as openai/fileUpload.ts). 
// URI schemes are case-insensitive, so `HTTPS://…` must be treated like
// `https://…`. No `g` flag: `.test()` stays stateless across calls.
const HTTP_URL_PATTERN = /^https?:\/\//i;

/**
 * Replaces every http(s) `image_url` content part found in `messages` with a
 * base64 `data:` URI, mutating the parts in place.
 *
 * Messages whose `content` is not an array are skipped, as are parts without
 * a string `image_url.url` and parts whose URL is not http(s) (for example
 * existing `data:` URIs). All qualifying images are fetched concurrently.
 *
 * A failure for one image never rejects the returned promise: the affected
 * part is rewritten into a text part describing the error instead.
 *
 * @param messages Chat messages to rewrite in place.
 * @returns A promise that resolves once every qualifying part was processed.
 */
export async function inlineHttpImageUrls(
    messages: MessageWithContent[],
): Promise<void> {
    const tasks: Array<Promise<void>> = [];
    for (const message of messages) {
        if (!Array.isArray(message?.content)) continue;
        for (const part of message.content as ImageContentPart[]) {
            const url: unknown = part?.image_url?.url;
            // Callers hand us loosely-typed JSON; a truthy non-string `url`
            // must be skipped rather than crash the whole request.
            if (typeof url !== 'string' || !HTTP_URL_PATTERN.test(url)) {
                continue;
            }
            tasks.push(inlineOne(part, url));
        }
    }
    await Promise.all(tasks);
}

/**
 * Fetches `url` and rewrites `part` into an `image_url` part carrying a
 * base64 data URI. On any failure (non-2xx status, payload larger than
 * `MAX_IMAGE_BYTES`, non-image content type, thrown fetch error) the part is
 * turned into a text error part instead. Never rejects.
 */
async function inlineOne(part: ImageContentPart, url: string): Promise<void> {
    try {
        const response = await secureFetch(url);
        if (!response.ok) {
            discardBody(response);
            setTextError(
                part,
                `failed to fetch image (status ${response.status})`,
            );
            return;
        }

        // Cheap header-based checks first, so an oversized or non-image
        // payload is rejected without buffering its body.
        const declaredLength = Number(
            response.headers.get('content-length') ?? NaN,
        );
        if (
            Number.isFinite(declaredLength) &&
            declaredLength > MAX_IMAGE_BYTES
        ) {
            discardBody(response);
            setTextError(
                part,
                `image exceeds maximum of ${MAX_IMAGE_BYTES} bytes`,
            );
            return;
        }

        // Drop parameters such as `; charset=…` and normalise the case,
        // since MIME types are case-insensitive.
        const mimeType = (response.headers.get('content-type') ?? '')
            .split(';')[0]
            ?.trim()
            .toLowerCase();
        if (!mimeType || !mimeType.startsWith('image/')) {
            discardBody(response);
            setTextError(
                part,
                `expected an image, got ${mimeType || 'unknown MIME type'}`,
            );
            return;
        }

        // `content-length` may be absent or wrong, so re-check the real size.
        const arrayBuf = await response.arrayBuffer();
        if (arrayBuf.byteLength > MAX_IMAGE_BYTES) {
            setTextError(
                part,
                `image exceeds maximum of ${MAX_IMAGE_BYTES} bytes`,
            );
            return;
        }

        const base64 = Buffer.from(arrayBuf).toString('base64');
        part.type = 'image_url';
        part.image_url = { url: `data:${mimeType};base64,${base64}` };
    } catch (err: unknown) {
        const reason =
            err instanceof Error && err.message
                ? err.message
                : 'failed to fetch image';
        setTextError(part, reason);
    }
}

/**
 * Best-effort release of a response whose body will not be read. `body` can
 * be absent (bodiless responses, test doubles), and a cancellation failure
 * must never mask the error that is being reported to the caller.
 */
function discardBody(response: Response): void {
    try {
        void response.body?.cancel().catch(() => undefined);
    } catch {
        // Nothing useful to do: the body is being abandoned either way.
    }
}

/**
 * Converts `part` in place into a text part that reports `reason`, removing
 * its `image_url` so no unusable link is forwarded.
 */
function setTextError(part: ImageContentPart, reason: string): void {
    delete part.image_url;
    part.type = 'text';
    // Phrasing matches openai/fileUpload.ts so the model reads it as a
    // system note, not user input.
    part.text = `{error: ${reason}; the user did not write this message}`;
}