diff --git a/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts b/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts
index eaddc35cf..079a74601 100644
--- a/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts
+++ b/src/backend/drivers/ai-chat/providers/moonshot/MoonshotProvider.ts
@@ -27,6 +27,7 @@ import type {
ICompleteArguments,
IChatCompleteResult,
} from '../../types.js';
+import { inlineHttpImageUrls } from './imageHandling.js';
import { MOONSHOT_MODELS } from './models.js';
export class MoonshotProvider implements IChatProvider {
@@ -74,6 +75,13 @@ export class MoonshotProvider implements IChatProvider {
availableModels.find((m) =>
[m.id, ...(m.aliases || [])].includes(model),
) || availableModels.find((m) => m.id === this.getDefaultModel())!;
+
+ // Moonshot's vision API doesn't fetch http(s) URLs; inline them
+ // so callers can pass plain links like other vision providers.
+ if (modelUsed.modalities?.input?.includes('image')) {
+ await inlineHttpImageUrls(messages);
+ }
+
messages = await OpenAIUtil.process_input_messages(messages);
let completion;
try {
diff --git a/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.test.ts b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.test.ts
new file mode 100644
index 000000000..988ee02bf
--- /dev/null
+++ b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.test.ts
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+vi.mock('../../../../util/secureHttp.js', () => ({
+ secureFetch: vi.fn(),
+}));
+
+import { secureFetch } from '../../../../util/secureHttp.js';
+import { inlineHttpImageUrls, MAX_IMAGE_BYTES } from './imageHandling.js';
+
+const mockedSecureFetch = vi.mocked(secureFetch);
+
+// Builds the minimal Response-like stub that the code under test reads.
+const buildResponse = (
+    body: Buffer | ArrayBuffer,
+    options: {
+        status?: number;
+        contentType?: string | null;
+        contentLength?: string;
+    } = {},
+): Response => {
+    const { status = 200, contentType = 'image/png', contentLength } = options;
+    const bytes = Buffer.isBuffer(body) ? body : Buffer.from(body);
+    const end = bytes.byteOffset + bytes.byteLength;
+    const headers = new Headers();
+    // A falsy value (null, '') means "leave this header off".
+    if (contentType) headers.set('content-type', contentType);
+    if (contentLength) headers.set('content-length', contentLength);
+    const stub = {
+        ok: status >= 200 && status < 300,
+        status,
+        headers,
+        arrayBuffer: async () => bytes.buffer.slice(bytes.byteOffset, end),
+    };
+    return stub as unknown as Response;
+};
+
+// Tests for inlineHttpImageUrls; secureFetch is mocked, so no network I/O occurs.
+describe('inlineHttpImageUrls', () => {
+    afterEach(() => {
+        // Clear call history and queued one-shot responses between tests.
+        mockedSecureFetch.mockReset();
+    });
+
+    it('rewrites http(s) image URLs to base64 data URIs', async () => {
+        // The 8-byte PNG file signature serves as a tiny image body.
+        const png = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
+        mockedSecureFetch.mockResolvedValueOnce(
+            buildResponse(png, { contentType: 'image/png' }),
+        );
+
+        const messages = [
+            {
+                role: 'user',
+                content: [
+                    { type: 'text', text: 'what is this' },
+                    // No `type` here: inlining is expected to set it.
+                    { image_url: { url: 'https://example.com/cat.png' } },
+                ],
+            },
+        ];
+
+        await inlineHttpImageUrls(messages);
+
+        expect(mockedSecureFetch).toHaveBeenCalledWith(
+            'https://example.com/cat.png',
+        );
+        const part = messages[0].content[1] as {
+            type?: string;
+            image_url?: { url?: string };
+        };
+        expect(part.type).toBe('image_url');
+        expect(part.image_url?.url).toBe(
+            `data:image/png;base64,${png.toString('base64')}`,
+        );
+    });
+
+    it('leaves data URIs untouched and skips fetching', async () => {
+        const messages = [
+            {
+                role: 'user',
+                content: [
+                    {
+                        type: 'image_url',
+                        image_url: {
+                            url: 'data:image/png;base64,AAAA',
+                        },
+                    },
+                ],
+            },
+        ];
+
+        await inlineHttpImageUrls(messages);
+
+        expect(mockedSecureFetch).not.toHaveBeenCalled();
+        const part = messages[0].content[0] as { image_url?: { url?: string } };
+        expect(part.image_url?.url).toBe('data:image/png;base64,AAAA');
+    });
+
+    it('replaces oversized images with a text error block', async () => {
+        const oversize = Buffer.alloc(10);
+        // The declared Content-Length (not the 10-byte body) exceeds the cap.
+        mockedSecureFetch.mockResolvedValueOnce(
+            buildResponse(oversize, {
+                contentType: 'image/jpeg',
+                contentLength: String(MAX_IMAGE_BYTES + 1),
+            }),
+        );
+
+        const messages = [
+            {
+                role: 'user',
+                content: [
+                    { image_url: { url: 'https://example.com/huge.jpg' } },
+                ],
+            },
+        ];
+
+        await inlineHttpImageUrls(messages);
+
+        const part = messages[0].content[0] as {
+            type?: string;
+            text?: string;
+            image_url?: unknown;
+        };
+        expect(part.type).toBe('text');
+        expect(part.image_url).toBeUndefined();
+        expect(part.text).toContain('exceeds maximum');
+    });
+
+    it('replaces non-image responses with a text error block', async () => {
+        // An HTML body served as text/html must not be inlined.
+        mockedSecureFetch.mockResolvedValueOnce(
+            buildResponse(Buffer.from('<html></html>'), {
+                contentType: 'text/html',
+            }),
+        );
+
+        const messages = [
+            {
+                role: 'user',
+                content: [
+                    { image_url: { url: 'https://example.com/page' } },
+                ],
+            },
+        ];
+
+        await inlineHttpImageUrls(messages);
+
+        const part = messages[0].content[0] as { type?: string; text?: string };
+        expect(part.type).toBe('text');
+        expect(part.text).toContain('expected an image');
+    });
+
+    it('replaces fetch failures with a text error block', async () => {
+        mockedSecureFetch.mockRejectedValueOnce(new Error('boom'));
+
+        const messages = [
+            {
+                role: 'user',
+                content: [
+                    { image_url: { url: 'https://example.com/x.png' } },
+                ],
+            },
+        ];
+
+        await inlineHttpImageUrls(messages);
+
+        const part = messages[0].content[0] as { type?: string; text?: string };
+        expect(part.type).toBe('text');
+        expect(part.text).toContain('boom');
+    });
+
+    it('ignores non-image-url parts and string content', async () => {
+        const messages = [
+            { role: 'user', content: 'plain text' },
+            {
+                role: 'user',
+                content: [
+                    { type: 'text', text: 'still here' },
+                    { type: 'tool_use', id: 't', name: 'x', input: {} },
+                ],
+            },
+        ];
+
+        await inlineHttpImageUrls(messages);
+
+        expect(mockedSecureFetch).not.toHaveBeenCalled();
+    });
+});
diff --git a/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.ts b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.ts
new file mode 100644
index 000000000..36b80f2af
--- /dev/null
+++ b/src/backend/drivers/ai-chat/providers/moonshot/imageHandling.ts
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2024-present Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import { secureFetch } from '../../../../util/secureHttp.js';
+
+// Inline-size ceiling in bytes; matches the OpenAI Chat-Completions inline-upload cap.
+export const MAX_IMAGE_BYTES = 5_000_000;
+
+// A content-array entry, reduced to the fields this module reads or writes.
+interface ImageContentPart {
+    image_url?: { url?: string };
+    text?: string;
+    type?: string;
+}
+
+// A chat message, reduced to the one field this module inspects.
+interface MessageWithContent { content?: unknown }
+
+// Rewrites, in place, every http(s) `image_url` part in `messages` to a base64
+// data URI. Moonshot's vision API only accepts data URIs or file-id refs, so
+// web URLs are fetched here (concurrently); a part that cannot be inlined is
+// turned into a text-error part (same shape as openai/fileUpload.ts).
+export async function inlineHttpImageUrls(
+    messages: MessageWithContent[],
+): Promise<void> {
+    const webUrl = /^https?:\/\//i; // URL schemes are case-insensitive
+    const tasks: Array<Promise<void>> = [];
+    for (const message of messages) {
+        if (!Array.isArray(message.content)) continue;
+        for (const part of message.content as ImageContentPart[]) {
+            const url = part?.image_url?.url;
+            // Untyped input: skip non-strings, data URIs and file-id refs.
+            if (typeof url !== 'string' || !webUrl.test(url)) continue;
+            tasks.push(inlineOne(part, url));
+        }
+    }
+    await Promise.all(tasks);
+}
+
+// Fetches `url` and rewrites `part`, in place, into a base64 data-URI image
+// part. Errors are not propagated: a bad status, a non-image or oversized
+// payload, or a fetch error turns `part` into a text-error block instead.
+async function inlineOne(part: ImageContentPart, url: string): Promise<void> {
+    const tooLarge = `image exceeds maximum of ${MAX_IMAGE_BYTES} bytes`;
+    try {
+        const response = await secureFetch(url);
+        if (!response.ok) {
+            setTextError(
+                part,
+                `failed to fetch image (status ${response.status})`,
+            );
+            return;
+        }
+        // Check the headers first: reject bad payloads before buffering them.
+        const { headers } = response;
+        const contentLength = Number(headers.get('content-length') ?? NaN);
+        if (Number.isFinite(contentLength) && contentLength > MAX_IMAGE_BYTES) {
+            setTextError(part, tooLarge);
+            return;
+        }
+
+        // Media types are case-insensitive; parameters (`; charset=`) are cut.
+        const mimeType = (headers.get('content-type') ?? '')
+            .split(';')[0]
+            ?.trim()
+            .toLowerCase();
+        if (!mimeType || !mimeType.startsWith('image/')) {
+            setTextError(
+                part,
+                `expected an image, got ${mimeType || 'unknown MIME type'}`,
+            );
+            return;
+        }
+
+        // Content-Length may be missing or wrong, so re-check the real size.
+        const arrayBuf = await response.arrayBuffer();
+        if (arrayBuf.byteLength > MAX_IMAGE_BYTES) {
+            setTextError(part, tooLarge);
+            return;
+        }
+
+        const base64 = Buffer.from(arrayBuf).toString('base64');
+        part.type = 'image_url';
+        part.image_url = { url: `data:${mimeType};base64,${base64}` };
+    } catch (err) {
+        setTextError(part, (err as Error)?.message || 'failed to fetch image');
+    }
+}
+
+function setTextError(part: ImageContentPart, reason: string): void {
+    // Turn the part into a plain text block; the wording mirrors
+    // openai/fileUpload.ts so the model reads it as a system note.
+    const note = `{error: ${reason}; the user did not write this message}`;
+    delete part.image_url;
+    Object.assign(part, { type: 'text', text: note });
+}