diff --git a/src/backend/src/modules/puterai/GeminiImageGenerationService.js b/src/backend/src/modules/puterai/GeminiImageGenerationService.js
index ee4dc9efd..1744f40c6 100644
--- a/src/backend/src/modules/puterai/GeminiImageGenerationService.js
+++ b/src/backend/src/modules/puterai/GeminiImageGenerationService.js
@@ -88,8 +88,10 @@ class GeminiImageGenerationService extends BaseService {
                     input_image_mime_type
                 });
 
+                // Determine if this is a data URL or web URL
+                const isDataUrl = url.startsWith('data:');
                 const image = new TypedValue({
-                    $: 'string:url:web',
+                    $: isDataUrl ? 'string:url:data' : 'string:url:web',
                     content_type: 'image'
                 }, url);
 
diff --git a/src/backend/src/services/drivers/CoercionService.js b/src/backend/src/services/drivers/CoercionService.js
index 13f429bf3..a3a3a13b7 100644
--- a/src/backend/src/services/drivers/CoercionService.js
+++ b/src/backend/src/services/drivers/CoercionService.js
@@ -90,6 +90,58 @@ class CoercionService extends BaseService {
                 }, response.data);
             }
         });
+
+        // Add coercion for data URLs to streams
+        this.coercions_.push({
+            produces: {
+                $: 'stream',
+                content_type: 'image'
+            },
+            consumes: {
+                $: 'string:url:data',
+                content_type: 'image'
+            },
+            /**
+             * Decode a data URL into a stream of its bytes.
+             *
+             * @param {TypedValue} typed_value - value whose `.value` holds the data URL string
+             * @returns {Promise<TypedValue>} `stream`-typed value; content_type is the
+             *   MIME type read from the URL header, or 'image/png' when none is found
+             * @throws {TypeError} if the URL has no "," between header and payload
+             */
+            coerce: async typed_value => {
+                this.log.noticeme('data URL coercion is running!');
+
+                const data_url = typed_value.value;
+
+                // Header ("data:<mime>[;params]") and payload are separated by the
+                // first comma; without one there is no payload to decode.
+                const comma_index = data_url.indexOf(',');
+                if ( comma_index === -1 ) {
+                    throw new TypeError('invalid data URL: missing "," separator');
+                }
+                const header = data_url.slice(0, comma_index);
+
+                // NOTE(review): the payload is always decoded as base64; a data URL
+                // without ";base64" would be mis-decoded - confirm callers only
+                // pass base64-encoded data URLs.
+                const data = data_url.slice(comma_index + 1);
+                const buffer = Buffer.from(data, 'base64');
+
+                const { PassThrough } = require('stream');
+                const stream = new PassThrough();
+                stream.end(buffer);
+
+                // Extract content type from the header only, so the payload can
+                // never leak into the match when the URL carries no ";" parameters.
+                const contentType = header.match(/^data:([^;]+)/)?.[1] || 'image/png';
+
+                return new TypedValue({
+                    $: 'stream',
+                    content_type: contentType,
+                }, stream);
+            }
+        });
     }
 
     /**
diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js
index a97a46f35..e1051e340 100644
--- a/src/puter-js/src/modules/AI.js
+++ b/src/puter-js/src/modules/AI.js
@@ -604,6 +604,31 @@ class AI{
         }).call(this, requestParams);
     }
 
+    /**
+     * Generate images from text prompts or perform image-to-image generation
+     *
+     * @param {string|object} prompt - Text prompt or options object
+     * @param {object|boolean} [options] - Generation options or test mode flag
+     * @param {string} [options.prompt] - Text description of the image to generate
+     * @param {string} [options.model] - Model to use (e.g., "gemini-2.5-flash-image-preview")
+     * @param {object} [options.ratio] - Image dimensions (e.g., {w: 1024, h: 1024})
+     * @param {string} [options.input_image] - Base64 encoded input image for image-to-image generation
+     * @param {string} [options.input_image_mime_type] - MIME type of input image (e.g., "image/png")
+     * @returns {Promise} Generated image object with src property
+     *
+     * @example
+     * // Text-to-image
+     * const img = await puter.ai.txt2img("A beautiful sunset");
+     *
+     * @example
+     * // Image-to-image
+     * const img = await puter.ai.txt2img({
+     *   prompt: "Transform this into a watercolor painting",
+     *   input_image: base64ImageData,
+     *   input_image_mime_type: "image/png",
+     *   model: "gemini-2.5-flash-image-preview"
+     * });
+     */
     txt2img = async (...args) => {
         let options = {};
         let testMode = false;