support video inputs in .chat (#2740)
Docker Image CI / build-and-push-image (push) Has been cancelled
Maintain Release Merge PR / update-release-pr (push) Has been cancelled
Notify HeyPuter / notify (push) Has been cancelled
release-please / release-please (push) Has been cancelled
test / test-backend (24.x) (push) Has been cancelled
test / API tests (node env, api-test) (24.x) (push) Has been cancelled
test / puterjs (node env, vitest) (24.x) (push) Has been cancelled

* support video inputs in .chat

* update docs for .chat
This commit is contained in:
Shruc
2026-03-30 17:26:34 +03:00
committed by GitHub
parent 7fc9a97269
commit cea40b3e78
7 changed files with 83 additions and 28 deletions
@@ -13,9 +13,12 @@ export const process_input_messages = async (messages) => {
const content = msg.content;
for ( const o of content ) {
if ( ! o['image_url'] ) continue;
if ( o.type ) continue;
o.type = 'image_url';
if ( o['image_url'] && !o.type ) {
o.type = 'image_url';
}
if ( o['video_url'] && !o.type ) {
o.type = 'video_url';
}
}
// coerce tool calls
@@ -93,9 +96,12 @@ export const process_input_messages_responses_api = async (messages) => {
const content = msg.content;
for ( const o of content ) {
if ( ! o['image_url'] ) continue;
if ( o.type ) continue;
o.type = 'image_url';
if ( o['image_url'] && !o.type ) {
o.type = 'image_url';
}
if ( o['video_url'] && !o.type ) {
o.type = 'video_url';
}
}
// coerce tool calls
+24 -7
View File
@@ -1,6 +1,6 @@
---
title: puter.ai.chat()
description: Chat with AI models, analyze images, and perform function calls using 500+ models from OpenAI, Anthropic, Google, and more.
description: Chat with AI models, analyze images and videos, and perform function calls using 500+ models from OpenAI, Anthropic, Google, and more.
platforms: [websites, apps, nodejs, workers]
---
@@ -12,8 +12,8 @@ Given a prompt returns the completion that best matches the prompt.
puter.ai.chat(prompt)
puter.ai.chat(prompt, options = {})
puter.ai.chat(prompt, testMode = false, options = {})
puter.ai.chat(prompt, image, testMode = false, options = {})
puter.ai.chat(prompt, [imageURLArray], testMode = false, options = {})
puter.ai.chat(prompt, media, testMode = false, options = {})
puter.ai.chat(prompt, [mediaURLArray], testMode = false, options = {})
puter.ai.chat([messages], testMode = false, options = {})
```
@@ -39,13 +39,13 @@ An object containing the following properties:
A boolean indicating whether you want to use the test API. Defaults to `false`. This is useful for testing your code without using up API credits.
#### `image` (String | File)
#### `media` (String | File)
A string containing the URL or Puter path of the image, or a `File` object containing the image you want to provide as context for the completion.
A string containing the URL or Puter path of an image or video, or a `File` object containing the media you want to provide as context for the completion.
#### `imageURLArray` (Array)
#### `mediaURLArray` (Array)
An array of strings containing the URLs of images you want to provide as context for the completion.
An array of strings containing the URLs of images or videos you want to provide as context for the completion.
#### `messages` (Array)
@@ -216,6 +216,23 @@ You can find the implementation in our [prompt caching example](/playground/ai-c
</html>
```
<strong class="example-title">Video Analysis</strong>
```html;ai-video-analysis
<html>
<body>
<script src="https://js.puter.com/v2/"></script>
<script>
puter.ai
.chat(`What do you see?`, `https://assets.puter.site/puppy.mp4`, {
model: "reka/reka-edge",
})
.then(puter.print);
</script>
</body>
</html>
```
<strong class="example-title">Stream the response</strong>
```html;ai-chat-stream
+6
View File
@@ -55,6 +55,12 @@ const examples = [
slug: 'ai-gpt-vision',
source: '/playground/examples/ai-gpt-vision.html',
},
{
title: 'Video Analysis',
description: 'Analyze videos with AI using Puter.js. Run and modify this video analysis example instantly in your browser.',
slug: 'ai-video-analysis',
source: '/playground/examples/ai-video-analysis.html',
},
{
title: 'Stream the response',
description: 'Stream AI chat responses in real-time with Puter.js. Run and experiment with this streaming example in the playground.',
@@ -0,0 +1,16 @@
<html>
<body>
<script src="https://js.puter.com/v2/"></script>
<script>
// Print a placeholder right away; the chat call below is asynchronous,
// so nothing else is printed until its promise resolves.
puter.print(`Loading...`);
// Video analysis with Reka Edge: the second argument is a video URL,
// passed alongside the text prompt. The options object selects the model.
puter.ai
.chat(`What do you see?`, `https://assets.puter.site/puppy.mp4`, {
model: "reka/reka-edge",
})
// Print whatever the promise resolves with. No rejection handler is
// attached, so a failed request prints nothing further.
.then(puter.print);
</script>
</body>
</html>
+11 -2
View File
@@ -632,6 +632,15 @@ function arrayBufferToDataUri (arrayBuffer) {
});
}
export {
arrayBufferToDataUri, blob_to_url, blobToDataUri, driverCall, handle_error, handle_resp, initXhr, make_driver_method, parseResponse, setupXhrEventHandlers, uuidv4,
// File extensions (lowercase, without the dot) that mark an input as video.
const VIDEO_EXTENSIONS = ['mp4', 'webm', 'mov', 'mpeg', 'avi', 'mkv', 'm4v', 'ogv'];

/**
 * Decide whether a media input string refers to a video rather than an image.
 *
 * Detection rules:
 *  - non-string inputs are never video;
 *  - `data:` URIs are classified by their declared MIME type only
 *    (`data:video/...` is video, every other `data:` URI is not);
 *  - anything else is classified by the file extension of its last path
 *    segment, ignoring the query string, the fragment and the
 *    `scheme://authority` prefix.
 *
 * @param {*} url - URL, path, or data URI to inspect.
 * @returns {boolean} `true` when the input looks like a video.
 */
const isVideoInput = (url) => {
    if ( typeof url !== 'string' ) return false;

    // A data URI states its own media type; trust that instead of scanning
    // the payload, which may contain arbitrary text such as "notes.mp4".
    if ( /^data:/i.test(url) ) return /^data:video\//i.test(url);

    // Cut at the first '?' or '#', whichever comes first.
    const withoutSuffix = url.split(/[?#]/, 1)[0];

    // Drop "scheme://host[:port]" so a host TLD (e.g. "example.mov") is not
    // mistaken for a file extension.
    const path = withoutSuffix.replace(/^[a-z][a-z0-9+.-]*:\/\/[^/]*/i, '');

    // Only the last path segment can carry the file extension.
    const fileName = path.slice(path.lastIndexOf('/') + 1);
    const dotIndex = fileName.lastIndexOf('.');
    // No dot means no extension; a bare "mp4" is not a video file name.
    if ( dotIndex === -1 ) return false;

    const ext = fileName.slice(dotIndex + 1).toLowerCase();
    return VIDEO_EXTENSIONS.includes(ext);
};
export {
arrayBufferToDataUri, blob_to_url, blobToDataUri, driverCall, handle_error, handle_resp, initXhr, isVideoInput, make_driver_method, parseResponse, setupXhrEventHandlers, uuidv4,
};
+13 -12
View File
@@ -647,36 +647,37 @@ class AI {
requestParams = { messages: [{ content: args[0] }] };
}
// ai.chat(prompt, imageURL/File)
// ai.chat(prompt, imageURL/File, testMode)
// ai.chat(prompt, mediaURL/File)
// ai.chat(prompt, mediaURL/File, testMode)
else if ( typeof args[0] === 'string' && (typeof args[1] === 'string' || args[1] instanceof File) ) {
// if imageURL is a File, transform it to a data URI
// if mediaURL is a File, transform it to a data URI
if ( args[1] instanceof File ) {
args[1] = await utils.blobToDataUri(args[1]);
}
// parse args[1] as an image_url object
const mediaBlock = utils.isVideoInput(args[1])
? { video_url: { url: args[1] } }
: { image_url: { url: args[1] } };
requestParams = {
vision: true,
messages: [
{
content: [
args[0],
{
image_url: {
url: args[1],
},
},
mediaBlock,
],
},
],
};
}
// chat(prompt, [imageURLs])
// chat(prompt, [mediaURLs])
else if ( typeof args[0] === 'string' && Array.isArray(args[1]) ) {
// parse args[1] as an array of image_url objects
for ( let i = 0; i < args[1].length; i++ ) {
args[1][i] = { image_url: { url: args[1][i] } };
const url = args[1][i];
args[1][i] = utils.isVideoInput(url)
? { video_url: { url } }
: { image_url: { url } };
}
requestParams = {
vision: true,
+1 -1
View File
@@ -1,4 +1,4 @@
export type AIMessageContent = string | { image_url?: { url: string } } | Record<string, unknown>;
export type AIMessageContent = string | { image_url?: { url: string } } | { video_url?: { url: string } } | Record<string, unknown>;
export interface ChatMessage {
role?: string;