Skip to content

Commit 76c7edf

Browse files
Enable passing images along with text for models that support multimodal input (#254)
* Enable passing images along with text for models that support multimodal input
* Update comments for image inputs
* Fix spaces and comments
1 parent 35653b3 commit 76c7edf

File tree

1 file changed

+39
-2
lines changed

1 file changed

+39
-2
lines changed

typescript/src/model.ts

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,42 @@ export interface PromptSection {
1313
/**
1414
* Specifies the content of this section.
1515
*/
16-
content: string;
16+
content: string | MultimodalPromptContent[];
1717
}
1818

19+
/**
20+
* GPT-4-vision, GPT-4-omni and GPT-4-turbo allow multi-modal input, where images and text can
21+
* be part of the prompt. To support this, a prompt section's content may be an array of strings, text parts, and image parts instead of a single string.
22+
*/
23+
export type MultimodalPromptContent =
24+
| string
25+
| TextPromptContent
26+
| ImagePromptContent;
27+
28+
export type TextPromptContent = {
29+
type: "text";
30+
text: string;
31+
};
32+
33+
export type ImagePromptContent = {
34+
type: "image_url";
35+
image_url: ImageUrl;
36+
};
37+
38+
export type ImageUrl = {
39+
/**
40+
* This could be a URL to a hosted image, or the base64-encoded image content.
41+
*/
42+
url: string;
43+
44+
/**
45+
* Controls how the model processes the image and generates its textual understanding.
46+
* In "low" mode, the model treats the image as 512x512px, while "high" mode considers
47+
* the image at full size.
48+
*/
49+
detail?: "auto" | "low" | "high";
50+
};
51+
1952
/**
2053
* Represents an AI language model that can complete prompts. TypeChat uses an implementation of this
2154
* interface to communicate with an AI service that can translate natural language requests to JSON
@@ -135,7 +168,11 @@ function createFetchLanguageModel(url: string, headers: object, defaultParams: o
135168
const response = await fetch(url, options);
136169
if (response.ok) {
137170
const json = await response.json() as { choices: { message: PromptSection }[] };
138-
return success(json.choices[0].message.content ?? "");
171+
if (typeof json.choices[0].message.content === "string") {
172+
return success(json.choices[0].message.content ?? "");
173+
} else {
174+
return error(`REST API unexpected response format: ${JSON.stringify(json.choices[0].message.content)}`);
175+
}
139176
}
140177
if (!isTransientHttpError(response.status) || retryCount >= retryMaxAttempts) {
141178
return error(`REST API error ${response.status}: ${response.statusText}`);

0 commit comments

Comments
 (0)