Skip to content

Commit 99b8247

Browse files
authored
Enhance data type (#378)
1 parent 74c5a15 commit 99b8247

File tree

21 files changed

+509
-514
lines changed

21 files changed

+509
-514
lines changed

.changeset/selfish-tips-lie.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"create-llama": patch
3+
---
4+
5+
Simplify and unify the handling of file uploads

templates/components/engines/python/agent/tools/interpreter.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import uuid
55
from typing import List, Optional
66

7-
from app.engine.utils.file_helper import FileMetadata, save_file
7+
from app.services.file import DocumentFile, FileService
88
from e2b_code_interpreter import CodeInterpreter
99
from e2b_code_interpreter.models import Logs
1010
from llama_index.core.tools import FunctionTool
@@ -32,7 +32,7 @@ class E2BCodeInterpreter:
3232
output_dir = "output/tools"
3333
uploaded_files_dir = "output/uploaded"
3434

35-
def __init__(self, api_key: str = None):
35+
def __init__(self, api_key: Optional[str] = None):
3636
if api_key is None:
3737
api_key = os.getenv("E2B_API_KEY")
3838
filesever_url_prefix = os.getenv("FILESERVER_URL_PREFIX")
@@ -72,15 +72,17 @@ def _init_interpreter(self, sandbox_files: List[str] = []):
7272
self.interpreter.files.write(file_path, content)
7373
logger.info(f"Uploaded {len(sandbox_files)} files to sandbox")
7474

75-
def _save_to_disk(self, base64_data: str, ext: str) -> FileMetadata:
75+
def _save_to_disk(self, base64_data: str, ext: str) -> DocumentFile:
7676
buffer = base64.b64decode(base64_data)
7777

78-
filename = f"{uuid.uuid4()}.{ext}" # generate a unique filename
79-
output_path = os.path.join(self.output_dir, filename)
78+
# Output from e2b doesn't have a name. Create a random name for it.
79+
filename = f"e2b_file_{uuid.uuid4()}.{ext}"
8080

81-
file_metadata = save_file(buffer, file_path=output_path)
81+
document_file = FileService.save_file(
82+
buffer, file_name=filename, save_dir=self.output_dir
83+
)
8284

83-
return file_metadata
85+
return document_file
8486

8587
def _parse_result(self, result) -> List[InterpreterExtraResult]:
8688
"""
@@ -99,12 +101,12 @@ def _parse_result(self, result) -> List[InterpreterExtraResult]:
99101
for ext, data in zip(formats, results):
100102
match ext:
101103
case "png" | "svg" | "jpeg" | "pdf":
102-
file_metadata = self._save_to_disk(data, ext)
104+
document_file = self._save_to_disk(data, ext)
103105
output.append(
104106
InterpreterExtraResult(
105107
type=ext,
106-
filename=file_metadata.name,
107-
url=file_metadata.url,
108+
filename=document_file.name,
109+
url=document_file.url,
108110
)
109111
)
110112
case _:

templates/components/engines/typescript/agent/tools/interpreter.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,16 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
111111
// upload files to sandbox
112112
if (input.sandboxFiles) {
113113
console.log(`Uploading ${input.sandboxFiles.length} files to sandbox`);
114-
for (const filePath of input.sandboxFiles) {
115-
const fileName = path.basename(filePath);
116-
const localFilePath = path.join(this.uploadedFilesDir, fileName);
117-
const content = fs.readFileSync(localFilePath);
118-
await this.codeInterpreter?.files.write(filePath, content);
114+
try {
115+
for (const filePath of input.sandboxFiles) {
116+
const fileName = path.basename(filePath);
117+
const localFilePath = path.join(this.uploadedFilesDir, fileName);
118+
const content = fs.readFileSync(localFilePath);
119+
await this.codeInterpreter?.files.write(filePath, content);
120+
}
121+
} catch (error) {
122+
console.error("Got error when uploading files to sandbox", error);
119123
}
120-
console.log(`Uploaded ${input.sandboxFiles.length} files to sandbox`);
121124
}
122125
return this.codeInterpreter;
123126
}

templates/components/llamaindex/typescript/documents/helper.ts

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import crypto from "node:crypto";
33
import fs from "node:fs";
44
import path from "node:path";
55
import { getExtractors } from "../../engine/loader";
6+
import { DocumentFile } from "../streaming/annotations";
67

78
const MIME_TYPE_TO_EXT: Record<string, string> = {
89
"application/pdf": "pdf",
@@ -14,43 +15,38 @@ const MIME_TYPE_TO_EXT: Record<string, string> = {
1415

1516
const UPLOADED_FOLDER = "output/uploaded";
1617

17-
export type FileMetadata = {
18-
id: string;
19-
name: string;
20-
url: string;
21-
refs: string[];
22-
};
23-
2418
export async function storeAndParseFile(
25-
filename: string,
19+
name: string,
2620
fileBuffer: Buffer,
2721
mimeType: string,
28-
): Promise<FileMetadata> {
29-
const fileMetadata = await storeFile(filename, fileBuffer, mimeType);
30-
const documents: Document[] = await parseFile(fileBuffer, filename, mimeType);
22+
): Promise<DocumentFile> {
23+
const file = await storeFile(name, fileBuffer, mimeType);
24+
const documents: Document[] = await parseFile(fileBuffer, name, mimeType);
3125
// Update document IDs in the file metadata
32-
fileMetadata.refs = documents.map((document) => document.id_ as string);
33-
return fileMetadata;
26+
file.refs = documents.map((document) => document.id_ as string);
27+
return file;
3428
}
3529

3630
export async function storeFile(
37-
filename: string,
31+
name: string,
3832
fileBuffer: Buffer,
3933
mimeType: string,
4034
) {
4135
const fileExt = MIME_TYPE_TO_EXT[mimeType];
4236
if (!fileExt) throw new Error(`Unsupported document type: ${mimeType}`);
4337

4438
const fileId = crypto.randomUUID();
45-
const newFilename = `${fileId}_${sanitizeFileName(filename)}`;
39+
const newFilename = `${sanitizeFileName(name)}_${fileId}.${fileExt}`;
4640
const filepath = path.join(UPLOADED_FOLDER, newFilename);
4741
const fileUrl = await saveDocument(filepath, fileBuffer);
4842
return {
4943
id: fileId,
5044
name: newFilename,
45+
size: fileBuffer.length,
46+
type: fileExt,
5147
url: fileUrl,
5248
refs: [] as string[],
53-
} as FileMetadata;
49+
} as DocumentFile;
5450
}
5551

5652
export async function parseFile(
@@ -104,5 +100,6 @@ export async function saveDocument(filepath: string, content: string | Buffer) {
104100
}
105101

106102
function sanitizeFileName(fileName: string) {
107-
return fileName.replace(/[^a-zA-Z0-9_.-]/g, "_");
103+
// Remove file extension and sanitize
104+
return fileName.split(".")[0].replace(/[^a-zA-Z0-9_-]/g, "_");
108105
}

templates/components/llamaindex/typescript/documents/upload.ts

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,40 @@ import { Document, LLamaCloudFileService, VectorStoreIndex } from "llamaindex";
22
import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
33
import fs from "node:fs/promises";
44
import path from "node:path";
5-
import { FileMetadata, parseFile, storeFile } from "./helper";
5+
import { DocumentFile } from "../streaming/annotations";
6+
import { parseFile, storeFile } from "./helper";
67
import { runPipeline } from "./pipeline";
78

89
export async function uploadDocument(
910
index: VectorStoreIndex | LlamaCloudIndex | null,
10-
filename: string,
11+
name: string,
1112
raw: string,
12-
): Promise<FileMetadata> {
13+
): Promise<DocumentFile> {
1314
const [header, content] = raw.split(",");
1415
const mimeType = header.replace("data:", "").replace(";base64", "");
1516
const fileBuffer = Buffer.from(content, "base64");
1617

1718
// Store file
18-
const fileMetadata = await storeFile(filename, fileBuffer, mimeType);
19+
const fileMetadata = await storeFile(name, fileBuffer, mimeType);
1920

2021
// If the file is csv and has codeExecutorTool, we don't need to index the file.
2122
if (mimeType === "text/csv" && (await hasCodeExecutorTool())) {
2223
return fileMetadata;
2324
}
24-
25+
let documentIds: string[] = [];
2526
if (index instanceof LlamaCloudIndex) {
2627
// trigger LlamaCloudIndex API to upload the file and run the pipeline
2728
const projectId = await index.getProjectId();
2829
const pipelineId = await index.getPipelineId();
2930
try {
30-
const documentId = await LLamaCloudFileService.addFileToPipeline(
31-
projectId,
32-
pipelineId,
33-
new File([fileBuffer], filename, { type: mimeType }),
34-
{ private: "true" },
35-
);
36-
// Update file metadata with document IDs
37-
fileMetadata.refs = [documentId];
38-
return fileMetadata;
31+
documentIds = [
32+
await LLamaCloudFileService.addFileToPipeline(
33+
projectId,
34+
pipelineId,
35+
new File([fileBuffer], name, { type: mimeType }),
36+
{ private: "true" },
37+
),
38+
];
3939
} catch (error) {
4040
if (
4141
error instanceof ReferenceError &&
@@ -47,14 +47,14 @@ export async function uploadDocument(
4747
}
4848
throw error;
4949
}
50+
} else {
51+
// run the pipeline for other vector store indexes
52+
const documents: Document[] = await parseFile(fileBuffer, name, mimeType);
53+
documentIds = await runPipeline(index, documents);
5054
}
5155

52-
// run the pipeline for other vector store indexes
53-
const documents: Document[] = await parseFile(fileBuffer, filename, mimeType);
5456
// Update file metadata with document IDs
55-
fileMetadata.refs = documents.map((document) => document.id_ as string);
56-
// Run the pipeline
57-
await runPipeline(index, documents);
57+
fileMetadata.refs = documentIds;
5858
return fileMetadata;
5959
}
6060

templates/components/llamaindex/typescript/streaming/annotations.ts

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,13 @@ import { MessageContent, MessageContentDetail } from "llamaindex";
33

44
export type DocumentFileType = "csv" | "pdf" | "txt" | "docx";
55

6-
export type UploadedFileMeta = {
6+
export type DocumentFile = {
77
id: string;
88
name: string;
9-
url?: string;
10-
refs?: string[];
11-
};
12-
13-
export type DocumentFile = {
14-
type: DocumentFileType;
9+
size: number;
10+
type: string;
1511
url: string;
16-
metadata: UploadedFileMeta;
12+
refs?: string[];
1713
};
1814

1915
type Annotation = {
@@ -30,7 +26,7 @@ export function isValidMessages(messages: Message[]): boolean {
3026
export function retrieveDocumentIds(messages: Message[]): string[] {
3127
// retrieve document Ids from the annotations of all messages (if any)
3228
const documentFiles = retrieveDocumentFiles(messages);
33-
return documentFiles.map((file) => file.metadata?.refs || []).flat();
29+
return documentFiles.map((file) => file.refs || []).flat();
3430
}
3531

3632
export function retrieveDocumentFiles(messages: Message[]): DocumentFile[] {
@@ -63,16 +59,15 @@ export function retrieveMessageContent(messages: Message[]): MessageContent {
6359
}
6460

6561
function getFileContent(file: DocumentFile): string {
66-
const fileMetadata = file.metadata;
67-
let defaultContent = `=====File: ${fileMetadata.name}=====\n`;
62+
let defaultContent = `=====File: ${file.name}=====\n`;
6863
// Include file URL if it's available
6964
const urlPrefix = process.env.FILESERVER_URL_PREFIX;
7065
let urlContent = "";
7166
if (urlPrefix) {
72-
if (fileMetadata.url) {
73-
urlContent = `File URL: ${fileMetadata.url}\n`;
67+
if (file.url) {
68+
urlContent = `File URL: ${file.url}\n`;
7469
} else {
75-
urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${fileMetadata.name}\n`;
70+
urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${file.name}\n`;
7671
}
7772
} else {
7873
console.warn(
@@ -82,11 +77,11 @@ function getFileContent(file: DocumentFile): string {
8277
defaultContent += urlContent;
8378

8479
// Include document IDs if they're available
85-
if (fileMetadata.refs) {
86-
defaultContent += `Document IDs: ${fileMetadata.refs}\n`;
80+
if (file.refs) {
81+
defaultContent += `Document IDs: ${file.refs}\n`;
8782
}
8883
// Include sandbox file paths
89-
const sandboxFilePath = `/tmp/${fileMetadata.name}`;
84+
const sandboxFilePath = `/tmp/${file.name}`;
9085
defaultContent += `Sandbox file path (instruction: only use sandbox path for artifact or code interpreter tool): ${sandboxFilePath}\n`;
9186

9287
return defaultContent;

templates/components/routers/python/sandbox.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,10 +172,14 @@ def _download_cell_results(cell_results: Optional[List]) -> List[Dict[str, str]]
172172
data = result[ext]
173173

174174
if ext in ["png", "svg", "jpeg", "pdf"]:
175-
file_path = os.path.join("output", "tools", f"{uuid.uuid4()}.{ext}")
176175
base64_data = data
177176
buffer = base64.b64decode(base64_data)
178-
file_meta = save_file(content=buffer, file_path=file_path)
177+
file_name = f"{uuid.uuid4()}.{ext}"
178+
file_meta = save_file(
179+
content=buffer,
180+
file_name=file_name,
181+
save_dir=os.path.join("output", "tools"),
182+
)
179183
output.append(
180184
{
181185
"type": ext,

0 commit comments

Comments
 (0)