Merged

Changes from all commits

52 commits
7dcbf2f
tmp
leehuwuj Oct 8, 2024
1e2502a
update private file handler
leehuwuj Oct 11, 2024
e12bd29
enhance code
leehuwuj Oct 11, 2024
1cef23c
reduce complexity
leehuwuj Oct 11, 2024
5bd3591
fix mypy
leehuwuj Oct 11, 2024
c8a9472
fix mypy
leehuwuj Oct 11, 2024
5fd25f6
remove comment
leehuwuj Oct 11, 2024
a4d3d36
support upload file and enhance interpreter tool
leehuwuj Oct 14, 2024
6efadd4
fix blocking stream event
leehuwuj Oct 14, 2024
3e82be7
fix mypy
leehuwuj Oct 14, 2024
393a926
Merge remote-tracking branch 'origin/main' into feat/upload-file-sandbox
leehuwuj Oct 14, 2024
9602c6c
add changeset and fix mypy after merge
leehuwuj Oct 14, 2024
985cb26
fix mypy
leehuwuj Oct 14, 2024
9a4c0a3
enhance code
leehuwuj Oct 14, 2024
2efc727
typing
leehuwuj Oct 14, 2024
249edf5
wording
leehuwuj Oct 15, 2024
22cd958
exclude indexing private csv file if code executor tool is enabled
leehuwuj Oct 15, 2024
30e408b
remove file content and duplicated file id
leehuwuj Oct 15, 2024
94b338a
simpler file upload
leehuwuj Oct 15, 2024
6bb7a30
support for TS
leehuwuj Oct 15, 2024
bbf321f
support file upload for artifact in TS
leehuwuj Oct 15, 2024
852e6ec
enhance file path
leehuwuj Oct 15, 2024
5ae6b57
enhance code
leehuwuj Oct 15, 2024
c64e2ba
revise vercel streaming
leehuwuj Oct 15, 2024
36cdb1e
remove redundant id
leehuwuj Oct 15, 2024
e0921fe
add show file widget to the
leehuwuj Oct 15, 2024
a3c1c55
allow upload file with empty index store
leehuwuj Oct 15, 2024
bae12e6
Merge branch 'main' into feat/upload-file-sandbox
marcusschiesser Oct 15, 2024
7d9dee2
add data scientist use case
marcusschiesser Oct 15, 2024
3b91e7b
use GPT4o model for data scientist and code artifact
marcusschiesser Oct 15, 2024
954113e
update comments
leehuwuj Oct 15, 2024
624aea7
use previewcard to render documents
marcusschiesser Oct 15, 2024
788fab0
fix: UI overlap, key warning, wrong filename and url in markdown
thucpn Oct 16, 2024
0f56092
use div as tag wrapper for message
thucpn Oct 16, 2024
051b3cf
update ui
leehuwuj Oct 16, 2024
4f92ce2
update for python
leehuwuj Oct 16, 2024
db025d9
Merge remote-tracking branch 'origin/main' into lee/enhance-code
leehuwuj Oct 16, 2024
c5a381f
fix missing change for FE
leehuwuj Oct 16, 2024
f825a8c
update python
leehuwuj Oct 16, 2024
8b8afa0
fix python typing
leehuwuj Oct 17, 2024
659d0c7
update for ts
leehuwuj Oct 17, 2024
0655a4e
add missing filetype and filesize
leehuwuj Oct 17, 2024
9eb8468
update comments
leehuwuj Oct 17, 2024
c4e2325
refactor code
leehuwuj Oct 17, 2024
7dd9c86
remove redundant code
leehuwuj Oct 17, 2024
3a82df0
remove redundant code
leehuwuj Oct 17, 2024
af83b50
fix duplicated file ext in file name
leehuwuj Oct 17, 2024
a8c819b
fix llamacloud log
leehuwuj Oct 17, 2024
f84db97
enhance code
leehuwuj Oct 17, 2024
e4748df
show error if uploading to sandbox failed
leehuwuj Oct 17, 2024
98dc945
add change set
leehuwuj Oct 17, 2024
48dcc51
Update .changeset/selfish-tips-lie.md
marcusschiesser Oct 17, 2024
5 changes: 5 additions & 0 deletions .changeset/selfish-tips-lie.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Simplify and unify handling file uploads
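For context, here is a minimal sketch (not part of the diff) of the unified save path this changeset refers to. It is based on the FileService.save_file(...) calls and the DocumentFile fields visible in the diffs below; the payload, file name, and output directory are illustrative assumptions.

import base64
import uuid

# Import path taken from the Python diff below (app.services.file).
from app.services.file import DocumentFile, FileService

# Decode a base64 payload (e.g. an uploaded file or an e2b artifact)...
buffer = base64.b64decode("aGVsbG8sIHdvcmxkIQ==")  # illustrative payload

# ...and persist it through the single service call used across the templates.
document_file: DocumentFile = FileService.save_file(
    buffer,
    file_name=f"example_{uuid.uuid4()}.txt",  # illustrative name
    save_dir="output/tools",
)

# DocumentFile exposes at least name and url, which the interpreter tool
# below passes on in its results.
print(document_file.name, document_file.url)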
22 changes: 12 additions & 10 deletions templates/components/engines/python/agent/tools/interpreter.py
@@ -4,7 +4,7 @@
import uuid
from typing import List, Optional

from app.engine.utils.file_helper import FileMetadata, save_file
from app.services.file import DocumentFile, FileService
from e2b_code_interpreter import CodeInterpreter
from e2b_code_interpreter.models import Logs
from llama_index.core.tools import FunctionTool
@@ -32,7 +32,7 @@ class E2BCodeInterpreter:
output_dir = "output/tools"
uploaded_files_dir = "output/uploaded"

def __init__(self, api_key: str = None):
def __init__(self, api_key: Optional[str] = None):
if api_key is None:
api_key = os.getenv("E2B_API_KEY")
filesever_url_prefix = os.getenv("FILESERVER_URL_PREFIX")
@@ -72,15 +72,17 @@ def _init_interpreter(self, sandbox_files: List[str] = []):
self.interpreter.files.write(file_path, content)
logger.info(f"Uploaded {len(sandbox_files)} files to sandbox")

def _save_to_disk(self, base64_data: str, ext: str) -> FileMetadata:
def _save_to_disk(self, base64_data: str, ext: str) -> DocumentFile:
buffer = base64.b64decode(base64_data)

filename = f"{uuid.uuid4()}.{ext}" # generate a unique filename
output_path = os.path.join(self.output_dir, filename)
# Output from e2b doesn't have a name. Create a random name for it.
filename = f"e2b_file_{uuid.uuid4()}.{ext}"

file_metadata = save_file(buffer, file_path=output_path)
document_file = FileService.save_file(
buffer, file_name=filename, save_dir=self.output_dir
)

return file_metadata
return document_file

def _parse_result(self, result) -> List[InterpreterExtraResult]:
"""
@@ -99,12 +101,12 @@ def _parse_result(self, result) -> List[InterpreterExtraResult]:
for ext, data in zip(formats, results):
match ext:
case "png" | "svg" | "jpeg" | "pdf":
file_metadata = self._save_to_disk(data, ext)
document_file = self._save_to_disk(data, ext)
output.append(
InterpreterExtraResult(
type=ext,
filename=file_metadata.name,
url=file_metadata.url,
filename=document_file.name,
url=document_file.url,
)
)
case _:
@@ -111,13 +111,16 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
// upload files to sandbox
if (input.sandboxFiles) {
console.log(`Uploading ${input.sandboxFiles.length} files to sandbox`);
for (const filePath of input.sandboxFiles) {
const fileName = path.basename(filePath);
const localFilePath = path.join(this.uploadedFilesDir, fileName);
const content = fs.readFileSync(localFilePath);
await this.codeInterpreter?.files.write(filePath, content);
try {
for (const filePath of input.sandboxFiles) {
const fileName = path.basename(filePath);
const localFilePath = path.join(this.uploadedFilesDir, fileName);
const content = fs.readFileSync(localFilePath);
await this.codeInterpreter?.files.write(filePath, content);
}
} catch (error) {
console.error("Got error when uploading files to sandbox", error);
}
console.log(`Uploaded ${input.sandboxFiles.length} files to sandbox`);
}
return this.codeInterpreter;
}
31 changes: 14 additions & 17 deletions templates/components/llamaindex/typescript/documents/helper.ts
@@ -3,6 +3,7 @@ import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
import { getExtractors } from "../../engine/loader";
import { DocumentFile } from "../streaming/annotations";

const MIME_TYPE_TO_EXT: Record<string, string> = {
"application/pdf": "pdf",
@@ -14,43 +15,38 @@ const MIME_TYPE_TO_EXT: Record<string, string> = {

const UPLOADED_FOLDER = "output/uploaded";

export type FileMetadata = {
id: string;
name: string;
url: string;
refs: string[];
};

export async function storeAndParseFile(
filename: string,
name: string,
fileBuffer: Buffer,
mimeType: string,
): Promise<FileMetadata> {
const fileMetadata = await storeFile(filename, fileBuffer, mimeType);
const documents: Document[] = await parseFile(fileBuffer, filename, mimeType);
): Promise<DocumentFile> {
const file = await storeFile(name, fileBuffer, mimeType);
const documents: Document[] = await parseFile(fileBuffer, name, mimeType);
// Update document IDs in the file metadata
fileMetadata.refs = documents.map((document) => document.id_ as string);
return fileMetadata;
file.refs = documents.map((document) => document.id_ as string);
return file;
}

export async function storeFile(
filename: string,
name: string,
fileBuffer: Buffer,
mimeType: string,
) {
const fileExt = MIME_TYPE_TO_EXT[mimeType];
if (!fileExt) throw new Error(`Unsupported document type: ${mimeType}`);

const fileId = crypto.randomUUID();
const newFilename = `${fileId}_${sanitizeFileName(filename)}`;
const newFilename = `${sanitizeFileName(name)}_${fileId}.${fileExt}`;
const filepath = path.join(UPLOADED_FOLDER, newFilename);
const fileUrl = await saveDocument(filepath, fileBuffer);
return {
id: fileId,
name: newFilename,
size: fileBuffer.length,
type: fileExt,
url: fileUrl,
refs: [] as string[],
} as FileMetadata;
} as DocumentFile;
}

export async function parseFile(
@@ -104,5 +100,6 @@ export async function saveDocument(filepath: string, content: string | Buffer) {
}

function sanitizeFileName(fileName: string) {
return fileName.replace(/[^a-zA-Z0-9_.-]/g, "_");
// Remove file extension and sanitize
return fileName.split(".")[0].replace(/[^a-zA-Z0-9_-]/g, "_");
}
38 changes: 19 additions & 19 deletions templates/components/llamaindex/typescript/documents/upload.ts
@@ -2,40 +2,40 @@ import { Document, LLamaCloudFileService, VectorStoreIndex } from "llamaindex";
import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
import fs from "node:fs/promises";
import path from "node:path";
import { FileMetadata, parseFile, storeFile } from "./helper";
import { DocumentFile } from "../streaming/annotations";
import { parseFile, storeFile } from "./helper";
import { runPipeline } from "./pipeline";

export async function uploadDocument(
index: VectorStoreIndex | LlamaCloudIndex | null,
filename: string,
name: string,
raw: string,
): Promise<FileMetadata> {
): Promise<DocumentFile> {
const [header, content] = raw.split(",");
const mimeType = header.replace("data:", "").replace(";base64", "");
const fileBuffer = Buffer.from(content, "base64");

// Store file
const fileMetadata = await storeFile(filename, fileBuffer, mimeType);
const fileMetadata = await storeFile(name, fileBuffer, mimeType);

// If the file is csv and has codeExecutorTool, we don't need to index the file.
if (mimeType === "text/csv" && (await hasCodeExecutorTool())) {
return fileMetadata;
}

let documentIds: string[] = [];
if (index instanceof LlamaCloudIndex) {
// trigger LlamaCloudIndex API to upload the file and run the pipeline
const projectId = await index.getProjectId();
const pipelineId = await index.getPipelineId();
try {
const documentId = await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([fileBuffer], filename, { type: mimeType }),
{ private: "true" },
);
// Update file metadata with document IDs
fileMetadata.refs = [documentId];
return fileMetadata;
documentIds = [
await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([fileBuffer], name, { type: mimeType }),
{ private: "true" },
),
];
} catch (error) {
if (
error instanceof ReferenceError &&
@@ -47,14 +47,14 @@ export async function uploadDocument(
}
throw error;
}
} else {
// run the pipeline for other vector store indexes
const documents: Document[] = await parseFile(fileBuffer, name, mimeType);
documentIds = await runPipeline(index, documents);
}

// run the pipeline for other vector store indexes
const documents: Document[] = await parseFile(fileBuffer, filename, mimeType);
// Update file metadata with document IDs
fileMetadata.refs = documents.map((document) => document.id_ as string);
// Run the pipeline
await runPipeline(index, documents);
fileMetadata.refs = documentIds;
return fileMetadata;
}

29 changes: 12 additions & 17 deletions templates/components/llamaindex/typescript/streaming/annotations.ts
@@ -3,17 +3,13 @@ import { MessageContent, MessageContentDetail } from "llamaindex";

export type DocumentFileType = "csv" | "pdf" | "txt" | "docx";

export type UploadedFileMeta = {
export type DocumentFile = {
id: string;
name: string;
url?: string;
refs?: string[];
};

export type DocumentFile = {
type: DocumentFileType;
size: number;
type: string;
url: string;
metadata: UploadedFileMeta;
refs?: string[];
Comment on lines +6 to +12

💡 Codebase verification

Inconsistency Found: id Field Missing in Python DocumentFile Class

The TypeScript DocumentFile type includes the id field, but the corresponding Python DocumentFile class does not. This discrepancy can lead to inconsistencies in data handling between the TypeScript and Python components.

Affected files:

  • templates/types/streaming/fastapi/app/services/file.py: class DocumentFile(BaseModel):
🔗 Analysis chain

Streamlined DocumentFile type, but consider cross-language consistency

The DocumentFile type has been updated to include more explicit properties, which improves clarity and usability. However, there's an existing question about the presence of the id field in TypeScript but not in Python.

To ensure consistency across languages, please verify if the id field is necessary in both TypeScript and Python implementations. Run the following script to check for DocumentFile or similar types in Python files:

If there are discrepancies, consider aligning the implementations or documenting the reason for the difference.
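As one possible alignment, here is a minimal sketch of a Pydantic model mirroring the TypeScript DocumentFile shape above. Only the class name DocumentFile(BaseModel) and its location in templates/types/streaming/fastapi/app/services/file.py come from the verification output; every field listed here, including whether id should exist on the Python side, is an assumption to verify against the actual implementation.

from typing import List, Optional

from pydantic import BaseModel


class DocumentFile(BaseModel):
    # Mirrors the TypeScript DocumentFile type; id is the field the
    # verification flags as missing on the Python side.
    id: str
    name: str
    type: str
    size: int
    url: str
    refs: Optional[List[str]] = None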

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Search for DocumentFile or similar types in Python files
rg --type python -i "class\s+(DocumentFile|UploadedFile|FileMetadata)"

Length of output: 103


Script:

#!/bin/bash
# Search for DocumentFile or similar types in Python files
rg -g "*.py" -i "class\s+(DocumentFile|UploadedFile|FileMetadata)"

Length of output: 343

};

type Annotation = {
@@ -30,7 +26,7 @@ export function isValidMessages(messages: Message[]): boolean {
export function retrieveDocumentIds(messages: Message[]): string[] {
// retrieve document Ids from the annotations of all messages (if any)
const documentFiles = retrieveDocumentFiles(messages);
return documentFiles.map((file) => file.metadata?.refs || []).flat();
return documentFiles.map((file) => file.refs || []).flat();
}

export function retrieveDocumentFiles(messages: Message[]): DocumentFile[] {
@@ -63,16 +59,15 @@ export function retrieveMessageContent(messages: Message[]): MessageContent {
}

function getFileContent(file: DocumentFile): string {
const fileMetadata = file.metadata;
let defaultContent = `=====File: ${fileMetadata.name}=====\n`;
let defaultContent = `=====File: ${file.name}=====\n`;
// Include file URL if it's available
const urlPrefix = process.env.FILESERVER_URL_PREFIX;
let urlContent = "";
if (urlPrefix) {
if (fileMetadata.url) {
urlContent = `File URL: ${fileMetadata.url}\n`;
if (file.url) {
urlContent = `File URL: ${file.url}\n`;
} else {
urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${fileMetadata.name}\n`;
urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${file.name}\n`;
}
} else {
console.warn(
@@ -82,11 +77,11 @@ function getFileContent(file: DocumentFile): string {
defaultContent += urlContent;

// Include document IDs if it's available
if (fileMetadata.refs) {
defaultContent += `Document IDs: ${fileMetadata.refs}\n`;
if (file.refs) {
defaultContent += `Document IDs: ${file.refs}\n`;
}
// Include sandbox file paths
const sandboxFilePath = `/tmp/${fileMetadata.name}`;
const sandboxFilePath = `/tmp/${file.name}`;
defaultContent += `Sandbox file path (instruction: only use sandbox path for artifact or code interpreter tool): ${sandboxFilePath}\n`;

return defaultContent;
8 changes: 6 additions & 2 deletions templates/components/routers/python/sandbox.py
@@ -172,10 +172,14 @@ def _download_cell_results(cell_results: Optional[List]) -> List[Dict[str, str]]
data = result[ext]

if ext in ["png", "svg", "jpeg", "pdf"]:
file_path = os.path.join("output", "tools", f"{uuid.uuid4()}.{ext}")
base64_data = data
buffer = base64.b64decode(base64_data)
file_meta = save_file(content=buffer, file_path=file_path)
file_name = f"{uuid.uuid4()}.{ext}"
file_meta = save_file(
content=buffer,
file_name=file_name,
save_dir=os.path.join("output", "tools"),
)
output.append(
{
"type": ext,