Skip to content
109 changes: 108 additions & 1 deletion src/discord.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,14 @@ impl EventHandler for Handler {
text: prompt_with_sender.clone(),
});

// Process attachments: route by content type (audio → STT, image → encode)
// Process attachments: route by content type (audio → STT, text file → inline, image → encode)
let mut audio_skipped = false;
if !msg.attachments.is_empty() {
let mut text_file_bytes: u64 = 0;
let mut text_file_count: u32 = 0;
const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments
const TEXT_FILE_COUNT_CAP: u32 = 5;

for attachment in &msg.attachments {
if is_audio_attachment(attachment) {
if self.stt_config.enabled {
Expand All @@ -220,6 +225,23 @@ impl EventHandler for Handler {
warn!(filename = %attachment.filename, "skipping audio attachment (STT disabled)");
audio_skipped = true;
}
} else if is_text_attachment(attachment) {
if text_file_count >= TEXT_FILE_COUNT_CAP {
warn!(filename = %attachment.filename, count = text_file_count, "text file count cap reached, skipping");
continue;
}
// Pre-check with Discord-reported size (fast path, avoids unnecessary download).
// Running total uses actual downloaded bytes for accurate accounting.
if text_file_bytes + u64::from(attachment.size) > TEXT_TOTAL_CAP {
warn!(filename = %attachment.filename, total = text_file_bytes, "text attachments total exceeds 1MB cap, skipping remaining");
continue;
}
if let Some((content_block, actual_bytes)) = download_and_read_text_file(attachment).await {
text_file_bytes += actual_bytes;
text_file_count += 1;
debug!(filename = %attachment.filename, "adding text file attachment");
content_blocks.push(content_block);
}
} else if let Some(content_block) = download_and_encode_image(attachment).await {
debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment");
content_blocks.push(content_block);
Expand Down Expand Up @@ -329,6 +351,91 @@ impl EventHandler for Handler {
}
}

/// File extensions (lowercase, without the leading dot) of text-based files
/// that can be inlined into the prompt.
///
/// `is_text_attachment` lowercases the attachment's extension before looking
/// it up here, so every entry must stay lowercase.
const TEXT_EXTENSIONS: &[&str] = &[
    // Plain text, documents and structured data
    "txt", "csv", "log", "md", "json", "jsonl", "yaml", "yml", "toml", "xml",
    // Source code
    "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "h", "hpp", "rb",
    // Shell and batch scripts
    "sh", "bash", "zsh", "fish", "ps1", "bat",
    // Queries, markup and stylesheets
    "sql", "html", "css", "scss", "less",
    // Configuration
    "ini", "cfg", "conf", "env",
];

/// Whole filenames that identify a text file even though they carry no
/// recognised extension.
///
/// `is_text_attachment` compares the lowercased attachment filename against
/// this list, so every entry must stay lowercase.
const TEXT_FILENAMES: &[&str] = &[
    // Build and tooling manifests
    "dockerfile",
    "makefile",
    "justfile",
    "rakefile",
    "gemfile",
    "procfile",
    "vagrantfile",
    // Dotfiles
    ".gitignore",
    ".dockerignore",
    ".editorconfig",
];

/// Non-`text/*` MIME types whose payload is nevertheless plain text.
///
/// `is_text_attachment` accepts any `text/...` type on its own; this list only
/// covers the `application/...` types that should be treated the same way.
/// Entries carry no parameters (such as `; charset=...`).
const TEXT_MIME_TYPES: &[&str] = &[
    "application/json", "application/xml", "application/javascript",
    "application/x-yaml", "application/x-sh",
    "application/toml", "application/x-toml",
];

/// Check if an attachment is a text-based file we can inline.
///
/// An attachment qualifies when any of the following holds:
/// 1. its MIME type (parameters such as `; charset=utf-8` ignored) is `text/*`
///    or one of `TEXT_MIME_TYPES`;
/// 2. the part of its filename after the last `.` is in `TEXT_EXTENSIONS`;
/// 3. its whole filename is in `TEXT_FILENAMES` (Dockerfile, Makefile, ...).
///
/// All three comparisons are case-insensitive.
fn is_text_attachment(attachment: &serenity::model::channel::Attachment) -> bool {
    // MIME type and subtype names are case-insensitive, so normalise before
    // matching: `Text/Plain; charset=UTF-8` must be treated like `text/plain`.
    let mime_base = attachment
        .content_type
        .as_deref()
        .unwrap_or("")
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_ascii_lowercase();
    if mime_base.starts_with("text/") || TEXT_MIME_TYPES.contains(&mime_base.as_str()) {
        return true;
    }

    // Lowercase once; both lookup tables hold lowercase entries.
    let filename = attachment.filename.to_lowercase();

    // `rsplit_once` yields `None` for dot-less names, so "Makefile" is never
    // mistaken for an extension and falls through to the exact-name check.
    if let Some((_, ext)) = filename.rsplit_once('.') {
        if TEXT_EXTENSIONS.contains(&ext) {
            return true;
        }
    }

    // Check exact filename (Dockerfile, Makefile, etc.)
    TEXT_FILENAMES.contains(&filename.as_str())
}

/// Download a text-based file attachment and return it as a ContentBlock::Text.
/// Files larger than 512 KB are skipped to avoid bloating the prompt.
async fn download_and_read_text_file(
attachment: &serenity::model::channel::Attachment,
) -> Option<(ContentBlock, u64)> {
const MAX_SIZE: u64 = 512 * 1024; // 512 KB

if u64::from(attachment.size) > MAX_SIZE {
warn!(filename = %attachment.filename, size = attachment.size, "text file exceeds 512KB limit, skipping");
return None;
}

let resp = HTTP_CLIENT.get(&attachment.url).send().await.ok()?;
if !resp.status().is_success() {
warn!(url = %attachment.url, status = %resp.status(), "text file download failed");
return None;
}
let bytes = resp.bytes().await.ok()?;
let actual_size = bytes.len() as u64;

// Defense-in-depth: verify actual download size
if actual_size > MAX_SIZE {
warn!(filename = %attachment.filename, size = actual_size, "downloaded text file exceeds 512KB limit, skipping");
return None;
}

// from_utf8_lossy returns Cow::Borrowed for valid UTF-8 (zero-copy)
let text = String::from_utf8_lossy(&bytes).into_owned();

// Dynamic fence: keep adding backticks until the fence doesn't appear in content
let mut fence = "```".to_string();
while text.contains(fence.as_str()) {
fence.push('`');
}

debug!(filename = %attachment.filename, bytes = text.len(), "text file inlined");
Some((ContentBlock::Text {
text: format!("[File: {}]\n{fence}\n{}\n{fence}", attachment.filename, text),
}, actual_size))
}

/// Check if an attachment is an audio file (voice messages are typically audio/ogg).
fn is_audio_attachment(attachment: &serenity::model::channel::Attachment) -> bool {
let mime = attachment.content_type.as_deref().unwrap_or("");
Expand Down
Loading