diff --git a/Cargo.lock b/Cargo.lock index b69c10c..10757a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -315,6 +315,7 @@ dependencies = [ "dirs 5.0.1", "env_logger", "fs_extra", + "glob", "ignore", "indicatif", "log", @@ -578,6 +579,12 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "globset" version = "0.4.16" diff --git a/Cargo.toml b/Cargo.toml index 61398de..fec17fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ log = "0.4" fs_extra = "1.3" walkdir = "2.4" ignore = "0.4" +glob = "0.3" tempfile = "3.8" # Unix utilities integration - using system commands with fallbacks # uucore provides shared utilities for potential future native integration diff --git a/src/commands/file.rs b/src/commands/file.rs index c55f70d..730dbc9 100644 --- a/src/commands/file.rs +++ b/src/commands/file.rs @@ -2,19 +2,479 @@ // version: 1.0.0 // guid: fbdd6298-852d-4041-a846-83781ff68a50 +//! File operations: read, write, glob, list, exists. +//! +//! All operations are gated by the same path policy that protects sensitive +//! system locations (/etc, /bin, /sys, ...). When the env var +//! `SAFE_AI_UTIL_REPO_ROOT` is set, every path must canonicalize to a location +//! inside that root — this is how the burndown driver sandboxes agents to a +//! single worktree. +//! +//! Read and write have hard byte ceilings to defend against accidental +//! resource exhaustion. They are configurable via env vars but conservatively +//! defaulted. + use crate::executor::Executor; -use anyhow::Result; -use clap::{ArgMatches, Command}; +use anyhow::{anyhow, Context, Result}; +use clap::{Arg, ArgMatches, Command}; +use std::env; +use std::fs; +use std::io::{self, Read, Write}; +use std::path::{Path, PathBuf}; +use tracing::{debug, info, warn}; + +const DEFAULT_MAX_READ_BYTES: u64 = 10 * 1024 * 1024; // 10 MiB +const DEFAULT_MAX_WRITE_BYTES: u64 = 5 * 1024 * 1024; // 5 MiB +const DEFAULT_MAX_GLOB_RESULTS: usize = 5_000; -/// Build the file command +/// Build the file command tree. pub fn build_command() -> Command { - Command::new("file").about("File operations") - // Add subcommands here + Command::new("file") + .about("File operations (read/write/glob/list/exists), audited and policy-gated") + .subcommand_required(true) + .arg_required_else_help(true) + .subcommand( + Command::new("read") + .about("Read a file to stdout") + .arg(Arg::new("path") + .long("path") + .help("File to read") + .required(true)) + .arg(Arg::new("max-bytes") + .long("max-bytes") + .help("Maximum bytes to read (default 10 MiB)") + .value_parser(clap::value_parser!(u64))), + ) + .subcommand( + Command::new("write") + .about("Write content to a file (creates or overwrites)") + .arg(Arg::new("path") + .long("path") + .help("File to write") + .required(true)) + .arg(Arg::new("content") + .long("content") + .help("Inline content (mutually exclusive with --content-stdin)") + .conflicts_with("content-stdin")) + .arg(Arg::new("content-stdin") + .long("content-stdin") + .help("Read content from stdin (use for any non-trivial size)") + .action(clap::ArgAction::SetTrue)) + .arg(Arg::new("create-dirs") + .long("create-dirs") + .help("Create parent directories if missing") + .action(clap::ArgAction::SetTrue)) + .arg(Arg::new("max-bytes") + .long("max-bytes") + .help("Maximum bytes to write (default 5 MiB)") + .value_parser(clap::value_parser!(u64))), + ) + .subcommand( + Command::new("glob") + .about("Match files against a glob pattern, one per line") + .arg(Arg::new("pattern") + .long("pattern") + .help("Glob pattern, e.g. 'src/**/*.rs'") + .required(true)) + .arg(Arg::new("max-results") + .long("max-results") + .help("Maximum results to return (default 5000)") + .value_parser(clap::value_parser!(usize))), + ) + .subcommand( + Command::new("list") + .about("List directory entries (non-recursive), one per line") + .arg(Arg::new("path") + .long("path") + .help("Directory to list") + .required(true)), + ) + .subcommand( + Command::new("exists") + .about("Exit 0 if path exists, 1 otherwise") + .arg(Arg::new("path") + .long("path") + .help("Path to test") + .required(true)), + ) +} + +/// Dispatch a `file` invocation. +pub async fn execute(matches: &ArgMatches, _executor: &Executor) -> Result<()> { + match matches.subcommand() { + Some(("read", m)) => exec_read(m), + Some(("write", m)) => exec_write(m), + Some(("glob", m)) => exec_glob(m), + Some(("list", m)) => exec_list(m), + Some(("exists", m)) => exec_exists(m), + _ => Err(anyhow!("file: missing or unknown subcommand")), + } +} + +// --------------------------------------------------------------------------- +// subcommand impls +// --------------------------------------------------------------------------- + +fn exec_read(m: &ArgMatches) -> Result<()> { + let path_str = m.get_one::("path").expect("required"); + let max = m + .get_one::("max-bytes") + .copied() + .unwrap_or_else(read_max_bytes); + + let path = validate_path(path_str, PathIntent::Read)?; + let meta = fs::metadata(&path) + .with_context(|| format!("file read: cannot stat '{}'", path.display()))?; + if !meta.is_file() { + return Err(anyhow!("file read: not a regular file: {}", path.display())); + } + if meta.len() > max { + return Err(anyhow!( + "file read: '{}' is {} bytes, exceeds limit of {} bytes", + path.display(), + meta.len(), + max + )); + } + + let bytes = fs::read(&path) + .with_context(|| format!("file read: failed to read '{}'", path.display()))?; + io::stdout().write_all(&bytes)?; + info!("file read: {} ({} bytes)", path.display(), bytes.len()); + Ok(()) +} + +fn exec_write(m: &ArgMatches) -> Result<()> { + let path_str = m.get_one::("path").expect("required"); + let create_dirs = m.get_flag("create-dirs"); + let max = m + .get_one::("max-bytes") + .copied() + .unwrap_or_else(write_max_bytes); + + let path = validate_path(path_str, PathIntent::Write)?; + + let inline = m.get_one::("content").cloned(); + let from_stdin = m.get_flag("content-stdin"); + + let content: Vec = match (inline, from_stdin) { + (Some(s), false) => s.into_bytes(), + (None, true) => { + let mut buf = Vec::new(); + io::stdin().read_to_end(&mut buf)?; + buf + } + (None, false) => { + return Err(anyhow!( + "file write: must supply --content or --content-stdin" + )); + } + (Some(_), true) => unreachable!("clap conflicts_with prevents this"), + }; + + if content.len() as u64 > max { + return Err(anyhow!( + "file write: payload {} bytes exceeds limit {} bytes", + content.len(), + max + )); + } + + if create_dirs { + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() && !parent.exists() { + fs::create_dir_all(parent).with_context(|| { + format!("file write: cannot create parent of '{}'", path.display()) + })?; + } + } + } + + fs::write(&path, &content) + .with_context(|| format!("file write: failed to write '{}'", path.display()))?; + info!("file write: {} ({} bytes)", path.display(), content.len()); + Ok(()) } -/// Execute file commands -pub async fn execute(_matches: &ArgMatches, _executor: &Executor) -> Result<()> { - // Implementation will be added in later phases - println!("File command execution not yet implemented"); +fn exec_glob(m: &ArgMatches) -> Result<()> { + let pattern = m.get_one::("pattern").expect("required"); + let max = m + .get_one::("max-results") + .copied() + .unwrap_or(DEFAULT_MAX_GLOB_RESULTS); + + let resolved_pattern = resolve_glob_pattern(pattern)?; + debug!("file glob: resolved pattern = {}", resolved_pattern); + + let mut count = 0usize; + let stdout = io::stdout(); + let mut out = stdout.lock(); + for entry in glob::glob(&resolved_pattern) + .with_context(|| format!("file glob: bad pattern '{}'", resolved_pattern))? + { + let p = entry.with_context(|| "file glob: entry error")?; + if validate_path(&p.to_string_lossy(), PathIntent::Read).is_err() { + warn!("file glob: skipping out-of-policy path {}", p.display()); + continue; + } + writeln!(out, "{}", p.display())?; + count += 1; + if count >= max { + warn!("file glob: hit max-results cap of {}", max); + break; + } + } + info!("file glob: pattern='{}' results={}", pattern, count); Ok(()) } + +fn exec_list(m: &ArgMatches) -> Result<()> { + let path_str = m.get_one::("path").expect("required"); + let path = validate_path(path_str, PathIntent::Read)?; + let meta = fs::metadata(&path) + .with_context(|| format!("file list: cannot stat '{}'", path.display()))?; + if !meta.is_dir() { + return Err(anyhow!("file list: not a directory: {}", path.display())); + } + + let stdout = io::stdout(); + let mut out = stdout.lock(); + for entry in fs::read_dir(&path) + .with_context(|| format!("file list: failed to read '{}'", path.display()))? + { + let entry = entry?; + writeln!(out, "{}", entry.file_name().to_string_lossy())?; + } + Ok(()) +} + +fn exec_exists(m: &ArgMatches) -> Result<()> { + let path_str = m.get_one::("path").expect("required"); + let path = PathBuf::from(path_str); + if path.exists() { + Ok(()) + } else { + std::process::exit(1); + } +} + +// --------------------------------------------------------------------------- +// path policy +// --------------------------------------------------------------------------- + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum PathIntent { + Read, + Write, +} + +fn validate_path(path_str: &str, intent: PathIntent) -> Result { + if path_str.is_empty() { + return Err(anyhow!("file: empty path")); + } + + let raw = Path::new(path_str); + + if raw.is_absolute() { + let s = raw.to_string_lossy(); + const SENSITIVE: &[&str] = &[ + "/etc", "/bin", "/sbin", "/usr/bin", "/usr/sbin", "/boot", "/root", + "/sys", "/proc", "/dev", + ]; + for prefix in SENSITIVE { + if s.starts_with(prefix) { + return Err(anyhow!( + "file: access to sensitive path '{}' denied by policy", + s + )); + } + } + } + + let resolved = if let Ok(root_str) = env::var("SAFE_AI_UTIL_REPO_ROOT") { + let root = PathBuf::from(&root_str); + let root_canon = fs::canonicalize(&root).with_context(|| { + format!("file: SAFE_AI_UTIL_REPO_ROOT '{}' is not accessible", root_str) + })?; + + let candidate = if raw.is_absolute() { + raw.to_path_buf() + } else { + root_canon.join(raw) + }; + + let canon = match intent { + PathIntent::Read => fs::canonicalize(&candidate).with_context(|| { + format!("file: cannot resolve '{}' under repo root", candidate.display()) + })?, + PathIntent::Write => canonicalize_for_write(&candidate)?, + }; + + if !canon.starts_with(&root_canon) { + return Err(anyhow!( + "file: path '{}' escapes repo root '{}'", + canon.display(), + root_canon.display() + )); + } + canon + } else { + raw.to_path_buf() + }; + + Ok(resolved) +} + +fn canonicalize_for_write(p: &Path) -> Result { + if p.exists() { + return Ok(fs::canonicalize(p)?); + } + let mut probe = p + .parent() + .ok_or_else(|| anyhow!("file write: target '{}' has no parent component", p.display()))? + .to_path_buf(); + let mut suffix: Vec = Vec::new(); + loop { + if probe.as_os_str().is_empty() { + probe = env::current_dir()?; + } + if probe.exists() { + let mut canon = fs::canonicalize(&probe)?; + for part in suffix.iter().rev() { + canon.push(part); + } + canon.push(p.file_name().expect("checked parent above")); + return Ok(canon); + } + let comp = probe + .file_name() + .ok_or_else(|| anyhow!("file write: unable to resolve any ancestor of '{}'", p.display()))? + .to_owned(); + suffix.push(comp); + if !probe.pop() { + return Err(anyhow!( + "file write: unable to resolve any ancestor of '{}'", + p.display() + )); + } + } +} + +fn resolve_glob_pattern(pattern: &str) -> Result { + if Path::new(pattern).is_absolute() { + return Ok(pattern.to_string()); + } + if let Ok(root) = env::var("SAFE_AI_UTIL_REPO_ROOT") { + let root_path = PathBuf::from(&root); + return Ok(root_path.join(pattern).to_string_lossy().into_owned()); + } + Ok(pattern.to_string()) +} + +// --------------------------------------------------------------------------- +// env-var-driven config +// --------------------------------------------------------------------------- + +fn read_max_bytes() -> u64 { + env::var("SAFE_AI_UTIL_MAX_READ_BYTES") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(DEFAULT_MAX_READ_BYTES) +} + +fn write_max_bytes() -> u64 { + env::var("SAFE_AI_UTIL_MAX_WRITE_BYTES") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(DEFAULT_MAX_WRITE_BYTES) +} + +// --------------------------------------------------------------------------- +// tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Mutex; + use tempfile::TempDir; + + static ENV_LOCK: Mutex<()> = Mutex::new(()); + + fn with_repo_root(td: &TempDir, f: F) { + let _g = ENV_LOCK.lock().unwrap(); + let prev = env::var("SAFE_AI_UTIL_REPO_ROOT").ok(); + env::set_var("SAFE_AI_UTIL_REPO_ROOT", td.path()); + f(td.path()); + match prev { + Some(v) => env::set_var("SAFE_AI_UTIL_REPO_ROOT", v), + None => env::remove_var("SAFE_AI_UTIL_REPO_ROOT"), + } + } + + #[test] + fn rejects_sensitive_absolute_paths() { + let _g = ENV_LOCK.lock().unwrap(); + env::remove_var("SAFE_AI_UTIL_REPO_ROOT"); + for p in &["/etc/passwd", "/bin/sh", "/sys/kernel", "/proc/1/mem"] { + let err = validate_path(p, PathIntent::Read).unwrap_err(); + assert!( + err.to_string().contains("sensitive path"), + "expected sensitive-path rejection for {p}, got: {err}" + ); + } + } + + #[test] + fn allows_paths_inside_repo_root() { + let td = TempDir::new().unwrap(); + let inner = td.path().join("inner.txt"); + fs::write(&inner, "hi").unwrap(); + with_repo_root(&td, |_| { + let resolved = validate_path("inner.txt", PathIntent::Read).unwrap(); + assert!(resolved.ends_with("inner.txt")); + }); + } + + #[test] + fn rejects_paths_escaping_repo_root() { + let td = TempDir::new().unwrap(); + let outside = TempDir::new().unwrap(); + let outside_file = outside.path().join("evil.txt"); + fs::write(&outside_file, "x").unwrap(); + with_repo_root(&td, |_| { + let err = validate_path(outside_file.to_str().unwrap(), PathIntent::Read) + .unwrap_err(); + assert!(err.to_string().contains("escapes repo root")); + }); + } + + #[test] + fn write_to_new_file_under_root_resolves() { + let td = TempDir::new().unwrap(); + with_repo_root(&td, |_| { + let resolved = validate_path("new.txt", PathIntent::Write).unwrap(); + assert!(resolved.ends_with("new.txt")); + }); + } + + #[test] + fn write_to_new_file_in_new_subdir_resolves() { + let td = TempDir::new().unwrap(); + with_repo_root(&td, |_| { + let resolved = + validate_path("sub/dir/file.txt", PathIntent::Write).unwrap(); + assert!(resolved.to_string_lossy().contains("file.txt")); + }); + } + + #[test] + fn glob_pattern_resolved_relative_to_root() { + let td = TempDir::new().unwrap(); + fs::write(td.path().join("a.txt"), "a").unwrap(); + with_repo_root(&td, |root| { + let resolved = resolve_glob_pattern("*.txt").unwrap(); + assert!(resolved.starts_with(root.to_str().unwrap())); + }); + } +} diff --git a/src/config.rs b/src/config.rs index 81836dd..e1855f2 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,14 +1,20 @@ // file: src/config.rs -// version: 1.0.0 +// version: 1.1.0 // guid: 6ea31d79-e2bf-4304-a841-22bf1e595512 use crate::error::{AgentError, Result}; +use crate::security::allowlist::{AllowlistConfig, AllowlistOverlay}; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use tokio::fs; use tracing::{debug, info}; -/// Application configuration +/// Application configuration. +/// +/// The optional `allowlist` field, when set, REPLACES the SecurityManager's +/// built-in command list with a richer per-command policy (regex argument +/// patterns, forbidden flags, max-args caps, custom validators). When absent, +/// behavior is identical to pre-1.1 — the legacy hardcoded allowed set is used. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Config { pub general: GeneralConfig, @@ -16,6 +22,9 @@ pub struct Config { pub safety: SafetyConfig, pub git: GitConfig, pub execution: ExecutionConfig, + /// Optional rich command allowlist. When `None`, defaults are used. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub allowlist: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -107,41 +116,86 @@ impl Default for Config { max_execution_time: Some(600), }, }, + allowlist: None, } } } impl Config { - /// Load configuration from multiple sources + /// Load configuration from default discovery paths (user dir, project file). pub async fn load() -> Result { + Self::load_with_paths(None, None).await + } + + /// Load configuration with optional explicit `--config` and + /// `--policy-overlay` paths. The overlay, when supplied, is applied to the + /// config's allowlist (or to the default allowlist if the config has none), + /// and the merged result must be strictly narrower than the base. + pub async fn load_with_paths( + explicit_config: Option<&Path>, + overlay_path: Option<&Path>, + ) -> Result { let mut config = Self::default(); - // Try to load from user config directory - if let Some(user_config) = Self::user_config_path() { - if user_config.exists() { - info!("Loading user configuration from: {}", user_config.display()); - config = Self::load_from_file(&user_config).await?; + // Discovery: --config wins over user config wins over project config. + if let Some(path) = explicit_config { + info!("Loading explicit configuration from: {}", path.display()); + config = Self::load_from_file(path).await?; + } else { + if let Some(user_config) = Self::user_config_path() { + if user_config.exists() { + info!("Loading user configuration from: {}", user_config.display()); + config = Self::load_from_file(&user_config).await?; + } } - } - // Try to load from project config - let project_config = Path::new(".safe-ai-util.toml"); - if project_config.exists() { - info!( - "Loading project configuration from: {}", - project_config.display() - ); - let project_config = Self::load_from_file(project_config).await?; - config = Self::merge_configs(config, project_config); + let project_config = Path::new(".safe-ai-util.toml"); + if project_config.exists() { + info!( + "Loading project configuration from: {}", + project_config.display() + ); + let project_config = Self::load_from_file(project_config).await?; + config = Self::merge_configs(config, project_config); + } } - // Override with environment variables config = Self::apply_env_overrides(config)?; + // Apply policy overlay last — must narrow whatever allowlist we have. + if let Some(overlay_path) = overlay_path { + info!("Applying policy overlay from: {}", overlay_path.display()); + let overlay = Self::load_overlay_from_file(overlay_path).await?; + let base = config + .allowlist + .clone() + .unwrap_or_else(AllowlistConfig::secure_default); + let merged = base.apply_overlay(&overlay)?; + config.allowlist = Some(merged); + } + debug!("Final configuration: {:#?}", config); Ok(config) } + /// Load an overlay TOML file. + async fn load_overlay_from_file(path: &Path) -> Result { + let content = fs::read_to_string(path).await.map_err(|e| { + AgentError::config(format!( + "Failed to read policy overlay {}: {}", + path.display(), + e + )) + })?; + toml::from_str(&content).map_err(|e| { + AgentError::config(format!( + "Failed to parse policy overlay {}: {}", + path.display(), + e + )) + }) + } + /// Get the user configuration file path fn user_config_path() -> Option { dirs::config_dir().map(|dir| dir.join("safe-ai-util").join("config.toml")) @@ -192,3 +246,67 @@ impl Config { Ok(config) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_omits_allowlist_field() { + let cfg = Config::default(); + assert!(cfg.allowlist.is_none()); + let serialized = toml::to_string(&cfg).expect("config serializes"); + // skip_serializing_if + None means the field should not appear. + assert!( + !serialized.contains("[allowlist]"), + "serialized config unexpectedly contained allowlist section: {serialized}" + ); + } + + #[test] + fn config_roundtrips_with_allowlist_section() { + let toml_input = r#" +[general] +working_directory = "/tmp" +timeout_seconds = 60 +max_retries = 1 + +[logging] +level = "warn" +format = "Json" +file_rotation = false +max_log_size = "1MB" +retention_days = 1 + +[safety] +dry_run = false +confirm_destructive = false +backup_before_delete = false +validate_paths = false + +[git] +auto_stage = false +require_message = false +push_hooks = false +safe_force_push = false + +[execution] +environment_isolation = false + +[execution.resource_limits] + +[allowlist] +permissive_mode = false +always_allowed = ["git", "make"] +blocked = ["bash", "curl"] + +[allowlist.conditionally_allowed] +"#; + let cfg: Config = toml::from_str(toml_input).expect("parses"); + let policy = cfg.allowlist.expect("allowlist present"); + assert!(policy.always_allowed.contains("git")); + assert!(policy.always_allowed.contains("make")); + assert!(policy.blocked.contains("bash")); + assert!(!policy.permissive_mode); + } +} diff --git a/src/executor.rs b/src/executor.rs index 834de30..542de44 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,5 +1,5 @@ // file: src/executor.rs -// version: 2.0.0 +// version: 2.1.0 // guid: bb371682-35cb-4f34-b318-8bf69ec125bd use crate::config::Config; @@ -16,17 +16,27 @@ pub struct Executor { } impl Executor { - /// Create a new executor with the given configuration + /// Create a new executor with the given configuration. + /// + /// When `config.allowlist` is set, the SecurityManager is built from that + /// rich policy (per-command argument restrictions, regex patterns, max-args + /// caps). Otherwise the legacy hardcoded allowlist is used. pub async fn new(config: Config) -> Result { - // Initialize security audit system audit::initialize_audit_system() .map_err(|e| AgentError::system(format!("Failed to initialize audit system: {}", e)))?; - let security = SecurityManager::new(); + let security = match &config.allowlist { + Some(policy) => { + info!("Executor initialized with config-supplied allowlist policy"); + SecurityManager::with_policy(policy.clone()) + } + None => { + info!("Executor initialized with default (hardcoded) allowlist"); + SecurityManager::new() + } + }; - // Log the security configuration - info!("Executor initialized with security controls enabled"); - info!("Security stats: {:?}", security.get_allowed_commands().len()); + info!("Security stats: {} commands allowed", security.get_allowed_commands().len()); Ok(Self { config, security }) } diff --git a/src/logger.rs b/src/logger.rs index 834b78b..315e651 100644 --- a/src/logger.rs +++ b/src/logger.rs @@ -1,30 +1,39 @@ // file: src/logger.rs -// version: 1.2.0 +// version: 1.3.0 // guid: 5a9fbb43-1e0b-4bea-a858-b74b58176503 use crate::error::Result; use chrono; +use std::env; use std::fs; use std::io; +use std::path::PathBuf; use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer}; -/// Setup logging for the application with both stdout and file output +/// Setup logging for the application. +/// +/// Behavior is governed by environment variables so callers (worktrees, +/// MCP subprocess, headless agents) can keep noise contained: +/// +/// * `SAFE_AI_UTIL_QUIET=1` — disable file logging entirely; only emit a +/// minimal stderr layer at warn-level so accidental writes to stdout +/// don't corrupt MCP/JSON pipelines. +/// * `SAFE_AI_UTIL_LOG_DIR=` — directory for the per-invocation log +/// file. Defaults to `./logs/` (legacy behavior) when unset. +/// * `RUST_LOG=` — standard tracing filter, applied to both layers. pub fn setup_logging() -> Result<()> { - // Create logs directory if it doesn't exist - let logs_dir = "logs"; - if !std::path::Path::new(logs_dir).exists() { - fs::create_dir_all(logs_dir)?; + if env::var("SAFE_AI_UTIL_QUIET").map(|v| v != "0" && !v.is_empty()).unwrap_or(false) { + return setup_quiet_logging(); + } + + let logs_dir = log_dir_from_env(); + if !logs_dir.exists() { + fs::create_dir_all(&logs_dir)?; } - // Generate log filename with timestamp let now = chrono::Utc::now(); - let log_filename = format!( - "{}/safe-ai-util-{}.log", - logs_dir, - now.format("%Y%m%d_%H%M%S") - ); + let log_filename = logs_dir.join(format!("safe-ai-util-{}.log", now.format("%Y%m%d_%H%M%S"))); - // Create filter from environment or default to info let filter_stdout = EnvFilter::try_from_default_env() .or_else(|_| EnvFilter::try_new("info")) .unwrap(); @@ -32,26 +41,22 @@ pub fn setup_logging() -> Result<()> { .or_else(|_| EnvFilter::try_new("info")) .unwrap(); - // Create file appender let file = std::fs::OpenOptions::new() .create(true) .append(true) .open(&log_filename)?; - // Create stdout layer let stdout_layer = fmt::layer() .with_target(false) .with_writer(io::stdout) .with_filter(filter_stdout); - // Create file layer let file_layer = fmt::layer() .with_target(false) - .with_ansi(false) // No ANSI colors in log files + .with_ansi(false) .with_writer(file) .with_filter(filter_file); - // Initialize subscriber with both layers tracing_subscriber::registry() .with(stdout_layer) .with(file_layer) @@ -59,8 +64,31 @@ pub fn setup_logging() -> Result<()> { tracing::info!( "Logging initialized - writing to stdout and {}", - log_filename + log_filename.display() ); Ok(()) } + +/// Headless logging: warn+ to stderr, no file, no stdout pollution. +fn setup_quiet_logging() -> Result<()> { + let filter = EnvFilter::try_from_default_env() + .or_else(|_| EnvFilter::try_new("warn")) + .unwrap(); + let stderr_layer = fmt::layer() + .with_target(false) + .with_writer(io::stderr) + .with_filter(filter); + tracing_subscriber::registry().with(stderr_layer).init(); + Ok(()) +} + +/// Resolve the log directory from env vars, falling back to legacy `./logs/`. +fn log_dir_from_env() -> PathBuf { + if let Ok(dir) = env::var("SAFE_AI_UTIL_LOG_DIR") { + if !dir.is_empty() { + return PathBuf::from(dir); + } + } + PathBuf::from("logs") +} diff --git a/src/main.rs b/src/main.rs index 89458d3..1d27035 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,15 +33,24 @@ async fn main() -> Result<()> { // Initialize logging first setup_logging()?; - // Load configuration - let config = Config::load().await?; - info!("Starting Safe AI Utility"); - // Build CLI + // Parse CLI before loading config so --config and --policy-overlay can + // influence which files are read. let app = build_cli(); let matches = app.get_matches(); + let explicit_config = matches.get_one::("config").map(std::path::PathBuf::from); + let overlay_path = matches + .get_one::("policy-overlay") + .map(std::path::PathBuf::from); + + let config = Config::load_with_paths( + explicit_config.as_deref(), + overlay_path.as_deref(), + ) + .await?; + // Create executor with config let executor = Executor::new(config).await?; @@ -107,6 +116,12 @@ fn build_cli() -> Command { .value_name("FILE") .help("Specify custom configuration file") ) + .arg( + Arg::new("policy-overlay") + .long("policy-overlay") + .value_name("FILE") + .help("Apply a narrowing policy overlay (overlay must only tighten the allowlist; widening is rejected)") + ) .arg( Arg::new("args-file") .long("args-file") diff --git a/src/security/allowlist.rs b/src/security/allowlist.rs index 0e45d0b..9ac1523 100644 --- a/src/security/allowlist.rs +++ b/src/security/allowlist.rs @@ -400,6 +400,169 @@ pub struct AllowlistStats { pub permissive_mode: bool, } +/// A narrowing overlay applied on top of an existing `AllowlistConfig`. +/// +/// Every field is optional. When a field is present, its contents must +/// represent a *strictly equal or tighter* policy than the base — the +/// `apply_overlay` function rejects anything that would widen access. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct AllowlistOverlay { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub always_allowed: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub conditionally_allowed: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub blocked: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub permissive_mode: Option, +} + +impl AllowlistConfig { + /// Apply a narrowing overlay, returning the merged config or an error if + /// the overlay would widen access. + pub fn apply_overlay(&self, overlay: &AllowlistOverlay) -> Result { + let mut merged = self.clone(); + + // 1. permissive_mode — overlay can flip it OFF, but never ON if base is OFF. + if let Some(p) = overlay.permissive_mode { + if p && !self.permissive_mode { + return Err(AgentError::security( + "policy overlay attempted to enable permissive_mode (would widen access)", + )); + } + merged.permissive_mode = p; + } + + // 2. always_allowed — every overlay entry must already be reachable in + // the base. Result narrows by enumeration. + if let Some(overlay_allowed) = &overlay.always_allowed { + let base_reachable: HashSet = self + .always_allowed + .iter() + .cloned() + .chain(self.conditionally_allowed.keys().cloned()) + .collect(); + for cmd in overlay_allowed { + if !base_reachable.contains(cmd) { + return Err(AgentError::security(format!( + "policy overlay tried to add '{}' to always_allowed (not reachable in base)", + cmd + ))); + } + } + merged.always_allowed = overlay_allowed.clone(); + for cmd in overlay_allowed { + merged.conditionally_allowed.remove(cmd); + } + } + + // 3. blocked — union (overlay can only add bans). + if let Some(overlay_blocked) = &overlay.blocked { + for cmd in overlay_blocked { + merged.blocked.insert(cmd.clone()); + merged.always_allowed.remove(cmd); + merged.conditionally_allowed.remove(cmd); + } + } + + // 4. conditionally_allowed — only existing keys, with tightened restrictions. + if let Some(overlay_cond) = &overlay.conditionally_allowed { + for (cmd, overlay_restr) in overlay_cond { + let base_reachable = self.always_allowed.contains(cmd) + || self.conditionally_allowed.contains_key(cmd); + if !base_reachable { + return Err(AgentError::security(format!( + "policy overlay tried to introduce conditional rules for '{}' (not reachable in base)", + cmd + ))); + } + + let base_restr = self + .conditionally_allowed + .get(cmd) + .cloned() + .unwrap_or_else(|| CommandRestrictions { + max_args: None, + required_args: vec![], + forbidden_args: vec![], + allowed_patterns: vec![], + forbidden_patterns: vec![], + requires_elevation: false, + custom_validator: None, + }); + + let tightened = tighten_restrictions(&base_restr, overlay_restr); + merged.conditionally_allowed.insert(cmd.clone(), tightened); + merged.always_allowed.remove(cmd); + } + } + + Ok(merged) + } +} + +/// Combine two `CommandRestrictions` such that the result is no looser than +/// either operand. Forbidden lists are unioned; required lists are unioned; +/// max_args is the minimum of the two; allowed_patterns is intersected only +/// when both sides populate it (otherwise the populated side wins, avoiding +/// the "empty allowed_patterns means anything goes" footgun). +fn tighten_restrictions( + base: &CommandRestrictions, + overlay: &CommandRestrictions, +) -> CommandRestrictions { + let mut required = base.required_args.clone(); + for r in &overlay.required_args { + if !required.contains(r) { + required.push(r.clone()); + } + } + + let mut forbidden = base.forbidden_args.clone(); + for f in &overlay.forbidden_args { + if !forbidden.contains(f) { + forbidden.push(f.clone()); + } + } + + let mut forbidden_pat = base.forbidden_patterns.clone(); + for f in &overlay.forbidden_patterns { + if !forbidden_pat.contains(f) { + forbidden_pat.push(f.clone()); + } + } + + let allowed_pat = match (base.allowed_patterns.is_empty(), overlay.allowed_patterns.is_empty()) { + (true, _) => overlay.allowed_patterns.clone(), + (_, true) => base.allowed_patterns.clone(), + (false, false) => base + .allowed_patterns + .iter() + .filter(|p| overlay.allowed_patterns.contains(p)) + .cloned() + .collect(), + }; + + let max_args = match (base.max_args, overlay.max_args) { + (Some(a), Some(b)) => Some(a.min(b)), + (Some(a), None) => Some(a), + (None, Some(b)) => Some(b), + (None, None) => None, + }; + + CommandRestrictions { + max_args, + required_args: required, + forbidden_args: forbidden, + allowed_patterns: allowed_pat, + forbidden_patterns: forbidden_pat, + requires_elevation: base.requires_elevation || overlay.requires_elevation, + custom_validator: overlay + .custom_validator + .clone() + .or_else(|| base.custom_validator.clone()), + } +} + #[cfg(test)] mod tests { use super::*; @@ -445,6 +608,166 @@ mod tests { assert!(!config.is_command_allowed("bash")); } + // ---------------- Overlay narrow-only enforcement ---------------- + + fn make_base() -> AllowlistConfig { + let mut cfg = AllowlistConfig { + always_allowed: HashSet::new(), + conditionally_allowed: HashMap::new(), + blocked: HashSet::new(), + permissive_mode: false, + }; + cfg.always_allowed.insert("git".to_string()); + cfg.always_allowed.insert("make".to_string()); + cfg.blocked.insert("bash".to_string()); + cfg.conditionally_allowed.insert( + "docker".to_string(), + CommandRestrictions { + max_args: Some(20), + required_args: vec![], + forbidden_args: vec!["--privileged".to_string()], + allowed_patterns: vec![], + forbidden_patterns: vec![], + requires_elevation: false, + custom_validator: None, + }, + ); + cfg + } + + #[test] + fn overlay_can_narrow_always_allowed() { + let base = make_base(); + let mut overlay_set = HashSet::new(); + overlay_set.insert("git".to_string()); // drops `make` + let overlay = AllowlistOverlay { + always_allowed: Some(overlay_set), + ..AllowlistOverlay::default() + }; + let merged = base.apply_overlay(&overlay).expect("narrowing should succeed"); + assert!(merged.always_allowed.contains("git")); + assert!(!merged.always_allowed.contains("make")); + } + + #[test] + fn overlay_rejects_widening_always_allowed() { + let base = make_base(); + let mut overlay_set = HashSet::new(); + overlay_set.insert("curl".to_string()); // not in base + let overlay = AllowlistOverlay { + always_allowed: Some(overlay_set), + ..AllowlistOverlay::default() + }; + let err = base.apply_overlay(&overlay).unwrap_err(); + assert!( + err.to_string().contains("not reachable in base"), + "got: {err}" + ); + } + + #[test] + fn overlay_can_add_to_blocked() { + let base = make_base(); + let mut bset = HashSet::new(); + bset.insert("zsh".to_string()); + let overlay = AllowlistOverlay { + blocked: Some(bset), + ..AllowlistOverlay::default() + }; + let merged = base.apply_overlay(&overlay).unwrap(); + assert!(merged.blocked.contains("bash")); + assert!(merged.blocked.contains("zsh")); + } + + #[test] + fn overlay_blocked_overrides_allowed() { + let base = make_base(); + let mut bset = HashSet::new(); + bset.insert("git".to_string()); // ban a previously-allowed command + let overlay = AllowlistOverlay { + blocked: Some(bset), + ..AllowlistOverlay::default() + }; + let merged = base.apply_overlay(&overlay).unwrap(); + assert!(merged.blocked.contains("git")); + assert!(!merged.always_allowed.contains("git")); + assert!(!merged.is_command_allowed("git")); + } + + #[test] + fn overlay_can_tighten_conditional_restrictions() { + let base = make_base(); + let mut cond = HashMap::new(); + cond.insert( + "docker".to_string(), + CommandRestrictions { + max_args: Some(5), // tighter than base's 20 + required_args: vec![], + forbidden_args: vec!["--rm".to_string()], // adds new ban + allowed_patterns: vec![], + forbidden_patterns: vec![], + requires_elevation: false, + custom_validator: None, + }, + ); + let overlay = AllowlistOverlay { + conditionally_allowed: Some(cond), + ..AllowlistOverlay::default() + }; + let merged = base.apply_overlay(&overlay).unwrap(); + let r = merged.conditionally_allowed.get("docker").unwrap(); + assert_eq!(r.max_args, Some(5)); + assert!(r.forbidden_args.contains(&"--privileged".to_string())); // base preserved + assert!(r.forbidden_args.contains(&"--rm".to_string())); // overlay added + } + + #[test] + fn overlay_rejects_introducing_unknown_conditional() { + let base = make_base(); + let mut cond = HashMap::new(); + cond.insert( + "wget".to_string(), + CommandRestrictions { + max_args: Some(1), + required_args: vec![], + forbidden_args: vec![], + allowed_patterns: vec![], + forbidden_patterns: vec![], + requires_elevation: false, + custom_validator: None, + }, + ); + let overlay = AllowlistOverlay { + conditionally_allowed: Some(cond), + ..AllowlistOverlay::default() + }; + let err = base.apply_overlay(&overlay).unwrap_err(); + assert!(err.to_string().contains("not reachable in base")); + } + + #[test] + fn overlay_rejects_enabling_permissive_mode() { + let base = make_base(); + let overlay = AllowlistOverlay { + permissive_mode: Some(true), + ..AllowlistOverlay::default() + }; + let err = base.apply_overlay(&overlay).unwrap_err(); + assert!(err.to_string().contains("permissive_mode")); + } + + #[test] + fn overlay_can_disable_permissive_mode() { + let mut base = make_base(); + base.permissive_mode = true; + let overlay = AllowlistOverlay { + permissive_mode: Some(false), + ..AllowlistOverlay::default() + }; + let merged = base.apply_overlay(&overlay).unwrap(); + assert!(!merged.permissive_mode); + } + #[test] fn test_restrictions() { let mut config = AllowlistConfig::secure_default(); diff --git a/src/security/audit.rs b/src/security/audit.rs index 3925934..0c278a4 100644 --- a/src/security/audit.rs +++ b/src/security/audit.rs @@ -193,18 +193,34 @@ fn write_audit_entry_impl(entry: &AuditEntry) -> std::io::Result<()> { Ok(()) } -/// Get the audit log directory +/// Get the audit log directory. +/// +/// Checked in order: +/// 1. `SAFE_AI_UTIL_AUDIT_PATH` (preferred, project-namespaced) +/// 2. `COPILOT_AUDIT_DIR` (legacy, kept for backwards compatibility) +/// 3. `/security` fn get_audit_log_directory() -> PathBuf { - // Try to use a dedicated audit log directory + if let Ok(audit_dir) = std::env::var("SAFE_AI_UTIL_AUDIT_PATH") { + if !audit_dir.is_empty() { + return PathBuf::from(audit_dir); + } + } if let Ok(audit_dir) = std::env::var("COPILOT_AUDIT_DIR") { - return PathBuf::from(audit_dir); + if !audit_dir.is_empty() { + return PathBuf::from(audit_dir); + } } - // Fall back to logs directory in current working directory - let mut log_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")); - log_dir.push("logs"); - log_dir.push("security"); - log_dir + let base = std::env::var("SAFE_AI_UTIL_LOG_DIR") + .ok() + .filter(|s| !s.is_empty()) + .map(PathBuf::from) + .unwrap_or_else(|| { + let mut p = std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")); + p.push("logs"); + p + }); + base.join("security") } /// Rotate audit logs if they get too large diff --git a/src/security/mod.rs b/src/security/mod.rs index b62ca87..21c5465 100644 --- a/src/security/mod.rs +++ b/src/security/mod.rs @@ -1,5 +1,5 @@ // file: src/security/mod.rs -// version: 1.0.0 +// version: 1.1.0 // guid: a1b2c3d4-e5f6-7890-abcd-ef1234567890 //! Security module for the Copilot Agent Utility @@ -13,24 +13,55 @@ pub mod validator; pub mod audit; use crate::error::{AgentError, Result}; +use allowlist::AllowlistConfig; use std::collections::HashSet; use tracing::{info, warn}; -/// Security configuration and enforcement +/// Security configuration and enforcement. +/// +/// Two modes: +/// - **Legacy** (`policy = None`): use the hardcoded `default_allowed_commands` +/// set. This preserves pre-1.1 behavior for callers that haven't supplied +/// a config-file allowlist. +/// - **Policy-driven** (`policy = Some(_)`): defer fully to `AllowlistConfig`, +/// which provides per-command restrictions (forbidden args, regex patterns, +/// max-args caps, custom validators). The hashset is still populated for +/// `is_command_allowed` cheap-path lookups, but argument validation goes +/// through the richer `policy.validate_command()` path. #[derive(Debug, Clone)] pub struct SecurityManager { allowed_commands: HashSet, audit_enabled: bool, strict_mode: bool, + policy: Option, } impl SecurityManager { - /// Create a new security manager with default safe configuration + /// Create a new security manager with the default hardcoded allowlist. pub fn new() -> Self { Self { allowed_commands: Self::default_allowed_commands(), audit_enabled: true, strict_mode: true, + policy: None, + } + } + + /// Create a security manager driven by a rich `AllowlistConfig`. + /// + /// The hashset of allowed commands is rebuilt from the policy's + /// `always_allowed` ∪ `conditionally_allowed.keys()` so the cheap-path + /// check stays consistent with the rich validator. + pub fn with_policy(policy: AllowlistConfig) -> Self { + let mut allowed: HashSet = policy.always_allowed.iter().cloned().collect(); + for key in policy.conditionally_allowed.keys() { + allowed.insert(key.clone()); + } + Self { + allowed_commands: allowed, + audit_enabled: true, + strict_mode: true, + policy: Some(policy), } } @@ -111,7 +142,11 @@ impl SecurityManager { result } - /// Validate and sanitize command arguments + /// Validate and sanitize command arguments. + /// + /// When a rich `policy` is attached, the per-command restrictions from + /// `AllowlistConfig::validate_command` (forbidden flags, regex patterns, + /// max-args caps) are applied in addition to the legacy sanitizer/validator. pub fn validate_arguments(&self, command: &str, args: &[String]) -> Result> { if !self.is_command_allowed(command) { return Err(AgentError::security( @@ -122,6 +157,10 @@ impl SecurityManager { let sanitized_args = sanitizer::sanitize_arguments(command, args)?; validator::validate_command_arguments(command, &sanitized_args)?; + if let Some(policy) = &self.policy { + policy.validate_command(command, &sanitized_args)?; + } + if self.audit_enabled { audit::log_command_execution(command, &sanitized_args); } @@ -229,4 +268,60 @@ mod tests { assert!(security.add_allowed_command("dangerous_command".to_string()).is_ok()); assert!(security.is_command_allowed("dangerous_command")); } + + #[test] + fn with_policy_uses_policy_allowlist() { + // Build a tight policy that only allows `git` and disallows everything else. + let mut policy = AllowlistConfig::secure_default(); + policy.always_allowed.clear(); + policy.always_allowed.insert("git".to_string()); + policy.conditionally_allowed.clear(); + + let security = SecurityManager::with_policy(policy); + assert!(security.is_command_allowed("git")); + // Commands that the legacy hardcoded list allows must NOT leak through + // when a policy is set. + assert!(!security.is_command_allowed("cargo")); + assert!(!security.is_command_allowed("npm")); + } + + #[test] + fn with_policy_enforces_forbidden_args() { + // Build a policy that bans `make clean` — a restriction the legacy + // validator/sanitizer has no awareness of, so this test exercises the + // policy layer specifically. + use crate::security::allowlist::CommandRestrictions; + let mut policy = AllowlistConfig::secure_default(); + policy.always_allowed.remove("make"); + policy.conditionally_allowed.insert( + "make".to_string(), + CommandRestrictions { + max_args: Some(10), + required_args: vec![], + forbidden_args: vec!["clean".to_string()], + allowed_patterns: vec![], + forbidden_patterns: vec![], + requires_elevation: false, + custom_validator: None, + }, + ); + + let security = SecurityManager::with_policy(policy); + + // `make build` should pass. + assert!(security + .validate_arguments("make", &vec!["build".to_string()]) + .is_ok()); + + // `make clean` must be rejected by the rich policy. + let err = security + .validate_arguments("make", &vec!["clean".to_string()]) + .unwrap_err(); + let msg = err.to_string().to_lowercase(); + assert!( + msg.contains("forbidden") || msg.contains("clean"), + "expected forbidden-arg rejection for 'make clean', got: {}", + err + ); + } }