diff --git a/Cargo.toml b/Cargo.toml index 22b999a8..0069870f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,6 @@ priority = "optional" assets = [ ["target/release/rtk", "usr/bin/", "755"], ] - # cargo-generate-rpm configuration [package.metadata.generate-rpm] assets = [ diff --git a/src/cmds/system/ls.rs b/src/cmds/system/ls.rs index a69c271f..ac40ae32 100644 --- a/src/cmds/system/ls.rs +++ b/src/cmds/system/ls.rs @@ -4,8 +4,20 @@ use super::constants::NOISE_DIRS; use crate::core::runner::{self, RunOptions}; use crate::core::utils::resolved_command; use anyhow::Result; +use lazy_static::lazy_static; +use regex::Regex; use std::io::IsTerminal; +lazy_static! { + /// Matches the date+time portion in `ls -la` output, which serves as a + /// stable anchor regardless of owner/group column width. + /// E.g.: " Mar 31 16:18 " or " Dec 25 2024 " + static ref LS_DATE_RE: Regex = Regex::new( + r"\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2}\s+(?:\d{4}|\d{2}:\d{2})\s+" + ) + .unwrap(); +} + pub fn run(args: &[String], verbose: u8) -> Result { let show_all = args .iter() @@ -101,6 +113,40 @@ fn human_size(bytes: u64) -> String { } } +/// Parse a single `ls -la` line, returning `(file_type_char, size, name)`. +/// +/// Uses the date field as a stable anchor — the date format in `ls -la` is +/// always three tokens (`Mon DD HH:MM` or `Mon DD YYYY`), so we locate it +/// with a regex, then extract size (rightmost number before the date) and +/// filename (everything after the date). This handles owner/group names that +/// contain spaces, which break the old fixed-column approach. +fn parse_ls_line(line: &str) -> Option<(char, u64, String)> { + let date_match = LS_DATE_RE.find(line)?; + let name = line[date_match.end()..].to_string(); + + let before_date = &line[..date_match.start()]; + let before_parts: Vec<&str> = before_date.split_whitespace().collect(); + if before_parts.len() < 4 { + return None; + } + + let perms = before_parts[0]; + let file_type = perms.chars().next()?; + + // Size is the rightmost parseable number before the date. + // nlinks is also numeric but appears earlier; scanning from the end + // guarantees we hit the size field first. + let mut size: u64 = 0; + for part in before_parts.iter().rev() { + if let Ok(s) = part.parse::() { + size = s; + break; + } + } + + Some((file_type, size, name)) +} + /// Parse ls -la output into compact format: /// name/ (dirs) /// name size (files) @@ -113,18 +159,13 @@ fn compact_ls(raw: &str, show_all: bool) -> (String, String) { let mut by_ext: HashMap = HashMap::new(); for line in raw.lines() { - // Skip total, empty, . and .. if line.starts_with("total ") || line.is_empty() { continue; } - let parts: Vec<&str> = line.split_whitespace().collect(); - if parts.len() < 9 { + let Some((file_type, size, name)) = parse_ls_line(line) else { continue; - } - - // Filename is everything from column 9 onward (handles spaces) - let name = parts[8..].join(" "); + }; // Skip . and .. if name == "." || name == ".." { @@ -136,12 +177,9 @@ fn compact_ls(raw: &str, show_all: bool) -> (String, String) { continue; } - let is_dir = parts[0].starts_with('d'); - - if is_dir { + if file_type == 'd' { dirs.push(name); - } else if parts[0].starts_with('-') || parts[0].starts_with('l') { - let size: u64 = parts[4].parse().unwrap_or(0); + } else if file_type == '-' || file_type == 'l' { let ext = if let Some(pos) = name.rfind('.') { name[pos..].to_string() } else { @@ -325,4 +363,109 @@ mod tests { line_count ); } + + // Regression test for #948: owner/group with spaces breaks fixed-column parsing + #[test] + fn test_compact_multiline_group() { + let input = "total 8\n\ + -rw-r--r-- 1 fjeanne utilisa. du domaine 0 Mar 31 16:18 empty.txt\n\ + -rw-r--r-- 1 fjeanne utilisa. du domaine 1234 Mar 31 16:18 data.json\n"; + let (entries, _summary) = compact_ls(input, false); + assert!( + entries.contains("empty.txt"), + "should contain 'empty.txt', got: {entries}" + ); + assert!( + entries.contains("data.json"), + "should contain 'data.json', got: {entries}" + ); + assert!( + !entries.contains("16:18"), + "time should not leak into filename, got: {entries}" + ); + assert!( + entries.contains("0B"), + "empty.txt should show 0B, got: {entries}" + ); + assert!( + entries.contains("1.2K"), + "data.json should show 1.2K (1234 bytes), got: {entries}" + ); + } + + #[test] + fn test_compact_year_format_date() { + // Some systems show year instead of time for old files + let input = "total 8\n\ + -rw-r--r-- 1 user staff 5678 Dec 25 2024 archive.tar\n"; + let (entries, _summary) = compact_ls(input, false); + assert!( + entries.contains("archive.tar"), + "should contain filename, got: {entries}" + ); + assert!( + entries.contains("5.5K"), + "should show 5.5K, got: {entries}" + ); + } + + #[test] + fn test_parse_ls_line_basic() { + let (ft, size, name) = parse_ls_line( + "-rw-r--r-- 1 user staff 1234 Jan 1 12:00 file.txt", + ) + .unwrap(); + assert_eq!(ft, '-'); + assert_eq!(size, 1234); + assert_eq!(name, "file.txt"); + } + + #[test] + fn test_parse_ls_line_multiline_group() { + let (ft, size, name) = parse_ls_line( + "-rw-r--r-- 1 fjeanne utilisa. du domaine 0 Mar 31 16:18 empty.txt", + ) + .unwrap(); + assert_eq!(ft, '-'); + assert_eq!(size, 0); + assert_eq!(name, "empty.txt"); + } + + #[test] + fn test_parse_ls_line_dir_with_space_in_group() { + let (ft, size, name) = parse_ls_line( + "drwxr-xr-x 2 fjeanne utilisa. du domaine 64 Mar 31 16:18 my dir", + ) + .unwrap(); + assert_eq!(ft, 'd'); + assert_eq!(size, 64); + assert_eq!(name, "my dir"); + } + + #[test] + fn test_parse_ls_line_symlink() { + let (ft, size, name) = parse_ls_line( + "lrwxr-xr-x 1 user staff 10 Jan 1 12:00 link -> target", + ) + .unwrap(); + assert_eq!(ft, 'l'); + assert_eq!(size, 10); + assert_eq!(name, "link -> target"); + } + + #[test] + fn test_parse_ls_line_returns_none_for_total() { + assert!(parse_ls_line("total 48").is_none()); + } + + #[test] + fn test_parse_ls_line_year_format() { + let (ft, size, name) = parse_ls_line( + "-rw-r--r-- 1 user staff 5678 Dec 25 2024 old.tar.gz", + ) + .unwrap(); + assert_eq!(ft, '-'); + assert_eq!(size, 5678); + assert_eq!(name, "old.tar.gz"); + } }