Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 114 additions & 9 deletions crates/core/src/diary_ingest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ pub fn ingest_diaries(
let mut palace_db = PalaceDb::open(palace_path)?;
let mut days_updated = 0usize;
let mut closets_created = 0usize;
// Track legacy-state backfills (content_hash filled in for entries that
// previously had only the size). These don't update `days_updated`
// because no drawer is rewritten — but the state file still needs to
// be persisted so the strict hash check sticks across runs.
let mut state_dirty = false;

// Find all .md files
let diary_files: Vec<PathBuf> = WalkDir::new(&diary_dir)
Expand Down Expand Up @@ -84,14 +89,25 @@ pub fn ingest_diaries(
let curr_size = text.len();
let curr_hash = format!("{:x}", Sha256::digest(text.as_bytes()));
if !force {
if let Some(prev) = state.get(&state_key) {
if let Some(prev) = state.get(&state_key).cloned() {
if let Some(prev_hash) = prev.content_hash.as_ref() {
if prev_hash == &curr_hash {
continue;
}
} else if prev.size > 0 && prev.size == curr_size {
// Legacy state without content_hash: keep size-based skip so a
// post-upgrade run doesn't re-ingest every untouched diary.
// Legacy state without content_hash: keep size-based skip but
// backfill the hash so future runs use the strict check
// (mirrors upstream mempalace `2ff6283`). Without this, a
// pre-Fix-#3 state file would stay on the size-only path
// forever and re-introduce the same-size-edit blind spot.
state.insert(
state_key.clone(),
StateEntry {
content_hash: Some(curr_hash.clone()),
..prev
},
);
state_dirty = true;
continue;
}
}
Expand Down Expand Up @@ -143,13 +159,18 @@ pub fn ingest_diaries(

palace_db.flush()?;

// Save state
if days_updated > 0 {
// Save state when any drawer was rewritten OR when legacy entries
// were backfilled with their content_hash. The latter is silent (no
// banner) because no drawer changed — but persisting it is what
// makes the strict hash check stick on the next run.
if days_updated > 0 || state_dirty {
fs::write(&state_file, serde_json::to_string_pretty(&state)?)?;
println!(
"Diary: {} days updated, {} new closets",
days_updated, closets_created
);
if days_updated > 0 {
println!(
"Diary: {} days updated, {} new closets",
days_updated, closets_created
);
}
}

Ok(DiaryIngestStats {
Expand Down Expand Up @@ -394,4 +415,88 @@ mod tests {
None => std::env::remove_var("HOME"),
}
}

#[test]
fn test_ingest_diaries_backfills_content_hash_on_legacy_state() {
// Regression for upstream mempalace 2ff6283 (follow-up to #925).
//
// A pre-Fix-#3 state file has `size` set but no `content_hash`. The
// size-skip path correctly avoids re-ingesting an untouched diary,
// but it must also write the missing hash back so subsequent runs
// use the strict hash check — otherwise a same-size edit done after
// the upgrade would still slip through the legacy code path
// forever.
let _guard = crate::test_env_lock()
.lock()
.expect("test env lock should be available");
let temp = tempfile::TempDir::new().unwrap();
let prev_home = std::env::var_os("HOME");
std::env::set_var("HOME", temp.path());

let palace_path = temp.path().join("palace");
let diary_dir = temp.path().join("diaries");
std::fs::create_dir_all(&diary_dir).unwrap();

let diary_file = diary_dir.join("2024-01-15.md");
let original = "## Notes\n\nteh quick brown fox jumps over the lazy dog and again here.\n";
std::fs::write(&diary_file, original).unwrap();

// First ingest establishes baseline state (with content_hash).
let stats = ingest_diaries(&diary_dir, Some(&palace_path), "diary", false).unwrap();
assert_eq!(stats.days_updated, 1);

// ingest_diaries canonicalizes diary_dir internally (`resolve_path`).
// On macOS `/tmp` symlinks to `/private/tmp`, which changes the
// hashed state-file key — look up the state file via the same
// canonical form ingest_diaries actually used.
let canonical_diary_dir = diary_dir
.canonicalize()
.unwrap_or_else(|_| diary_dir.clone());

// Simulate a legacy state file: load, strip content_hash, rewrite.
let state_path = state_file_for(&palace_path, &canonical_diary_dir).unwrap();
let mut raw_state: HashMap<String, serde_json::Value> =
serde_json::from_str(&std::fs::read_to_string(&state_path).unwrap()).unwrap();
for entry in raw_state.values_mut() {
if let Some(obj) = entry.as_object_mut() {
obj.remove("content_hash");
}
}
std::fs::write(
&state_path,
serde_json::to_string_pretty(&raw_state).unwrap(),
)
.unwrap();

// Re-ingest with unchanged content: legacy size-skip path triggers,
// but the backfill must persist content_hash to disk.
let stats = ingest_diaries(&diary_dir, Some(&palace_path), "diary", false).unwrap();
assert_eq!(
stats.days_updated, 0,
"legacy size-skip path should still skip unchanged content"
);

let after: HashMap<String, StateEntry> =
serde_json::from_str(&std::fs::read_to_string(&state_path).unwrap()).unwrap();
let entry = after.values().next().expect("state should have one entry");
assert!(
entry.content_hash.is_some(),
"backfill must populate content_hash so future runs use the strict check"
);

// Now a same-size edit must be detected via the freshly-backfilled hash.
let edited = original.replace("teh", "the");
assert_eq!(edited.len(), original.len());
std::fs::write(&diary_file, &edited).unwrap();
let stats = ingest_diaries(&diary_dir, Some(&palace_path), "diary", false).unwrap();
assert_eq!(
stats.days_updated, 1,
"post-backfill: same-size edit must be detected via content hash"
);

match prev_home {
Some(h) => std::env::set_var("HOME", h),
None => std::env::remove_var("HOME"),
}
}
}
119 changes: 114 additions & 5 deletions crates/core/src/exporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,58 @@ fn reject_symlink(path: &Path, label: &str) -> anyhow::Result<()> {
Ok(())
}

/// Open a file for writing, refusing to follow a symlink at the target
/// path.
///
/// Mirrors upstream mempalace `7545238` (Copilot review on #1156): the
/// directory-level `reject_symlink` check leaves a TOCTOU window where a
/// symlink swapped in between create-dir and file-open would still
/// redirect writes. On POSIX we close that window with `O_NOFOLLOW`,
/// which fails the open itself if `path` is a symlink. On Windows we
/// fall back to a `symlink_metadata` pre-check (narrower than no check
/// at all).
fn safe_open_for_write(path: &Path, append: bool) -> anyhow::Result<std::fs::File> {
#[cfg(unix)]
{
use std::os::unix::fs::OpenOptionsExt;
let mut opts = std::fs::OpenOptions::new();
opts.write(true).create(true);
if append {
opts.append(true);
} else {
opts.truncate(true);
}
opts.custom_flags(libc::O_NOFOLLOW);
match opts.open(path) {
Ok(f) => Ok(f),
Err(e) => {
// ELOOP: the target was a symlink (O_NOFOLLOW).
if e.raw_os_error() == Some(libc::ELOOP) {
anyhow::bail!("refusing to write: {} is a symbolic link.", path.display());
}
Err(e.into())
}
}
}
#[cfg(not(unix))]
{
if std::fs::symlink_metadata(path)
.map(|m| m.file_type().is_symlink())
.unwrap_or(false)
{
anyhow::bail!("refusing to write: {} is a symbolic link.", path.display());
}
let mut opts = std::fs::OpenOptions::new();
opts.write(true).create(true);
if append {
opts.append(true);
} else {
opts.truncate(true);
}
Ok(opts.open(path)?)
}
}

pub struct ExportStats {
pub wings: usize,
pub rooms: usize,
Expand Down Expand Up @@ -115,10 +167,7 @@ pub fn export_palace(palace_path: Option<&Path>, output_dir: &Path) -> anyhow::R
let key = format!("{}|{}", wing, room);
let is_new = !opened_rooms.contains_key(&key);

let mut file = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(&room_file)?;
let mut file = safe_open_for_write(&room_file, true)?;
use std::io::Write;
if is_new {
writeln!(file, "# {} / {}\n", wing, room)?;
Expand Down Expand Up @@ -185,7 +234,11 @@ pub fn export_palace(palace_path: Option<&Path>, output_dir: &Path) -> anyhow::R
));
}

std::fs::write(&index_path, index_lines.join("\n"))?;
{
use std::io::Write;
let mut f = safe_open_for_write(&index_path, false)?;
f.write_all(index_lines.join("\n").as_bytes())?;
}

println!(
"\n Exported {} drawers across {} wings, {} rooms",
Expand Down Expand Up @@ -271,4 +324,60 @@ mod tests {
assert!(msg.contains("symbolic link"), "unexpected error: {msg}");
assert!(msg.contains("output_dir"), "unexpected error: {msg}");
}

#[test]
fn test_safe_open_for_write_allows_regular_file() {
// Sanity: a plain (non-existent) path opens fine.
let temp = tempfile::TempDir::new().unwrap();
let path = temp.path().join("regular.md");
let mut f = safe_open_for_write(&path, false).expect("regular path should open");
use std::io::Write;
f.write_all(b"hello").unwrap();
drop(f);
assert_eq!(std::fs::read_to_string(&path).unwrap(), "hello");
}

#[cfg(unix)]
#[test]
fn test_safe_open_for_write_blocks_symlinked_file() {
// Mirrors upstream mempalace 7545238: the per-file open must refuse
// to follow a symlink at the target path, closing the TOCTOU window
// that the directory-level reject_symlink check leaves open.
let temp = tempfile::TempDir::new().unwrap();
let real_target = temp.path().join("real_target.md");
// The target need not exist — O_NOFOLLOW fails on the symlink
// itself before resolution.
let link = temp.path().join("link.md");
std::os::unix::fs::symlink(&real_target, &link).unwrap();

let err = safe_open_for_write(&link, false)
.expect_err("safe_open_for_write must refuse a symlinked target");
let msg = format!("{}", err);
assert!(msg.contains("symbolic link"), "unexpected error: {msg}");

// The symlink target must not have been created behind us.
assert!(
!real_target.exists(),
"symlink target should not have been created"
);
}

#[cfg(unix)]
#[test]
fn test_safe_open_for_write_appends_to_regular_file() {
// The append=true variant used for room files preserves prior
// content (each drawer in a room writes a new section).
let temp = tempfile::TempDir::new().unwrap();
let path = temp.path().join("room.md");
std::fs::write(&path, "original\n").unwrap();
{
let mut f = safe_open_for_write(&path, true).expect("append open should succeed");
use std::io::Write;
f.write_all(b"appended\n").unwrap();
}
assert_eq!(
std::fs::read_to_string(&path).unwrap(),
"original\nappended\n"
);
}
}
Loading
Loading