From 8e07aee2ae7fbcaef2e46aed84bbb9a41b29f2bf Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 16:08:12 +0000 Subject: [PATCH 01/10] Add serde::Deserialize impl for ociman::reference::Name Enables Name to be deserialized from a plain string via #[serde(try_from = "String")] and a TryFrom impl that delegates to FromStr. Lets downstream crates use Name directly in serde-derived config types without a custom deserialize helper. --- ociman/src/reference.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/ociman/src/reference.rs b/ociman/src/reference.rs index 1f501cf7..0e4d2128 100644 --- a/ociman/src/reference.rs +++ b/ociman/src/reference.rs @@ -644,12 +644,21 @@ impl std::fmt::Display for Path { /// assert_eq!(name.domain.unwrap().to_string(), "localhost"); /// assert_eq!(name.path.to_string(), "pg-ephemeral/main"); /// ``` -#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd, serde::Deserialize)] +#[serde(try_from = "String")] pub struct Name { pub domain: Option, pub path: Path, } +impl TryFrom for Name { + type Error = String; + + fn try_from(value: String) -> Result { + value.parse() + } +} + impl Parse for Name { /// Parse a name, applying the distribution/reference convention for domain /// detection on top of the OCI spec: the segment before the first '/' is @@ -1077,4 +1086,17 @@ mod tests { assert!(name.domain.is_none()); assert_eq!(name.path.to_string(), "myorg/myproject/myimage"); } + + #[test] + fn test_name_deserialize_from_string() { + let name: Name = serde_json::from_str("\"ghcr.io/myorg\"").unwrap(); + assert_eq!(name.domain.unwrap().to_string(), "ghcr.io"); + assert_eq!(name.path.to_string(), "myorg"); + } + + #[test] + fn test_name_deserialize_invalid_string_fails() { + let result: Result = serde_json::from_str("\"bad name with spaces\""); + assert!(result.is_err()); + } } From d5984f672fc26759e4f210f10228ff48127ee6f3 Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 19:11:35 +0000 Subject: [PATCH 02/10] Unify PathComponent grammar through a const consumer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the nom-combinator `Parse` impl and the separate hand-rolled helpers with a single `const fn consume_path_component` that implements the full grammar (alpha-numeric blocks + [_.]|__|[-]* separators) and returns the number of bytes consumed. Both call sites delegate to it: - `PathComponent::from_static_or_panic` (new) — requires the full input to be consumed, panics otherwise. Usable at compile time for constants. - `impl Parse for PathComponent` — uses the consumed length as the nom match boundary, leaving the remainder for the next combinator. Storage changes from `String` to `Cow<'static, str>` so the const branch can hold a borrowed `&'static str`. Grammar is unchanged; the existing PathComponent doctest (with `_`, `.`, `__`, multi-dash edge cases) still passes through the new wrapper. Added const-context tests for the new constructor covering valid inputs and the three rejection modes (empty, leading separator, trailing orphan separator). --- ociman/src/reference.rs | 153 ++++++++++++++++++++++++++++++++++------ 1 file changed, 132 insertions(+), 21 deletions(-) diff --git a/ociman/src/reference.rs b/ociman/src/reference.rs index 0e4d2128..c0413fc9 100644 --- a/ociman/src/reference.rs +++ b/ociman/src/reference.rs @@ -47,7 +47,7 @@ use nom::{ IResult, Parser, branch::alt, - bytes::complete::{tag, take_while, take_while_m_n, take_while1}, + bytes::complete::{take_while, take_while_m_n, take_while1}, character::complete::{char, digit1, u16}, combinator::{all_consuming, opt, recognize, verify}, multi::{many0, separated_list1}, @@ -508,13 +508,94 @@ impl std::fmt::Display for Domain { /// ); /// ``` #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct PathComponent(String); +pub struct PathComponent(std::borrow::Cow<'static, str>); + +/// Const-friendly consumer for [`PathComponent`]. +/// +/// Implements the full grammar +/// `path-component := alpha-numeric (separator alpha-numeric)*` +/// where `alpha-numeric := [a-z0-9]+` and +/// `separator := [_.] | __ | [-]*`. +/// +/// Returns the number of bytes consumed from the start of `input`. The +/// callers split: +/// - [`PathComponent::from_static_or_panic`] requires the full input to be +/// consumed (rejects trailing data). +/// - [`PathComponent`]'s `Parse` impl uses the consumed length as the nom +/// match boundary, leaving the remainder for the next combinator. +/// +/// This is the single source of truth for the grammar — both compile-time +/// static literals and runtime nom parsing go through it. +const fn consume_path_component(input: &str) -> Result { + let bytes = input.as_bytes(); + let len = bytes.len(); + + // Initial alpha-numeric block (mandatory, at least one byte). + let mut pos = 0; + while pos < len { + let byte = bytes[pos]; + if !(byte.is_ascii_lowercase() || byte.is_ascii_digit()) { + break; + } + pos += 1; + } + if pos == 0 { + return Err("path component must start with [a-z0-9]"); + } + + // Loop: try to consume (separator + alpha-numeric). If the trailing + // alpha-numeric is missing, rewind the separator and stop. + loop { + let saved = pos; + + // Separator: __ | _ | . | [-]* + if pos + 1 < len && bytes[pos] == b'_' && bytes[pos + 1] == b'_' { + pos += 2; + } else if pos < len && (bytes[pos] == b'_' || bytes[pos] == b'.') { + pos += 1; + } else { + while pos < len && bytes[pos] == b'-' { + pos += 1; + } + } + + // Trailing alpha-numeric (mandatory after a non-empty separator + // attempt; required for the iteration to succeed). + let inner_start = pos; + while pos < len { + let byte = bytes[pos]; + if !(byte.is_ascii_lowercase() || byte.is_ascii_digit()) { + break; + } + pos += 1; + } + + if pos == inner_start { + return Ok(saved); + } + } +} impl PathComponent { #[must_use] pub fn as_str(&self) -> &str { &self.0 } + + /// Create a [`PathComponent`] from a static string at compile time. + /// + /// # Panics + /// + /// Panics if `input` is not a valid path component, or if it contains + /// trailing bytes that cannot be consumed by the grammar. + #[must_use] + pub const fn from_static_or_panic(input: &'static str) -> Self { + match consume_path_component(input) { + Ok(consumed) if consumed == input.len() => Self(std::borrow::Cow::Borrowed(input)), + Ok(_) => panic!("path component has trailing input"), + Err(message) => panic!("{}", message), + } + } } impl AsRef for PathComponent { @@ -523,27 +604,18 @@ impl AsRef for PathComponent { } } -impl PathComponent { - /// Pattern: `[a-z0-9]+` - fn parse_alpha_numeric(input: &str) -> IResult<&str, &str> { - take_while1(|character: char| character.is_ascii_lowercase() || character.is_ascii_digit()) - .parse(input) - } - - /// Pattern: `[_.]|__|[-]*` - fn parse_separator(input: &str) -> IResult<&str, &str> { - alt((tag("__"), tag("_"), tag("."), recognize(many0(char('-'))))).parse(input) - } -} - impl Parse for PathComponent { fn parse(input: &str) -> IResult<&str, Self> { - recognize(pair( - Self::parse_alpha_numeric, - many0(pair(Self::parse_separator, Self::parse_alpha_numeric)), - )) - .map(|string: &str| Self(string.to_string())) - .parse(input) + match consume_path_component(input) { + Ok(consumed) => { + let (matched, rest) = input.split_at(consumed); + Ok((rest, Self(std::borrow::Cow::Owned(matched.to_string())))) + } + Err(_) => Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::TakeWhile1, + ))), + } } } @@ -1099,4 +1171,43 @@ mod tests { let result: Result = serde_json::from_str("\"bad name with spaces\""); assert!(result.is_err()); } + + #[test] + fn test_path_component_from_static_or_panic_const_context() { + // Exercise the const constructor in a const context — this would + // fail at compile time if the grammar rejected the input. + const PLAIN: PathComponent = PathComponent::from_static_or_panic("alpine"); + const WITH_DASH: PathComponent = PathComponent::from_static_or_panic("pg-ephemeral"); + const WITH_UNDERSCORE: PathComponent = PathComponent::from_static_or_panic("foo_bar"); + const WITH_DOT: PathComponent = PathComponent::from_static_or_panic("foo.bar"); + const WITH_DOUBLE_UNDERSCORE: PathComponent = + PathComponent::from_static_or_panic("foo__bar"); + + assert_eq!(PLAIN.as_str(), "alpine"); + assert_eq!(WITH_DASH.as_str(), "pg-ephemeral"); + assert_eq!(WITH_UNDERSCORE.as_str(), "foo_bar"); + assert_eq!(WITH_DOT.as_str(), "foo.bar"); + assert_eq!(WITH_DOUBLE_UNDERSCORE.as_str(), "foo__bar"); + } + + #[test] + #[should_panic(expected = "path component must start with [a-z0-9]")] + fn test_path_component_from_static_or_panic_rejects_leading_dash() { + let _ = PathComponent::from_static_or_panic("-alpine"); + } + + #[test] + #[should_panic(expected = "path component has trailing input")] + fn test_path_component_from_static_or_panic_rejects_trailing_dash() { + // "foo-" consumes as "foo" (grammar rewinds the orphan dash), + // leaving a non-zero trailing remainder that `from_static_or_panic` + // refuses. + let _ = PathComponent::from_static_or_panic("foo-"); + } + + #[test] + #[should_panic(expected = "path component must start with [a-z0-9]")] + fn test_path_component_from_static_or_panic_rejects_empty() { + let _ = PathComponent::from_static_or_panic(""); + } } From 961f1bbd901a13e7035c3d5df2fef357756ce20b Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 20:04:37 +0000 Subject: [PATCH 03/10] Refactor Path to non-empty (first, rest) representation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Path is defined by the OCI grammar as one-or-more path components. The previous `Path(Vec)` storage made non-emptiness an invariant that constructors had to enforce at runtime, which leaked `Option` / `expect("non-empty")` into callers that want to build a Path programmatically. Change storage to `Path { first, rest }` so non-emptiness is encoded in the type. The nom Parse impl now uses `pair(first, many0(separated by /))` which naturally produces the shape. Public API: - `Path::iter()` — iterate all components starting with first - `Path::extended(component)` — append a component, returning a new non-empty Path - `impl From for Path` — build a single-component Path `components()` is removed; callers use `iter()` instead. The Path doctest is updated accordingly. --- ociman/src/reference.rs | 43 ++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/ociman/src/reference.rs b/ociman/src/reference.rs index c0413fc9..240dd00a 100644 --- a/ociman/src/reference.rs +++ b/ociman/src/reference.rs @@ -629,6 +629,9 @@ impl std::fmt::Display for PathComponent { /// /// Pattern: `path-component ['/' path-component]*` /// +/// Non-emptiness is encoded in the type: a [`Path`] is `(first, rest)`, where +/// `first` is always present and `rest` may be empty. +/// /// # Examples /// /// ``` @@ -636,11 +639,11 @@ impl std::fmt::Display for PathComponent { /// /// let path: Path = "library/alpine".parse().unwrap(); /// assert_eq!(path.to_string(), "library/alpine"); -/// assert_eq!(path.components().len(), 2); +/// assert_eq!(path.iter().count(), 2); /// /// let path: Path = "owner/repo/subpath".parse().unwrap(); /// assert_eq!(path.to_string(), "owner/repo/subpath"); -/// assert_eq!(path.components().len(), 3); +/// assert_eq!(path.iter().count(), 3); /// /// // Rejects empty input /// assert_eq!( @@ -661,26 +664,48 @@ impl std::fmt::Display for PathComponent { /// ); /// ``` #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct Path(Vec); +pub struct Path { + first: PathComponent, + rest: Vec, +} impl Path { + /// Iterate over all components, starting with the first. + pub fn iter(&self) -> impl Iterator { + std::iter::once(&self.first).chain(self.rest.iter()) + } + + /// Append a component to the path. #[must_use] - pub fn components(&self) -> &[PathComponent] { - &self.0 + pub fn extended(mut self, component: PathComponent) -> Self { + self.rest.push(component); + self + } +} + +impl From for Path { + fn from(first: PathComponent) -> Self { + Self { + first, + rest: Vec::new(), + } } } impl Parse for Path { fn parse(input: &str) -> IResult<&str, Self> { - separated_list1(char('/'), PathComponent::parse) - .map(Self) - .parse(input) + pair( + PathComponent::parse, + many0(preceded(char('/'), PathComponent::parse)), + ) + .map(|(first, rest)| Self { first, rest }) + .parse(input) } } impl std::fmt::Display for Path { fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write_interspersed(formatter, &self.0, "/") + write_interspersed(formatter, self.iter().map(PathComponent::as_str), "/") } } From ee7163fac6705d5fa701fe51ac58cec9d56df23b Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 20:41:57 +0000 Subject: [PATCH 04/10] Make Backend::pull_image fallible with PullError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `pull_image` panicked on any failure via `.unwrap()`. The upcoming cache-walk-back flow needs to distinguish "the registry does not have this tag" from "something else went wrong" so it can fall through to an older cache stage without silently swallowing auth or network errors. Introduce `PullError::{NotFound, Other}` and a pure `classify_pull_result` helper that maps (exit status, stderr bytes) to the variant. NotFound is detected via the OCI distribution-spec `MANIFEST_UNKNOWN` error code as rendered on stderr by docker, podman, and skopeo — the substring `"manifest unknown"`. The const comment documents why this is load-bearing string matching and why every better-looking alternative (engine sockets, CLI --json flags, manifest-inspect, direct registry HTTP) was rejected. `pull_image_if_absent` now propagates the Result. The three existing callers (two in pg-ephemeral/tests/base.rs, two in ociman/tests/integration.rs, one in the ociman internal backend::tests module) all `.unwrap()` — they want must-succeed semantics. Reference fields in PullError are boxed to stay under clippy::result-large-err's 128-byte threshold. Unit tests cover the classifier directly with canned stderr strings for the four interesting cases: success, podman not-found, docker not-found, auth failure, network error. No network, no daemon, no registry required. --- ociman/src/backend.rs | 174 ++++++++++++++++++++++++++++++++++-- ociman/tests/integration.rs | 4 +- pg-ephemeral/tests/base.rs | 4 +- 3 files changed, 169 insertions(+), 13 deletions(-) diff --git a/ociman/src/backend.rs b/ociman/src/backend.rs index d5f37474..001a99c7 100644 --- a/ociman/src/backend.rs +++ b/ociman/src/backend.rs @@ -1,5 +1,90 @@ use cmd_proc::*; +/// Substring used to detect the OCI distribution-spec `MANIFEST_UNKNOWN` +/// error code as rendered on stderr by docker, podman, and skopeo when a +/// registry reports that a tag does not exist. +/// +/// **This is load-bearing string matching and we do not like it.** We fall +/// back on it because there is no better option available today: +/// +/// - Neither `docker pull` nor `podman pull` has a `--json` / `--format` +/// flag. The CLIs only expose human-readable stderr. +/// - Exit codes are useless: both tools return `1` (or `125` for podman) +/// for every failure mode — not-found, auth, network, tls — without +/// discrimination. +/// - The docker/podman engine REST APIs do stream NDJSON, but the error +/// `message` / `errorDetail.message` fields contain the same human +/// string (`... manifest unknown`) — the daemons do not surface the +/// registry's structured error code — so switching to the socket would +/// just move the substring match from stderr to a JSON field, at the +/// cost of a ~400KB HTTP client dependency. No actual signal gain. +/// - `docker manifest inspect` still requires `experimental: enabled` as +/// of Docker 28, and `podman manifest inspect` is local-only. `skopeo +/// inspect` is clean but is a separate binary not always installed +/// (Docker Desktop ships without it). +/// - The only path to a clean spec-defined signal is talking to the +/// registry HTTP API directly, which means reimplementing bearer-token +/// auth, cred-helper integration, and auth challenges — substantial +/// work for a library that otherwise just shells out to the CLI. +/// +/// The OCI Distribution Spec v1.1.0 defines the `MANIFEST_UNKNOWN` error +/// code (code-7) that registries MUST return when a manifest is absent: +/// +/// +/// **However the spec does not mandate this stderr string.** The spec only +/// mandates the uppercase `code` field in the registry's JSON response; +/// the human-readable `message` field is OPTIONAL and its content is +/// unspecified. The lowercase `"manifest unknown"` substring this constant +/// matches is a de-facto convention that docker, podman, and skopeo all +/// happen to use when rendering the error to stderr. If a future CLI +/// version changes its wording, this constant must be updated and a +/// corresponding test will break. +const MANIFEST_UNKNOWN_STDERR_SIGNAL: &str = "manifest unknown"; + +#[derive(Debug, thiserror::Error)] +pub enum PullError { + // The `reference` field is boxed because `image::Reference` is ~176 + // bytes (Name + Vec of PathComponents + Tag + Digest), and + // `clippy::result-large-err` trips on anything >128 bytes inside a + // `Result::Err`. + #[error("image not found in registry: {reference}")] + NotFound { + reference: Box, + }, + #[error("pull failed for {reference}: {message}")] + Other { + reference: Box, + message: String, + }, +} + +/// Turn a pull subprocess exit status + captured stderr into a +/// [`PullError`] (or [`Ok`] on success). +/// +/// Split out from [`Backend::pull_image`] so it can be unit-tested with +/// canned stderr bytes — no network, no daemon, no registry. +fn classify_pull_result( + reference: &crate::image::Reference, + success: bool, + stderr: &[u8], +) -> Result<(), PullError> { + if success { + return Ok(()); + } + + let stderr = String::from_utf8_lossy(stderr); + if stderr.contains(MANIFEST_UNKNOWN_STDERR_SIGNAL) { + Err(PullError::NotFound { + reference: Box::new(reference.clone()), + }) + } else { + Err(PullError::Other { + reference: Box::new(reference.clone()), + message: stderr.trim().to_string(), + }) + } +} + #[derive(Copy, Clone, Debug, Eq, PartialEq, serde::Deserialize, clap::ValueEnum)] #[serde(rename_all = "snake_case")] pub enum Selection { @@ -73,19 +158,33 @@ impl Backend { .unwrap(); } - /// Pull an image from a registry - pub async fn pull_image(&self, reference: &crate::image::Reference) { - self.command() + /// Pull an image from a registry. + /// + /// Stdout streams to the parent so users see layer progress. Stderr is + /// captured and parsed to distinguish [`PullError::NotFound`] (registry + /// reports `manifest unknown`) from other failures. + pub async fn pull_image(&self, reference: &crate::image::Reference) -> Result<(), PullError> { + let output = self + .command() .arguments(["pull", &reference.to_string()]) - .status() + .stderr_capture() + .accept_nonzero_exit() + .run() .await .unwrap(); + + classify_pull_result(reference, output.status.success(), &output.bytes) } - /// Pull an image only if it's not already present - pub async fn pull_image_if_absent(&self, reference: &crate::image::Reference) { - if !self.is_image_present(reference).await { - self.pull_image(reference).await; + /// Pull an image only if it's not already present locally. + pub async fn pull_image_if_absent( + &self, + reference: &crate::image::Reference, + ) -> Result<(), PullError> { + if self.is_image_present(reference).await { + Ok(()) + } else { + self.pull_image(reference).await } } @@ -539,6 +638,63 @@ pub mod resolve { mod tests { use super::*; + fn pull_test_reference() -> crate::image::Reference { + "ghcr.io/myorg/pg-ephemeral/main:abc123".parse().unwrap() + } + + #[test] + fn test_classify_pull_result_success() { + let reference = pull_test_reference(); + assert!(classify_pull_result(&reference, true, b"").is_ok()); + } + + #[test] + fn test_classify_pull_result_not_found_podman() { + let reference = pull_test_reference(); + // Representative podman stderr for a non-existent tag. + let stderr = b"Error: initializing source docker://ghcr.io/myorg/pg-ephemeral/main:abc123: reading manifest abc123 in ghcr.io/myorg/pg-ephemeral/main: manifest unknown"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::NotFound { reference: r }) => assert_eq!(*r, reference), + other => panic!("expected PullError::NotFound, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_not_found_docker() { + let reference = pull_test_reference(); + // Representative docker stderr for a non-existent tag. + let stderr = b"Error response from daemon: manifest for ghcr.io/myorg/pg-ephemeral/main:abc123 not found: manifest unknown: manifest unknown"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::NotFound { reference: r }) => assert_eq!(*r, reference), + other => panic!("expected PullError::NotFound, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_auth_failure_is_other() { + let reference = pull_test_reference(); + // Auth failure must NOT be misclassified as NotFound. + let stderr = b"Error response from daemon: pull access denied for ghcr.io/myorg/pg-ephemeral/main, repository does not exist or may require 'docker login': denied: requested access to the resource is denied"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::Other { + reference: r, + message, + }) => { + assert_eq!(*r, reference); + assert!(message.contains("denied")); + } + other => panic!("expected PullError::Other, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_network_error_is_other() { + let reference = pull_test_reference(); + let stderr = b"Error response from daemon: Get https://ghcr.io/v2/: dial tcp: lookup ghcr.io: no such host"; + let result = classify_pull_result(&reference, false, stderr); + assert!(matches!(result, Err(PullError::Other { .. }))); + } + #[tokio::test] async fn test_container_resolver_localhost() { let backend = crate::test_backend_setup!(); @@ -674,7 +830,7 @@ mod tests { // Create test images by tagging alpine let source = crate::testing::ALPINE_LATEST_IMAGE.clone(); - backend.pull_image_if_absent(&source).await; + backend.pull_image_if_absent(&source).await.unwrap(); let target_a: crate::image::Reference = "localhost/ociman-test/image-references-by-name:a" .parse() diff --git a/ociman/tests/integration.rs b/ociman/tests/integration.rs index bf03fbc6..77c895ba 100644 --- a/ociman/tests/integration.rs +++ b/ociman/tests/integration.rs @@ -267,7 +267,7 @@ async fn test_image_tag() { let target: ociman::image::Reference = ociman::testing::test_reference("ociman-test-tagged:latest"); - backend.pull_image_if_absent(&source).await; + backend.pull_image_if_absent(&source).await.unwrap(); assert!(!backend.is_image_present(&target).await); @@ -285,7 +285,7 @@ async fn test_image_pull_if_absent() { let reference = ociman::testing::ALPINE_LATEST_IMAGE.clone(); - backend.pull_image_if_absent(&reference).await; + backend.pull_image_if_absent(&reference).await.unwrap(); assert!(backend.is_image_present(&reference).await); } diff --git a/pg-ephemeral/tests/base.rs b/pg-ephemeral/tests/base.rs index f73709d3..1657dc20 100644 --- a/pg-ephemeral/tests/base.rs +++ b/pg-ephemeral/tests/base.rs @@ -5,7 +5,7 @@ async fn pull_test_images() { let backend = ociman::test_backend_setup!(); let default_image: ociman::image::Reference = (&pg_ephemeral::Image::default()).into(); - backend.pull_image(&default_image).await; + backend.pull_image(&default_image).await.unwrap(); for image in [ &*common::POSTGRES_IMAGE, @@ -13,7 +13,7 @@ async fn pull_test_images() { &*common::NODE_IMAGE, &*ociman::testing::ALPINE_LATEST_IMAGE, ] { - backend.pull_image(image).await; + backend.pull_image(image).await.unwrap(); } } From ef142b07e5fc0ee0755402e32b6b17f7d33a21a8 Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 20:47:14 +0000 Subject: [PATCH 05/10] Add cache_registry config option to pg-ephemeral MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows configuring an OCI registry prefix (e.g. ghcr.io/myorg) that is applied to every cache image reference the tool builds. Does not affect the cache key hash — the hex suffix is content-derived only — so two machines pointed at different registries still agree on what content hashes to and can share a remote cache if they point at the same one. Plumbed through the usual defaults/instance/overrides precedence: - Top-level Config default - Per-instance InstanceDefinition override - CLI `--cache-registry` flag override - Threaded into Instance and Definition - Delivered to CacheStatus::from_cache_key via LoadedSeeds::load Cache references are built programmatically using the new Path API (`Path::from(component).extended(...)`) and the `PG_EPHEMERAL_COMPONENT` const (via `PathComponent::from_static_or_panic`). No string concatenation + re-parse round-trip. CacheKey changes from `[u8; 32]` to `sha2::digest::Output` so the existing `impl From> for Tag` in ociman can produce the tag directly. Integration test verifies both halves of the contract — that the registry prefix is applied, and that the hex hash is identical with and without the registry set. --- pg-ephemeral/src/cli.rs | 8 +++++ pg-ephemeral/src/config.rs | 16 +++++++++ pg-ephemeral/src/definition.rs | 3 ++ pg-ephemeral/src/seed.rs | 61 +++++++++++++++++++++++++++++----- pg-ephemeral/tests/base.rs | 10 ++++++ pg-ephemeral/tests/cache.rs | 47 ++++++++++++++++++++++++++ 6 files changed, 137 insertions(+), 8 deletions(-) diff --git a/pg-ephemeral/src/cli.rs b/pg-ephemeral/src/cli.rs index abf04988..b39f6a79 100644 --- a/pg-ephemeral/src/cli.rs +++ b/pg-ephemeral/src/cli.rs @@ -54,6 +54,13 @@ pub struct App { /// If the autodetection fails exits with an error. #[arg(long)] backend: Option, + /// Overwrite cache registry + /// + /// When set, all cache image references are prefixed with this registry + /// name (e.g. `ghcr.io/myorg`), enabling push/pull against a remote + /// registry. Does not affect cache key hashing. + #[arg(long)] + cache_registry: Option, /// Overwrite image #[arg(long)] image: Option, @@ -68,6 +75,7 @@ impl App { pub async fn run(&self) -> Result<(), Error> { let overwrites = crate::config::InstanceDefinition { backend: self.backend, + cache_registry: self.cache_registry.clone(), image: self.image.clone(), seeds: indexmap::IndexMap::new(), ssl_config: self diff --git a/pg-ephemeral/src/config.rs b/pg-ephemeral/src/config.rs index a2e6acf1..61f13686 100644 --- a/pg-ephemeral/src/config.rs +++ b/pg-ephemeral/src/config.rs @@ -7,6 +7,7 @@ use crate::seed::{Command, CommandCacheConfig, Seed, SeedName}; pub struct Instance { pub application_name: Option, pub backend: ociman::backend::Selection, + pub cache_registry: Option, pub database: pg_client::Database, pub seeds: indexmap::IndexMap, pub ssl_config: Option, @@ -22,6 +23,7 @@ impl Instance { Self { backend, application_name: None, + cache_registry: None, seeds: indexmap::IndexMap::new(), ssl_config: None, superuser: pg_client::User::POSTGRES, @@ -40,6 +42,7 @@ impl Instance { instance_name: instance_name.clone(), application_name: self.application_name.clone(), backend: self.backend.resolve().await?, + cache_registry: self.cache_registry.clone(), database: self.database.clone(), seeds: self.seeds.clone(), ssl_config: self.ssl_config.clone(), @@ -148,6 +151,7 @@ pub struct SslConfigDefinition { #[serde(deny_unknown_fields)] pub struct InstanceDefinition { pub backend: Option, + pub cache_registry: Option, pub image: Option, #[serde(default)] pub seeds: indexmap::IndexMap, @@ -161,6 +165,7 @@ impl InstanceDefinition { pub fn empty() -> Self { Self { backend: None, + cache_registry: None, image: None, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -195,6 +200,13 @@ impl InstanceDefinition { .or(defaults.backend) .unwrap_or(ociman::backend::Selection::Auto); + let cache_registry = overwrites + .cache_registry + .as_ref() + .or(self.cache_registry.as_ref()) + .or(defaults.cache_registry.as_ref()) + .cloned(); + let seeds = self .seeds .into_iter() @@ -219,6 +231,7 @@ impl InstanceDefinition { Ok(Instance { application_name: None, backend, + cache_registry, database: pg_client::Database::POSTGRES, seeds, ssl_config, @@ -235,6 +248,7 @@ impl InstanceDefinition { pub struct Config { image: Option, backend: Option, + cache_registry: Option, ssl_config: Option, #[serde(default, with = "humantime_serde")] wait_available_timeout: Option, @@ -246,6 +260,7 @@ impl std::default::Default for Config { Self { image: Some(Image::default()), backend: None, + cache_registry: None, ssl_config: None, wait_available_timeout: None, instances: None, @@ -331,6 +346,7 @@ impl Config { ) -> Result { let defaults = InstanceDefinition { backend: self.backend, + cache_registry: self.cache_registry.clone(), image: self.image.clone(), seeds: indexmap::IndexMap::new(), ssl_config: self.ssl_config.clone(), diff --git a/pg-ephemeral/src/definition.rs b/pg-ephemeral/src/definition.rs index e99aa1c3..e1e1e361 100644 --- a/pg-ephemeral/src/definition.rs +++ b/pg-ephemeral/src/definition.rs @@ -27,6 +27,7 @@ pub struct Definition { pub instance_name: crate::InstanceName, pub application_name: Option, pub backend: ociman::Backend, + pub cache_registry: Option, pub database: pg_client::Database, pub seeds: indexmap::IndexMap, pub ssl_config: Option, @@ -48,6 +49,7 @@ impl Definition { instance_name, backend, application_name: None, + cache_registry: None, seeds: indexmap::IndexMap::new(), ssl_config: None, superuser: pg_client::User::POSTGRES, @@ -98,6 +100,7 @@ impl Definition { &self.seeds, &self.backend, instance_name, + self.cache_registry.as_ref(), ) .await } diff --git a/pg-ephemeral/src/seed.rs b/pg-ephemeral/src/seed.rs index c73ed85b..19fe3822 100644 --- a/pg-ephemeral/src/seed.rs +++ b/pg-ephemeral/src/seed.rs @@ -1,6 +1,40 @@ use git_proc::Build; -type CacheKey = [u8; 32]; +type CacheKey = sha2::digest::Output; + +const PG_EPHEMERAL_COMPONENT: ociman::reference::PathComponent = + ociman::reference::PathComponent::from_static_or_panic("pg-ephemeral"); + +fn build_cache_reference( + cache_registry: Option<&ociman::reference::Name>, + instance_name: &crate::InstanceName, + cache_key: CacheKey, +) -> ociman::Reference { + let instance_component: ociman::reference::PathComponent = + instance_name.as_str().parse().unwrap(); + + let (domain, path) = match cache_registry { + Some(registry) => ( + registry.domain.clone(), + registry + .path + .clone() + .extended(PG_EPHEMERAL_COMPONENT.clone()) + .extended(instance_component), + ), + None => ( + None, + ociman::reference::Path::from(PG_EPHEMERAL_COMPONENT.clone()) + .extended(instance_component), + ), + }; + + ociman::Reference { + name: ociman::reference::Name { domain, path }, + tag: Some(cache_key.into()), + digest: None, + } +} #[derive(Clone, Debug, PartialEq)] pub enum CacheStatus { @@ -14,12 +48,11 @@ impl CacheStatus { cache_key: Option, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Self { match cache_key { Some(key) => { - let reference = format!("pg-ephemeral/{}:{}", instance_name, hex::encode(key)) - .parse() - .unwrap(); + let reference = build_cache_reference(cache_registry, instance_name, key); if backend.is_image_present(&reference).await { Self::Hit { reference } } else { @@ -259,6 +292,7 @@ impl Seed { hash_chain: &mut HashChain, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Result { match self { Seed::SqlFile { path } => { @@ -276,6 +310,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -316,6 +351,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -422,6 +458,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, cache_key_output, @@ -437,6 +474,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -451,6 +489,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -478,6 +517,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -620,9 +660,7 @@ impl HashChain { fn cache_key(&self) -> Option { use sha2::Digest; - self.hasher - .as_ref() - .map(|hasher| hasher.clone().finalize().into()) + self.hasher.as_ref().map(|hasher| hasher.clone().finalize()) } fn stop(&mut self) { @@ -643,6 +681,7 @@ impl<'a> LoadedSeeds<'a> { seeds: &indexmap::IndexMap, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Result { let mut hash_chain = HashChain::new(); let mut loaded_seeds = Vec::new(); @@ -662,7 +701,13 @@ impl<'a> LoadedSeeds<'a> { for (name, seed) in seeds { let loaded_seed = seed - .load(name.clone(), &mut hash_chain, backend, instance_name) + .load( + name.clone(), + &mut hash_chain, + backend, + instance_name, + cache_registry, + ) .await?; loaded_seeds.push(loaded_seed); } diff --git a/pg-ephemeral/tests/base.rs b/pg-ephemeral/tests/base.rs index 1657dc20..f517531b 100644 --- a/pg-ephemeral/tests/base.rs +++ b/pg-ephemeral/tests/base.rs @@ -69,6 +69,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -83,6 +84,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Podman, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -107,6 +109,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -121,6 +124,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -135,6 +139,7 @@ fn test_config_file() { "tests/database.toml", &pg_ephemeral::config::InstanceDefinition { backend: Some(ociman::backend::Selection::Docker), + cache_registry: None, image: Some("18.0".parse().unwrap()), seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -153,6 +158,7 @@ fn test_config_file_no_explicit_instance() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -175,6 +181,7 @@ fn test_config_file_no_explicit_instance() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Podman, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -188,6 +195,7 @@ fn test_config_file_no_explicit_instance() { "tests/database_no_explicit_instance.toml", &pg_ephemeral::config::InstanceDefinition { backend: Some(ociman::backend::Selection::Podman), + cache_registry: None, image: Some("18.0".parse().unwrap()), seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -218,6 +226,7 @@ fn test_config_ssl() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: Some(pg_ephemeral::definition::SslConfig::Generated { @@ -576,6 +585,7 @@ fn test_config_image_with_sha256_digest() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, diff --git a/pg-ephemeral/tests/cache.rs b/pg-ephemeral/tests/cache.rs index b6b31e85..1b94e436 100644 --- a/pg-ephemeral/tests/cache.rs +++ b/pg-ephemeral/tests/cache.rs @@ -303,6 +303,53 @@ async fn test_cache_status_change_with_image() { assert_ne!(stdout2, stdout1); } +#[tokio::test] +async fn test_cache_registry_prefixes_reference_without_changing_hash() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-registry-test"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + + // Baseline: no cache_registry. + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + let baseline = run_pg_ephemeral(&["cache", "status", "--json"], &dir.path).await; + let baseline: serde_json::Value = serde_json::from_str(&baseline).unwrap(); + let baseline_reference = baseline["seeds"][0]["reference"].as_str().unwrap(); + assert!(baseline_reference.starts_with("pg-ephemeral/main:")); + + // Same config plus cache_registry. + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + cache_registry = "ghcr.io/mbj" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + let prefixed = run_pg_ephemeral(&["cache", "status", "--json"], &dir.path).await; + let prefixed: serde_json::Value = serde_json::from_str(&prefixed).unwrap(); + let prefixed_reference = prefixed["seeds"][0]["reference"].as_str().unwrap(); + + // The prefixed reference should be the baseline reference with the registry prepended, + // proving (a) the registry prefix is applied and (b) the hash is unaffected. + assert_eq!( + prefixed_reference, + format!("ghcr.io/mbj/{baseline_reference}") + ); +} + #[tokio::test] async fn test_cache_status_chain_propagates() { let _backend = ociman::test_backend_setup!(); From 95301c5dc31e01a7c2b8e53f5fec3cfac3a47afc Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 21:01:49 +0000 Subject: [PATCH 06/10] Make Backend::push_image fallible with PushError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror of the pull_image change. Previously push_image panicked on any failure via `.unwrap()`. The upcoming `cache push` subcommand needs to surface failures per-stage so the user can see which cache images failed to upload without the process just dying mid-chain. Unlike PullError, there's no useful sub-discrimination on push — every failure (auth, network, rate limit, missing local image) collapses into "the upload didn't happen", so PushError is a single struct with the reference + stderr-derived message. Reference is boxed for the same result_large_err reason as PullError's variants. The classifier is extracted as a pure `classify_push_result` function and unit-tested with canned stderr, same pattern as pull. No external callers of push_image exist yet, so no caller updates. --- ociman/src/backend.rs | 70 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/ociman/src/backend.rs b/ociman/src/backend.rs index 001a99c7..8ca9b003 100644 --- a/ociman/src/backend.rs +++ b/ociman/src/backend.rs @@ -85,6 +85,35 @@ fn classify_pull_result( } } +#[derive(Debug, thiserror::Error)] +#[error("push failed for {reference}: {message}")] +pub struct PushError { + // Boxed for the same reason as `PullError`'s reference fields — see + // that type's comment. + pub reference: Box, + pub message: String, +} + +/// Turn a push subprocess exit status + captured stderr into a +/// [`PushError`] (or [`Ok`] on success). +/// +/// Split out from [`Backend::push_image`] for the same reason as +/// [`classify_pull_result`] — unit-testable without a network or daemon. +fn classify_push_result( + reference: &crate::image::Reference, + success: bool, + stderr: &[u8], +) -> Result<(), PushError> { + if success { + return Ok(()); + } + + Err(PushError { + reference: Box::new(reference.clone()), + message: String::from_utf8_lossy(stderr).trim().to_string(), + }) +} + #[derive(Copy, Clone, Debug, Eq, PartialEq, serde::Deserialize, clap::ValueEnum)] #[serde(rename_all = "snake_case")] pub enum Selection { @@ -188,13 +217,24 @@ impl Backend { } } - /// Push an image to a registry - pub async fn push_image(&self, reference: &crate::image::Reference) { - self.command() + /// Push an image to a registry. + /// + /// Stdout streams to the parent so users see upload progress. Stderr + /// is captured and surfaced as [`PushError`] on non-zero exit. Unlike + /// pull, there's no useful sub-discrimination here: every push failure + /// (auth, network, rate limit, missing local image) collapses into the + /// same "it didn't upload" outcome as far as callers are concerned. + pub async fn push_image(&self, reference: &crate::image::Reference) -> Result<(), PushError> { + let output = self + .command() .arguments(["push", &reference.to_string()]) - .status() + .stderr_capture() + .accept_nonzero_exit() + .run() .await .unwrap(); + + classify_push_result(reference, output.status.success(), &output.bytes) } pub async fn remove_image(&self, reference: &crate::image::Reference) { @@ -695,6 +735,28 @@ mod tests { assert!(matches!(result, Err(PullError::Other { .. }))); } + #[test] + fn test_classify_push_result_success() { + let reference = pull_test_reference(); + assert!(classify_push_result(&reference, true, b"").is_ok()); + } + + #[test] + fn test_classify_push_result_failure_captures_stderr() { + let reference = pull_test_reference(); + let stderr = b"unauthorized: authentication required\n"; + match classify_push_result(&reference, false, stderr) { + Err(PushError { + reference: r, + message, + }) => { + assert_eq!(*r, reference); + assert_eq!(message, "unauthorized: authentication required"); + } + Ok(()) => panic!("expected PushError"), + } + } + #[tokio::test] async fn test_container_resolver_localhost() { let backend = crate::test_backend_setup!(); From 06ef60ac92ea77e165e5a52e7760d14175cf491a Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 21:08:27 +0000 Subject: [PATCH 07/10] Document cache_registry in CHANGELOG and README --- pg-ephemeral/CHANGELOG.md | 8 +++++ pg-ephemeral/README.md | 67 ++++++++++++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 12 deletions(-) diff --git a/pg-ephemeral/CHANGELOG.md b/pg-ephemeral/CHANGELOG.md index 967fb560..eba6970a 100644 --- a/pg-ephemeral/CHANGELOG.md +++ b/pg-ephemeral/CHANGELOG.md @@ -2,6 +2,14 @@ ## Unreleased +### Added + +- `cache_registry` config field and `--cache-registry` CLI flag. When set, + all cache image references are prefixed with the given OCI registry name + (e.g. `ghcr.io/myorg`), so cache images can be pushed and pulled to share + warm cache state across machines. The cache key hash is unaffected — + different registries do not fragment the cache. + ### Fixed - Suppress spurious `PID N is not a PostgreSQL backend process` warnings when diff --git a/pg-ephemeral/README.md b/pg-ephemeral/README.md index 83e87d4f..07381f07 100644 --- a/pg-ephemeral/README.md +++ b/pg-ephemeral/README.md @@ -50,12 +50,13 @@ cache = { type = "none" } ### Top-level fields -| Field | Description | -|--------------------------|----------------------------------------------------------------------| -| `image` | PostgreSQL version / image tag (e.g. `"17.1"`) | -| `backend` | `"docker"`, `"podman"`, or omit for auto-detection (see below) | -| `ssl_config` | SSL configuration with `hostname` field | -| `wait_available_timeout` | How long to wait for PostgreSQL to accept connections (e.g. `"30s"`) | +| Field | Description | +|--------------------------|-----------------------------------------------------------------------------| +| `image` | PostgreSQL version / image tag (e.g. `"17.1"`) | +| `backend` | `"docker"`, `"podman"`, or omit for auto-detection (see below) | +| `cache_registry` | OCI registry prefix for cache images (e.g. `"ghcr.io/myorg"`). See [Sharing cache across machines](#sharing-cache-across-machines). | +| `ssl_config` | SSL configuration with `hostname` field | +| `wait_available_timeout` | How long to wait for PostgreSQL to accept connections (e.g. `"30s"`) | ### Backend selection @@ -170,6 +171,47 @@ For `command` type seeds, the `cache` field controls how the cache key is comput | `{ type = "key-script", script = "..." }` | Run a script whose stdout is hashed as the cache key. | | `{ type = "none" }` | Disable caching. Breaks the cache chain for this and all subsequent seeds. | +### Sharing cache across machines + +By default, cache images are named `pg-ephemeral/:` and live +only in the local Docker/Podman image store. Set `cache_registry` to a remote +OCI registry prefix and every cache image gains that prefix, so references +become push/pullable addresses in a registry you can share across machines +(CI runners, developer laptops, production build hosts): + +```toml +image = "17.1" +cache_registry = "ghcr.io/myorg" + +[instances.main.seeds.schema] +type = "sql-file" +path = "schema.sql" +``` + +The `cache_registry` value can be any valid OCI registry name — just a host +(`ghcr.io`), a host plus namespace (`ghcr.io/myorg`), or a private registry +(`registry.example.com:5000/team`). `pg-ephemeral cache status` will now +report references like `ghcr.io/myorg/pg-ephemeral/main:`. + +**The cache key hash is not affected by `cache_registry`.** Two machines +pointed at different registries still compute the same hex for the same +content, and switching a project from no registry to a registry (or between +registries) does not invalidate any existing cache. + +> **Note:** pg-ephemeral does not yet ship a native `cache push` / `cache +> pull` subcommand. For now, use your container CLI directly once the cache +> is populated — the references printed by `pg-ephemeral cache status` are +> valid `docker push` / `docker pull` / `podman push` / `podman pull` +> arguments. Registry authentication is handled entirely by the underlying +> tool (`docker login`, `podman login`, or cred-helper integration). + +You can override the registry on a single invocation with `--cache-registry` +without editing `database.toml`: + +```sh +pg-ephemeral --cache-registry ghcr.io/myorg cache status --json +``` + ## Rust Library pg-ephemeral can be used as a Rust library for integration tests or any code that needs @@ -398,12 +440,13 @@ Commands: platform Platform support checks Options: - --config-file Config file path (default: database.toml) - --no-config-file Use defaults, ignore any config file - --backend Override backend (docker, podman) - --image Override PostgreSQL image - --ssl-hostname Enable SSL with the specified hostname - --instance Target instance (default: main) + --config-file Config file path (default: database.toml) + --no-config-file Use defaults, ignore any config file + --backend Override backend (docker, podman) + --cache-registry Override cache_registry from config (e.g. ghcr.io/myorg) + --image Override PostgreSQL image + --ssl-hostname Enable SSL with the specified hostname + --instance Target instance (default: main) ``` ## How it compares to testcontainers From 6ea00f0283a3855c77b8b0e1ee97ccd1d5fa8b2d Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Tue, 14 Apr 2026 23:13:47 +0000 Subject: [PATCH 08/10] Add pg-ephemeral cache pull and cache push subcommands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consumes the fallible pull_image/push_image primitives and the cache_registry config option to move cache images between the local store and a remote OCI registry. `cache pull` walks the seed chain from tip backwards. For each stage: - Uncacheable → skip, keep walking. - Hit (already local) → return, nothing to pull. - Miss → try backend.pull_image. On success return; on PullError::NotFound walk to the next older stage; on PullError::Other abort. This gives "newest cached stage the registry has" in a single network round-trip in the happy case, and gracefully degrades through colder cache states on incremental miss. `cache push` iterates forward and pushes every stage whose local cache status is Hit. Miss and Uncacheable stages are skipped. Aborts on the first push failure (typical CI shape is `cache pull && cache populate && cache push`, and you want to see push failures loudly). Both commands error with a clear message if `cache_registry` is not configured — there's nothing to pull from or push to. Also switches the binary's error output from the derived `Debug` form (e.g. "Error: CacheSync(RegistryNotSet)") to the user-facing `Display` form (e.g. "Error: cache_registry must be set …"). This affects every error the CLI can surface, not just the new ones. Integration tests cover the no-registry error path for both subcommands end-to-end via run_pg_ephemeral_expect_failure. A real registry-backed push/pull round trip is still out of scope — it would require spinning up a registry:2 container in the test harness. README and CHANGELOG updated. --- pg-ephemeral/CHANGELOG.md | 11 +++ pg-ephemeral/README.md | 32 ++++++-- pg-ephemeral/src/bin/pg-ephemeral.rs | 7 +- pg-ephemeral/src/cli.rs | 27 +++++++ pg-ephemeral/src/definition.rs | 116 +++++++++++++++++++++++++++ pg-ephemeral/tests/cache.rs | 76 ++++++++++++++++++ 6 files changed, 259 insertions(+), 10 deletions(-) diff --git a/pg-ephemeral/CHANGELOG.md b/pg-ephemeral/CHANGELOG.md index eba6970a..9cb9cbfb 100644 --- a/pg-ephemeral/CHANGELOG.md +++ b/pg-ephemeral/CHANGELOG.md @@ -9,6 +9,17 @@ (e.g. `ghcr.io/myorg`), so cache images can be pushed and pulled to share warm cache state across machines. The cache key hash is unaffected — different registries do not fragment the cache. +- `pg-ephemeral cache pull` subcommand. Walks the seed chain from tip + backwards and pulls the newest stage that exists in the configured + registry, then stops. Requires `cache_registry` to be set. +- `pg-ephemeral cache push` subcommand. Pushes every locally-cached + stage to the configured registry. Requires `cache_registry` to be set. + +### Changed + +- CLI errors are now printed using their user-facing `Display` form + (e.g. "Error: cache_registry must be set …") instead of the internal + `Debug` form (e.g. "CacheSync(RegistryNotSet)"). ### Fixed diff --git a/pg-ephemeral/README.md b/pg-ephemeral/README.md index 07381f07..3535f30b 100644 --- a/pg-ephemeral/README.md +++ b/pg-ephemeral/README.md @@ -153,6 +153,12 @@ pg-ephemeral cache status --json # Pre-populate the cache without running an interactive session pg-ephemeral cache populate +# Pull cache images from the configured registry (requires cache_registry) +pg-ephemeral cache pull + +# Push locally-cached stages to the configured registry (requires cache_registry) +pg-ephemeral cache push + # Remove cached images pg-ephemeral cache reset @@ -198,18 +204,28 @@ pointed at different registries still compute the same hex for the same content, and switching a project from no registry to a registry (or between registries) does not invalidate any existing cache. -> **Note:** pg-ephemeral does not yet ship a native `cache push` / `cache -> pull` subcommand. For now, use your container CLI directly once the cache -> is populated — the references printed by `pg-ephemeral cache status` are -> valid `docker push` / `docker pull` / `podman push` / `podman pull` -> arguments. Registry authentication is handled entirely by the underlying -> tool (`docker login`, `podman login`, or cred-helper integration). +Once `cache_registry` is set, use the two dedicated subcommands to move +cache images between the local image store and the remote registry: + +```sh +# Pull the newest cached stage from the registry that exists remotely. +# Walks the seed chain from tip backwards and stops on the first hit. +pg-ephemeral cache pull + +# Populate anything still missing locally, then push everything that's +# now cached locally to the registry. Typical CI shape: +pg-ephemeral cache pull && pg-ephemeral cache populate && pg-ephemeral cache push +``` + +Registry authentication is handled entirely by the underlying container +CLI (`docker login`, `podman login`, or cred-helper integration) — no +pg-ephemeral-specific setup required. You can override the registry on a single invocation with `--cache-registry` without editing `database.toml`: ```sh -pg-ephemeral --cache-registry ghcr.io/myorg cache status --json +pg-ephemeral --cache-registry ghcr.io/myorg cache pull ``` ## Rust Library @@ -434,7 +450,7 @@ Commands: container-psql Run interactive psql inside the container container-shell Run interactive shell inside the container container-schema-dump Dump schema from the container - cache Cache management (status, populate, reset) + cache Cache management (status, populate, pull, push, reset) integration-server Run integration server (pipe-based control protocol) list List defined instances platform Platform support checks diff --git a/pg-ephemeral/src/bin/pg-ephemeral.rs b/pg-ephemeral/src/bin/pg-ephemeral.rs index 66dd7ff2..20f95fa8 100644 --- a/pg-ephemeral/src/bin/pg-ephemeral.rs +++ b/pg-ephemeral/src/bin/pg-ephemeral.rs @@ -1,10 +1,13 @@ use pg_ephemeral::cli; #[tokio::main(flavor = "current_thread")] -async fn main() -> Result<(), cli::Error> { +async fn main() { env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); use clap::Parser; - cli::App::parse().run().await + if let Err(error) = cli::App::parse().run().await { + eprintln!("Error: {error}"); + std::process::exit(1); + } } diff --git a/pg-ephemeral/src/cli.rs b/pg-ephemeral/src/cli.rs index b39f6a79..f03fdb6c 100644 --- a/pg-ephemeral/src/cli.rs +++ b/pg-ephemeral/src/cli.rs @@ -9,6 +9,8 @@ pub enum Error { Config(#[from] crate::config::Error), #[error(transparent)] Container(#[from] crate::container::Error), + #[error(transparent)] + CacheSync(#[from] crate::definition::CacheSyncError), #[error("Unknown instance: {0}")] UnknownInstance(InstanceName), } @@ -140,6 +142,17 @@ pub enum CacheCommand { }, /// Populate cache by running seeds and committing at each cacheable point Populate, + /// Pull cache images from the configured registry. + /// + /// Walks the seed chain from tip backwards and pulls the newest stage + /// that exists remotely. Requires `cache_registry` to be set. + Pull, + /// Push all locally-cached stages to the configured registry. + /// + /// Pushes every stage currently stored locally (status "hit"). Stages + /// not yet populated locally are skipped. Requires `cache_registry` to + /// be set. + Push, } #[derive(Clone, Debug, clap::Parser)] @@ -312,6 +325,20 @@ impl Command { definition.populate_cache(instance_name).await?; definition.print_cache_status(instance_name, false).await?; } + CacheCommand::Pull => { + let definition = Self::get_instance(instance_map, instance_name)? + .definition(instance_name) + .await + .unwrap(); + definition.pull_cache(instance_name).await?; + } + CacheCommand::Push => { + let definition = Self::get_instance(instance_map, instance_name)? + .definition(instance_name) + .await + .unwrap(); + definition.push_cache(instance_name).await?; + } }, Self::ContainerPsql { instance_name } => { let definition = Self::get_instance(instance_map, instance_name)? diff --git a/pg-ephemeral/src/definition.rs b/pg-ephemeral/src/definition.rs index e1e1e361..91b01560 100644 --- a/pg-ephemeral/src/definition.rs +++ b/pg-ephemeral/src/definition.rs @@ -14,6 +14,22 @@ pub enum SeedApplyError { Sql(#[from] sqlx::Error), } +/// Errors from cache sync operations ([`Definition::pull_cache`] / +/// [`Definition::push_cache`]). +#[derive(Debug, thiserror::Error)] +pub enum CacheSyncError { + #[error( + "cache_registry must be set in database.toml or via --cache-registry to use this command" + )] + RegistryNotSet, + #[error(transparent)] + SeedLoad(#[from] LoadError), + #[error(transparent)] + Pull(#[from] ociman::backend::PullError), + #[error(transparent)] + Push(#[from] ociman::backend::PushError), +} + #[derive(Clone, Debug, PartialEq)] pub enum SslConfig { Generated { @@ -337,6 +353,106 @@ impl Definition { Ok((previous_cache_reference.cloned(), Vec::new())) } + /// Pull cache images from the configured registry by walking the seed + /// chain from tip backwards. + /// + /// Returns as soon as any cacheable stage lands locally: + /// - Already present locally (Hit) → return immediately, nothing to pull. + /// - Missing locally (Miss) → attempt to pull from the registry; on + /// success return, on [`PullError::NotFound`] walk to the next older + /// stage, on [`PullError::Other`] abort. + /// - Uncacheable → skip and continue walking. + /// + /// Returns `Ok(())` even if no cached stage was found in the registry — + /// the caller can tell the difference via logs. + /// + /// # Errors + /// + /// Returns [`CacheSyncError::RegistryNotSet`] if the definition has no + /// `cache_registry` configured. + pub async fn pull_cache( + &self, + instance_name: &crate::InstanceName, + ) -> Result<(), CacheSyncError> { + if self.cache_registry.is_none() { + return Err(CacheSyncError::RegistryNotSet); + } + + let loaded_seeds = self.load_seeds(instance_name).await?; + let seeds: Vec<&LoadedSeed> = loaded_seeds.iter_seeds().collect(); + + for seed in seeds.iter().rev() { + use crate::seed::CacheStatus; + match seed.cache_status() { + CacheStatus::Uncacheable => { + log::debug!("cache pull: skipping uncacheable seed {}", seed.name()); + continue; + } + CacheStatus::Hit { reference } => { + log::info!( + "cache pull: {} already present locally at {reference}", + seed.name() + ); + return Ok(()); + } + CacheStatus::Miss { reference } => { + log::info!("cache pull: attempting {reference}"); + match self.backend.pull_image(reference).await { + Ok(()) => { + log::info!("cache pull: pulled {reference}"); + return Ok(()); + } + Err(ociman::backend::PullError::NotFound { .. }) => { + log::debug!("cache pull: {reference} not in registry, walking back"); + continue; + } + Err(error) => return Err(error.into()), + } + } + } + } + + log::info!("cache pull: no cached stage found in registry"); + Ok(()) + } + + /// Push all locally-cached stages to the configured registry. + /// + /// Iterates the seed chain in order and pushes every stage whose local + /// cache status is [`CacheStatus::Hit`](crate::seed::CacheStatus::Hit). + /// [`CacheStatus::Miss`](crate::seed::CacheStatus::Miss) and + /// [`CacheStatus::Uncacheable`](crate::seed::CacheStatus::Uncacheable) + /// stages are skipped. Aborts on the first push failure. + /// + /// # Errors + /// + /// Returns [`CacheSyncError::RegistryNotSet`] if the definition has no + /// `cache_registry` configured. + pub async fn push_cache( + &self, + instance_name: &crate::InstanceName, + ) -> Result<(), CacheSyncError> { + if self.cache_registry.is_none() { + return Err(CacheSyncError::RegistryNotSet); + } + + let loaded_seeds = self.load_seeds(instance_name).await?; + let mut pushed_any = false; + + for seed in loaded_seeds.iter_seeds() { + if let crate::seed::CacheStatus::Hit { reference } = seed.cache_status() { + log::info!("cache push: pushing {reference}"); + self.backend.push_image(reference).await?; + pushed_any = true; + } + } + + if !pushed_any { + log::info!("cache push: no locally cached stages to push"); + } + Ok(()) + } + pub async fn run_integration_server( &self, result_fd: std::os::fd::RawFd, diff --git a/pg-ephemeral/tests/cache.rs b/pg-ephemeral/tests/cache.rs index 1b94e436..113d899a 100644 --- a/pg-ephemeral/tests/cache.rs +++ b/pg-ephemeral/tests/cache.rs @@ -350,6 +350,82 @@ async fn test_cache_registry_prefixes_reference_without_changing_hash() { ); } +async fn run_pg_ephemeral_expect_failure( + args: &[&str], + current_dir: &std::path::Path, +) -> (String, String) { + let pg_ephemeral_bin = env!("CARGO_BIN_EXE_pg-ephemeral"); + let output = cmd_proc::Command::new(pg_ephemeral_bin) + .arguments(args) + .working_directory(current_dir) + .stdout_capture() + .stderr_capture() + .accept_nonzero_exit() + .run() + .await + .unwrap(); + + assert!( + !output.status.success(), + "expected pg-ephemeral {} to fail, but it succeeded\nstdout:\n{}", + args.join(" "), + String::from_utf8_lossy(&output.stdout) + ); + + ( + String::from_utf8_lossy(&output.stdout).into_owned(), + String::from_utf8_lossy(&output.stderr).into_owned(), + ) +} + +#[tokio::test] +async fn test_cache_pull_without_registry_errors() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-pull-no-registry"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + + let (_stdout, stderr) = run_pg_ephemeral_expect_failure(&["cache", "pull"], &dir.path).await; + assert!( + stderr.contains("cache_registry must be set"), + "expected registry-not-set error, got stderr:\n{stderr}" + ); +} + +#[tokio::test] +async fn test_cache_push_without_registry_errors() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-push-no-registry"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + + let (_stdout, stderr) = run_pg_ephemeral_expect_failure(&["cache", "push"], &dir.path).await; + assert!( + stderr.contains("cache_registry must be set"), + "expected registry-not-set error, got stderr:\n{stderr}" + ); +} + #[tokio::test] async fn test_cache_status_chain_propagates() { let _backend = ociman::test_backend_setup!(); From 505f4362af9ea35f241a9e47264838c2c5349125 Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Wed, 15 Apr 2026 01:06:28 +0000 Subject: [PATCH 09/10] Add manager pg-ephemeral github-actions cache-registry test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements an end-to-end verification of the cache push/pull round trip against a real OCI registry. Invoked as manager pg-ephemeral github-actions cache-registry test from the CI workflow after `docker login ghcr.io`. Flow: populate → push → reset --force → status (assert all miss) → pull → status (assert tip hit). Exercises the walk-back path implicitly — with two seed stages and a reset local store, the tip must be pulled fresh from the registry for the assertion to hold. The command uses a typed `Error` enum (thiserror) so failures propagate cleanly through `?` instead of panicking, and so CI log output distinguishes the failure mode (binary missing, scratch-dir I/O, subprocess spawn, JSON shape, status mismatch) rather than collapsing everything to a panic backtrace. Adds thiserror as a manager dependency. The registry path and instance name are derived from the `GITHUB_REPOSITORY` and `GITHUB_RUN_ID` environment variables set by GitHub Actions, with sane fallbacks so the command is at least invocable locally for debugging. Each CI run gets a unique instance name via GITHUB_RUN_ID so concurrent jobs don't collide. A new top-level `GithubActions` subcommand under `pg-ephemeral` namespaces this and any future GHA-only test subcommands, keeping them out of the paths that need to work on a vanilla local checkout. This commit does NOT wire the step into ci.yml — that comes next, once the manager-side logic is reviewable in isolation. --- Cargo.lock | 1 + manager/Cargo.toml | 1 + manager/src/pg_ephemeral.rs | 12 + manager/src/pg_ephemeral/github_actions.rs | 18 ++ .../github_actions/cache_registry.rs | 306 ++++++++++++++++++ 5 files changed, 338 insertions(+) create mode 100644 manager/src/pg_ephemeral/github_actions.rs create mode 100644 manager/src/pg_ephemeral/github_actions/cache_registry.rs diff --git a/Cargo.lock b/Cargo.lock index 81565c77..8f722419 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2425,6 +2425,7 @@ dependencies = [ "stratosphere-core", "syn", "tar", + "thiserror 2.0.18", "tokio", "toml", "toml_edit", diff --git a/manager/Cargo.toml b/manager/Cargo.toml index ae789ba8..b8daf66e 100644 --- a/manager/Cargo.toml +++ b/manager/Cargo.toml @@ -30,6 +30,7 @@ similar = "3" stratosphere-core.workspace = true syn.workspace = true tar = "0.4" +thiserror.workspace = true toml.workspace = true toml_edit.workspace = true url.workspace = true diff --git a/manager/src/pg_ephemeral.rs b/manager/src/pg_ephemeral.rs index b57e1489..8815e0c2 100644 --- a/manager/src/pg_ephemeral.rs +++ b/manager/src/pg_ephemeral.rs @@ -1,3 +1,4 @@ +pub(crate) mod github_actions; pub(crate) mod npm; pub(crate) mod ruby; @@ -13,6 +14,16 @@ pub(crate) enum Command { #[clap(subcommand)] command: npm::Command, }, + /// GitHub Actions-only commands + /// + /// Subcommands under this namespace assume they run from a GitHub Actions + /// workflow with the environment, secrets, and pre-setup (e.g. + /// `docker login`) that the workflow provides. They are not expected to + /// work from a vanilla local checkout. + GithubActions { + #[clap(subcommand)] + command: github_actions::Command, + }, /// Sync all generated files with Rust source of truth Sync { /// Fail if git is dirty after syncing (for CI verification) @@ -26,6 +37,7 @@ impl Command { match self { Self::Ruby { command } => command.run().await, Self::Npm { command } => command.run().await, + Self::GithubActions { command } => command.run().await, Self::Sync { reject_dirty } => { ruby::sync(*reject_dirty).await?; npm::sync(*reject_dirty).await?; diff --git a/manager/src/pg_ephemeral/github_actions.rs b/manager/src/pg_ephemeral/github_actions.rs new file mode 100644 index 00000000..8ae28de1 --- /dev/null +++ b/manager/src/pg_ephemeral/github_actions.rs @@ -0,0 +1,18 @@ +pub(crate) mod cache_registry; + +#[derive(Debug, clap::Parser)] +pub(crate) enum Command { + /// Cache registry end-to-end tests. + CacheRegistry { + #[clap(subcommand)] + command: cache_registry::Command, + }, +} + +impl Command { + pub(crate) async fn run(&self) -> Result<(), Box> { + match self { + Self::CacheRegistry { command } => command.run().await, + } + } +} diff --git a/manager/src/pg_ephemeral/github_actions/cache_registry.rs b/manager/src/pg_ephemeral/github_actions/cache_registry.rs new file mode 100644 index 00000000..1d2f7cea --- /dev/null +++ b/manager/src/pg_ephemeral/github_actions/cache_registry.rs @@ -0,0 +1,306 @@ +use std::path::{Path, PathBuf}; + +use cmd_proc::EnvVariableName; + +const ENV_GITHUB_REPOSITORY: EnvVariableName = + EnvVariableName::from_static_or_panic("GITHUB_REPOSITORY"); +const ENV_GITHUB_RUN_ID: EnvVariableName = EnvVariableName::from_static_or_panic("GITHUB_RUN_ID"); + +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error( + "pg-ephemeral binary not found at {path}. Build it first with `cargo build --release --package pg-ephemeral`." + )] + BinaryMissing { path: PathBuf }, + #[error("unsupported host platform: {arch}-{os}")] + UnsupportedHostPlatform { + arch: &'static str, + os: &'static str, + }, + #[error("failed to prepare scratch directory at {path}")] + Scratch { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("pg-ephemeral {args} failed")] + PgEphemeral { + args: String, + #[source] + source: cmd_proc::CommandError, + }, + #[error("failed to parse cache status JSON")] + StatusParse(#[source] serde_json::Error), + #[error("cache status JSON missing required field: {path}")] + StatusShape { path: &'static str }, + #[error( + "cache status sanity check failed: seed {seed} expected status {expected}, got {actual}" + )] + StatusMismatch { + seed: String, + expected: &'static str, + actual: String, + }, + #[error("cache status returned no seeds — test configuration produced an empty chain")] + EmptySeeds, +} + +#[derive(Debug, clap::Parser)] +pub(crate) enum Command { + /// Run the end-to-end cache registry round trip. + /// + /// Populates a small test cache, pushes it to ghcr.io, clears the + /// local store, pulls it back, and verifies the tip becomes a local + /// hit again. Assumes `docker login ghcr.io` has already been done + /// by the caller (the CI workflow handles this via `GITHUB_TOKEN`). + Test, +} + +impl Command { + pub(crate) async fn run(&self) -> Result<(), Box> { + match self { + Self::Test => Ok(test().await?), + } + } +} + +/// Construct the ghcr.io registry path the test uses. +/// +/// On GitHub Actions, `GITHUB_REPOSITORY` is set to `owner/repo` (e.g. +/// `mbj/mrs`). Locally, we fall back to `mbj/mrs` so the command is at +/// least invocable off-CI for debugging. +fn cache_registry() -> String { + let repository = + std::env::var(ENV_GITHUB_REPOSITORY.as_str()).unwrap_or_else(|_| "mbj/mrs".to_string()); + format!("ghcr.io/{repository}/pg-ephemeral-cache-test") +} + +/// Generate a unique, validly-shaped instance name for this run. +/// +/// `InstanceName` only allows `[a-z0-9-]`, no leading/trailing dash, +/// max 63 bytes. We use `GITHUB_RUN_ID` (numeric, always valid) when +/// running on CI and fall back to the local process id otherwise. +fn instance_name() -> String { + let suffix = std::env::var(ENV_GITHUB_RUN_ID.as_str()) + .unwrap_or_else(|_| format!("local-{}", std::process::id())); + format!("ci-{suffix}") +} + +fn pg_ephemeral_binary() -> Result { + let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("manager crate has a parent directory") + .to_path_buf(); + + // Match the cargo target the current manager binary was built with. + // When manager runs from the CI release build, `CARGO_BUILD_TARGET` + // is set via the workflow env. Locally, fall back to the native host + // triple. + let rust_target = match std::env::var("CARGO_BUILD_TARGET") { + Ok(value) => value, + Err(_) => match (std::env::consts::ARCH, std::env::consts::OS) { + ("x86_64", "linux") => "x86_64-unknown-linux-musl".to_string(), + ("aarch64", "linux") => "aarch64-unknown-linux-musl".to_string(), + ("aarch64", "macos") => "aarch64-apple-darwin".to_string(), + (arch, os) => { + return Err(Error::UnsupportedHostPlatform { arch, os }); + } + }, + }; + + let path = workspace_root + .join("target") + .join(&rust_target) + .join("release") + .join("pg-ephemeral"); + + if !path.exists() { + return Err(Error::BinaryMissing { path }); + } + + Ok(path) +} + +/// Build a fresh `database.toml` + seed files inside a scratch directory. +/// The caller is responsible for cleanup. +fn prepare_test_directory(registry: &str, instance_name: &str) -> Result { + let dir = std::env::temp_dir().join(format!( + "pg-ephemeral-ci-cache-registry-test-{}", + std::process::id() + )); + + let wrap = |source: std::io::Error| Error::Scratch { + path: dir.clone(), + source, + }; + + // Start from a clean slate in case a prior local run left crumbs. + if dir.exists() { + std::fs::remove_dir_all(&dir).map_err(wrap)?; + } + std::fs::create_dir_all(&dir).map_err(wrap)?; + + std::fs::write( + dir.join("schema.sql"), + "CREATE TABLE cache_registry_test (id INTEGER PRIMARY KEY);\n", + ) + .map_err(wrap)?; + + std::fs::write( + dir.join("data.sql"), + "INSERT INTO cache_registry_test (id) VALUES (42);\n", + ) + .map_err(wrap)?; + + let toml = format!( + "cache_registry = \"{registry}\"\n\ + \n\ + [instances.{instance_name}.seeds.schema]\n\ + type = \"sql-file\"\n\ + path = \"schema.sql\"\n\ + \n\ + [instances.{instance_name}.seeds.data]\n\ + type = \"sql-file\"\n\ + path = \"data.sql\"\n" + ); + std::fs::write(dir.join("database.toml"), toml).map_err(wrap)?; + + Ok(dir) +} + +fn display_args(args: &[&str]) -> String { + args.join(" ") +} + +async fn run_pg_ephemeral(binary: &Path, working_dir: &Path, args: &[&str]) -> Result<(), Error> { + // `cmd_proc::Command::status` returns `Err(CommandError)` on any + // non-zero exit unless `accept_nonzero_exit()` is set, so we just + // propagate. + cmd_proc::Command::new(binary) + .arguments(args) + .working_directory(working_dir) + .status() + .await + .map_err(|source| Error::PgEphemeral { + args: display_args(args), + source, + }) +} + +async fn capture_pg_ephemeral( + binary: &Path, + working_dir: &Path, + args: &[&str], +) -> Result { + cmd_proc::Command::new(binary) + .arguments(args) + .working_directory(working_dir) + .stdout_capture() + .string() + .await + .map_err(|source| Error::PgEphemeral { + args: display_args(args), + source, + }) +} + +fn parse_cache_status(json: &str) -> Result { + serde_json::from_str(json).map_err(Error::StatusParse) +} + +fn assert_all_stages_have_status( + status_json: &serde_json::Value, + expected: &'static str, +) -> Result<(), Error> { + let seeds = status_json["seeds"].as_array().ok_or(Error::StatusShape { + path: "seeds (expected array)", + })?; + if seeds.is_empty() { + return Err(Error::EmptySeeds); + } + + for seed in seeds { + let name = seed["name"].as_str().ok_or(Error::StatusShape { + path: "seeds[].name", + })?; + let status = seed["status"].as_str().ok_or(Error::StatusShape { + path: "seeds[].status", + })?; + if status != expected { + return Err(Error::StatusMismatch { + seed: name.to_string(), + expected, + actual: status.to_string(), + }); + } + } + Ok(()) +} + +fn assert_tip_hit(status_json: &serde_json::Value) -> Result<(), Error> { + let seeds = status_json["seeds"].as_array().ok_or(Error::StatusShape { + path: "seeds (expected array)", + })?; + let tip = seeds.last().ok_or(Error::EmptySeeds)?; + let name = tip["name"].as_str().ok_or(Error::StatusShape { + path: "seeds[last].name", + })?; + let status = tip["status"].as_str().ok_or(Error::StatusShape { + path: "seeds[last].status", + })?; + if status != "hit" { + return Err(Error::StatusMismatch { + seed: name.to_string(), + expected: "hit", + actual: status.to_string(), + }); + } + Ok(()) +} + +async fn test() -> Result<(), Error> { + let registry = cache_registry(); + let instance = instance_name(); + let binary = pg_ephemeral_binary()?; + + log::info!("Using cache_registry: {registry}"); + log::info!("Using instance name: {instance}"); + log::info!("Using pg-ephemeral: {}", binary.display()); + + let dir = prepare_test_directory(®istry, &instance)?; + let instance_arg: &[&str] = &["--instance", instance.as_str()]; + + let populate_args: Vec<&str> = [&["cache", "populate"], instance_arg].concat(); + let push_args: Vec<&str> = [&["cache", "push"], instance_arg].concat(); + let reset_args: Vec<&str> = [&["cache", "reset", "--force"], instance_arg].concat(); + let pull_args: Vec<&str> = [&["cache", "pull"], instance_arg].concat(); + let status_args: Vec<&str> = [&["cache", "status", "--json"], instance_arg].concat(); + + log::info!("Step 1/6: cache populate"); + run_pg_ephemeral(&binary, &dir, &populate_args).await?; + + log::info!("Step 2/6: cache push"); + run_pg_ephemeral(&binary, &dir, &push_args).await?; + + log::info!("Step 3/6: cache reset --force (clear local cache)"); + run_pg_ephemeral(&binary, &dir, &reset_args).await?; + + log::info!("Step 4/6: verify cache is empty locally"); + let status_after_reset = capture_pg_ephemeral(&binary, &dir, &status_args).await?; + let parsed = parse_cache_status(&status_after_reset)?; + assert_all_stages_have_status(&parsed, "miss")?; + + log::info!("Step 5/6: cache pull (should walk back and land on tip)"); + run_pg_ephemeral(&binary, &dir, &pull_args).await?; + + log::info!("Step 6/6: verify tip is now a local hit"); + let status_after_pull = capture_pg_ephemeral(&binary, &dir, &status_args).await?; + let parsed = parse_cache_status(&status_after_pull)?; + assert_tip_hit(&parsed)?; + + log::info!("Cleanup: remove scratch directory"); + let _ = std::fs::remove_dir_all(&dir); + + log::info!("Cache registry end-to-end test PASSED"); + Ok(()) +} From 5b9db11ea1704d94a7404c850e10b75ba82e300e Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Wed, 15 Apr 2026 01:08:34 +0000 Subject: [PATCH 10/10] Wire cache-registry end-to-end test into CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a `Run cache-registry end-to-end test` step that invokes `manager pg-ephemeral github-actions cache-registry test`, gated to the x86_64-unknown-linux-musl matrix entry. This is a registry integration check, not a platform compat check, so running it on every matrix target would be waste. The step is preceded by a `docker login ghcr.io` using the automatic GITHUB_TOKEN so pg-ephemeral's backend can authenticate with ghcr.io during push/pull without any repo-level secret setup. Adds `packages: write` to the job's permissions so GITHUB_TOKEN has the scope needed to push to ghcr.io/${{ github.repository }}/... Runs on every push (no branch filter) — the whole point of the test is that it catches registry-level breakage on the branch where it was introduced, not only after merge to main. --- .github/workflows/ci.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f423cfaf..21ad9028 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,12 @@ env: jobs: build: + permissions: + contents: read + # Required by the cache-registry end-to-end test, which pushes + # test cache images to ghcr.io/${{ github.repository }}/pg-ephemeral-cache-test + # using GITHUB_TOKEN. + packages: write strategy: matrix: os: @@ -88,6 +94,18 @@ jobs: - name: Run doctests run: cargo --verbose test --doc --all-features --release + # End-to-end cache registry round-trip test. Pushes and pulls against + # ghcr.io using GITHUB_TOKEN. Gated to a single linux-musl matrix + # entry: the test is a registry integration check, not a platform + # compat check, so running it on every matrix target would be waste. + - name: Log in to ghcr.io for cache-registry test + if: ${{ matrix.os.cargo_build_target == 'x86_64-unknown-linux-musl' }} + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Run cache-registry end-to-end test + if: ${{ matrix.os.cargo_build_target == 'x86_64-unknown-linux-musl' }} + run: target/${{ matrix.os.cargo_build_target }}/release/manager pg-ephemeral github-actions cache-registry test + - name: Set up Ruby 3.4 for macOS gem build if: ${{ endsWith(matrix.os.cargo_build_target, '-darwin') }} uses: ruby/setup-ruby@4c56a21280b36d862b5fc31348f463d60bdc55d5 # v1.301.0