diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f423cfaf..21ad9028 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,12 @@ env: jobs: build: + permissions: + contents: read + # Required by the cache-registry end-to-end test, which pushes + # test cache images to ghcr.io/${{ github.repository }}/pg-ephemeral-cache-test + # using GITHUB_TOKEN. + packages: write strategy: matrix: os: @@ -88,6 +94,18 @@ jobs: - name: Run doctests run: cargo --verbose test --doc --all-features --release + # End-to-end cache registry round-trip test. Pushes and pulls against + # ghcr.io using GITHUB_TOKEN. Gated to a single linux-musl matrix + # entry: the test is a registry integration check, not a platform + # compat check, so running it on every matrix target would be waste. + - name: Log in to ghcr.io for cache-registry test + if: ${{ matrix.os.cargo_build_target == 'x86_64-unknown-linux-musl' }} + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Run cache-registry end-to-end test + if: ${{ matrix.os.cargo_build_target == 'x86_64-unknown-linux-musl' }} + run: target/${{ matrix.os.cargo_build_target }}/release/manager pg-ephemeral github-actions cache-registry test + - name: Set up Ruby 3.4 for macOS gem build if: ${{ endsWith(matrix.os.cargo_build_target, '-darwin') }} uses: ruby/setup-ruby@4c56a21280b36d862b5fc31348f463d60bdc55d5 # v1.301.0 diff --git a/Cargo.lock b/Cargo.lock index 81565c77..8f722419 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2425,6 +2425,7 @@ dependencies = [ "stratosphere-core", "syn", "tar", + "thiserror 2.0.18", "tokio", "toml", "toml_edit", diff --git a/manager/Cargo.toml b/manager/Cargo.toml index ae789ba8..b8daf66e 100644 --- a/manager/Cargo.toml +++ b/manager/Cargo.toml @@ -30,6 +30,7 @@ similar = "3" stratosphere-core.workspace = true syn.workspace = true tar = "0.4" +thiserror.workspace = true toml.workspace = true toml_edit.workspace = true url.workspace = true diff --git a/manager/src/pg_ephemeral.rs b/manager/src/pg_ephemeral.rs index b57e1489..8815e0c2 100644 --- a/manager/src/pg_ephemeral.rs +++ b/manager/src/pg_ephemeral.rs @@ -1,3 +1,4 @@ +pub(crate) mod github_actions; pub(crate) mod npm; pub(crate) mod ruby; @@ -13,6 +14,16 @@ pub(crate) enum Command { #[clap(subcommand)] command: npm::Command, }, + /// GitHub Actions-only commands + /// + /// Subcommands under this namespace assume they run from a GitHub Actions + /// workflow with the environment, secrets, and pre-setup (e.g. + /// `docker login`) that the workflow provides. They are not expected to + /// work from a vanilla local checkout. + GithubActions { + #[clap(subcommand)] + command: github_actions::Command, + }, /// Sync all generated files with Rust source of truth Sync { /// Fail if git is dirty after syncing (for CI verification) @@ -26,6 +37,7 @@ impl Command { match self { Self::Ruby { command } => command.run().await, Self::Npm { command } => command.run().await, + Self::GithubActions { command } => command.run().await, Self::Sync { reject_dirty } => { ruby::sync(*reject_dirty).await?; npm::sync(*reject_dirty).await?; diff --git a/manager/src/pg_ephemeral/github_actions.rs b/manager/src/pg_ephemeral/github_actions.rs new file mode 100644 index 00000000..8ae28de1 --- /dev/null +++ b/manager/src/pg_ephemeral/github_actions.rs @@ -0,0 +1,18 @@ +pub(crate) mod cache_registry; + +#[derive(Debug, clap::Parser)] +pub(crate) enum Command { + /// Cache registry end-to-end tests. + CacheRegistry { + #[clap(subcommand)] + command: cache_registry::Command, + }, +} + +impl Command { + pub(crate) async fn run(&self) -> Result<(), Box> { + match self { + Self::CacheRegistry { command } => command.run().await, + } + } +} diff --git a/manager/src/pg_ephemeral/github_actions/cache_registry.rs b/manager/src/pg_ephemeral/github_actions/cache_registry.rs new file mode 100644 index 00000000..1d2f7cea --- /dev/null +++ b/manager/src/pg_ephemeral/github_actions/cache_registry.rs @@ -0,0 +1,306 @@ +use std::path::{Path, PathBuf}; + +use cmd_proc::EnvVariableName; + +const ENV_GITHUB_REPOSITORY: EnvVariableName = + EnvVariableName::from_static_or_panic("GITHUB_REPOSITORY"); +const ENV_GITHUB_RUN_ID: EnvVariableName = EnvVariableName::from_static_or_panic("GITHUB_RUN_ID"); + +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error( + "pg-ephemeral binary not found at {path}. Build it first with `cargo build --release --package pg-ephemeral`." + )] + BinaryMissing { path: PathBuf }, + #[error("unsupported host platform: {arch}-{os}")] + UnsupportedHostPlatform { + arch: &'static str, + os: &'static str, + }, + #[error("failed to prepare scratch directory at {path}")] + Scratch { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("pg-ephemeral {args} failed")] + PgEphemeral { + args: String, + #[source] + source: cmd_proc::CommandError, + }, + #[error("failed to parse cache status JSON")] + StatusParse(#[source] serde_json::Error), + #[error("cache status JSON missing required field: {path}")] + StatusShape { path: &'static str }, + #[error( + "cache status sanity check failed: seed {seed} expected status {expected}, got {actual}" + )] + StatusMismatch { + seed: String, + expected: &'static str, + actual: String, + }, + #[error("cache status returned no seeds — test configuration produced an empty chain")] + EmptySeeds, +} + +#[derive(Debug, clap::Parser)] +pub(crate) enum Command { + /// Run the end-to-end cache registry round trip. + /// + /// Populates a small test cache, pushes it to ghcr.io, clears the + /// local store, pulls it back, and verifies the tip becomes a local + /// hit again. Assumes `docker login ghcr.io` has already been done + /// by the caller (the CI workflow handles this via `GITHUB_TOKEN`). + Test, +} + +impl Command { + pub(crate) async fn run(&self) -> Result<(), Box> { + match self { + Self::Test => Ok(test().await?), + } + } +} + +/// Construct the ghcr.io registry path the test uses. +/// +/// On GitHub Actions, `GITHUB_REPOSITORY` is set to `owner/repo` (e.g. +/// `mbj/mrs`). Locally, we fall back to `mbj/mrs` so the command is at +/// least invocable off-CI for debugging. +fn cache_registry() -> String { + let repository = + std::env::var(ENV_GITHUB_REPOSITORY.as_str()).unwrap_or_else(|_| "mbj/mrs".to_string()); + format!("ghcr.io/{repository}/pg-ephemeral-cache-test") +} + +/// Generate a unique, validly-shaped instance name for this run. +/// +/// `InstanceName` only allows `[a-z0-9-]`, no leading/trailing dash, +/// max 63 bytes. We use `GITHUB_RUN_ID` (numeric, always valid) when +/// running on CI and fall back to the local process id otherwise. +fn instance_name() -> String { + let suffix = std::env::var(ENV_GITHUB_RUN_ID.as_str()) + .unwrap_or_else(|_| format!("local-{}", std::process::id())); + format!("ci-{suffix}") +} + +fn pg_ephemeral_binary() -> Result { + let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("manager crate has a parent directory") + .to_path_buf(); + + // Match the cargo target the current manager binary was built with. + // When manager runs from the CI release build, `CARGO_BUILD_TARGET` + // is set via the workflow env. Locally, fall back to the native host + // triple. + let rust_target = match std::env::var("CARGO_BUILD_TARGET") { + Ok(value) => value, + Err(_) => match (std::env::consts::ARCH, std::env::consts::OS) { + ("x86_64", "linux") => "x86_64-unknown-linux-musl".to_string(), + ("aarch64", "linux") => "aarch64-unknown-linux-musl".to_string(), + ("aarch64", "macos") => "aarch64-apple-darwin".to_string(), + (arch, os) => { + return Err(Error::UnsupportedHostPlatform { arch, os }); + } + }, + }; + + let path = workspace_root + .join("target") + .join(&rust_target) + .join("release") + .join("pg-ephemeral"); + + if !path.exists() { + return Err(Error::BinaryMissing { path }); + } + + Ok(path) +} + +/// Build a fresh `database.toml` + seed files inside a scratch directory. +/// The caller is responsible for cleanup. +fn prepare_test_directory(registry: &str, instance_name: &str) -> Result { + let dir = std::env::temp_dir().join(format!( + "pg-ephemeral-ci-cache-registry-test-{}", + std::process::id() + )); + + let wrap = |source: std::io::Error| Error::Scratch { + path: dir.clone(), + source, + }; + + // Start from a clean slate in case a prior local run left crumbs. + if dir.exists() { + std::fs::remove_dir_all(&dir).map_err(wrap)?; + } + std::fs::create_dir_all(&dir).map_err(wrap)?; + + std::fs::write( + dir.join("schema.sql"), + "CREATE TABLE cache_registry_test (id INTEGER PRIMARY KEY);\n", + ) + .map_err(wrap)?; + + std::fs::write( + dir.join("data.sql"), + "INSERT INTO cache_registry_test (id) VALUES (42);\n", + ) + .map_err(wrap)?; + + let toml = format!( + "cache_registry = \"{registry}\"\n\ + \n\ + [instances.{instance_name}.seeds.schema]\n\ + type = \"sql-file\"\n\ + path = \"schema.sql\"\n\ + \n\ + [instances.{instance_name}.seeds.data]\n\ + type = \"sql-file\"\n\ + path = \"data.sql\"\n" + ); + std::fs::write(dir.join("database.toml"), toml).map_err(wrap)?; + + Ok(dir) +} + +fn display_args(args: &[&str]) -> String { + args.join(" ") +} + +async fn run_pg_ephemeral(binary: &Path, working_dir: &Path, args: &[&str]) -> Result<(), Error> { + // `cmd_proc::Command::status` returns `Err(CommandError)` on any + // non-zero exit unless `accept_nonzero_exit()` is set, so we just + // propagate. + cmd_proc::Command::new(binary) + .arguments(args) + .working_directory(working_dir) + .status() + .await + .map_err(|source| Error::PgEphemeral { + args: display_args(args), + source, + }) +} + +async fn capture_pg_ephemeral( + binary: &Path, + working_dir: &Path, + args: &[&str], +) -> Result { + cmd_proc::Command::new(binary) + .arguments(args) + .working_directory(working_dir) + .stdout_capture() + .string() + .await + .map_err(|source| Error::PgEphemeral { + args: display_args(args), + source, + }) +} + +fn parse_cache_status(json: &str) -> Result { + serde_json::from_str(json).map_err(Error::StatusParse) +} + +fn assert_all_stages_have_status( + status_json: &serde_json::Value, + expected: &'static str, +) -> Result<(), Error> { + let seeds = status_json["seeds"].as_array().ok_or(Error::StatusShape { + path: "seeds (expected array)", + })?; + if seeds.is_empty() { + return Err(Error::EmptySeeds); + } + + for seed in seeds { + let name = seed["name"].as_str().ok_or(Error::StatusShape { + path: "seeds[].name", + })?; + let status = seed["status"].as_str().ok_or(Error::StatusShape { + path: "seeds[].status", + })?; + if status != expected { + return Err(Error::StatusMismatch { + seed: name.to_string(), + expected, + actual: status.to_string(), + }); + } + } + Ok(()) +} + +fn assert_tip_hit(status_json: &serde_json::Value) -> Result<(), Error> { + let seeds = status_json["seeds"].as_array().ok_or(Error::StatusShape { + path: "seeds (expected array)", + })?; + let tip = seeds.last().ok_or(Error::EmptySeeds)?; + let name = tip["name"].as_str().ok_or(Error::StatusShape { + path: "seeds[last].name", + })?; + let status = tip["status"].as_str().ok_or(Error::StatusShape { + path: "seeds[last].status", + })?; + if status != "hit" { + return Err(Error::StatusMismatch { + seed: name.to_string(), + expected: "hit", + actual: status.to_string(), + }); + } + Ok(()) +} + +async fn test() -> Result<(), Error> { + let registry = cache_registry(); + let instance = instance_name(); + let binary = pg_ephemeral_binary()?; + + log::info!("Using cache_registry: {registry}"); + log::info!("Using instance name: {instance}"); + log::info!("Using pg-ephemeral: {}", binary.display()); + + let dir = prepare_test_directory(®istry, &instance)?; + let instance_arg: &[&str] = &["--instance", instance.as_str()]; + + let populate_args: Vec<&str> = [&["cache", "populate"], instance_arg].concat(); + let push_args: Vec<&str> = [&["cache", "push"], instance_arg].concat(); + let reset_args: Vec<&str> = [&["cache", "reset", "--force"], instance_arg].concat(); + let pull_args: Vec<&str> = [&["cache", "pull"], instance_arg].concat(); + let status_args: Vec<&str> = [&["cache", "status", "--json"], instance_arg].concat(); + + log::info!("Step 1/6: cache populate"); + run_pg_ephemeral(&binary, &dir, &populate_args).await?; + + log::info!("Step 2/6: cache push"); + run_pg_ephemeral(&binary, &dir, &push_args).await?; + + log::info!("Step 3/6: cache reset --force (clear local cache)"); + run_pg_ephemeral(&binary, &dir, &reset_args).await?; + + log::info!("Step 4/6: verify cache is empty locally"); + let status_after_reset = capture_pg_ephemeral(&binary, &dir, &status_args).await?; + let parsed = parse_cache_status(&status_after_reset)?; + assert_all_stages_have_status(&parsed, "miss")?; + + log::info!("Step 5/6: cache pull (should walk back and land on tip)"); + run_pg_ephemeral(&binary, &dir, &pull_args).await?; + + log::info!("Step 6/6: verify tip is now a local hit"); + let status_after_pull = capture_pg_ephemeral(&binary, &dir, &status_args).await?; + let parsed = parse_cache_status(&status_after_pull)?; + assert_tip_hit(&parsed)?; + + log::info!("Cleanup: remove scratch directory"); + let _ = std::fs::remove_dir_all(&dir); + + log::info!("Cache registry end-to-end test PASSED"); + Ok(()) +} diff --git a/ociman/src/backend.rs b/ociman/src/backend.rs index d5f37474..8ca9b003 100644 --- a/ociman/src/backend.rs +++ b/ociman/src/backend.rs @@ -1,5 +1,119 @@ use cmd_proc::*; +/// Substring used to detect the OCI distribution-spec `MANIFEST_UNKNOWN` +/// error code as rendered on stderr by docker, podman, and skopeo when a +/// registry reports that a tag does not exist. +/// +/// **This is load-bearing string matching and we do not like it.** We fall +/// back on it because there is no better option available today: +/// +/// - Neither `docker pull` nor `podman pull` has a `--json` / `--format` +/// flag. The CLIs only expose human-readable stderr. +/// - Exit codes are useless: both tools return `1` (or `125` for podman) +/// for every failure mode — not-found, auth, network, tls — without +/// discrimination. +/// - The docker/podman engine REST APIs do stream NDJSON, but the error +/// `message` / `errorDetail.message` fields contain the same human +/// string (`... manifest unknown`) — the daemons do not surface the +/// registry's structured error code — so switching to the socket would +/// just move the substring match from stderr to a JSON field, at the +/// cost of a ~400KB HTTP client dependency. No actual signal gain. +/// - `docker manifest inspect` still requires `experimental: enabled` as +/// of Docker 28, and `podman manifest inspect` is local-only. `skopeo +/// inspect` is clean but is a separate binary not always installed +/// (Docker Desktop ships without it). +/// - The only path to a clean spec-defined signal is talking to the +/// registry HTTP API directly, which means reimplementing bearer-token +/// auth, cred-helper integration, and auth challenges — substantial +/// work for a library that otherwise just shells out to the CLI. +/// +/// The OCI Distribution Spec v1.1.0 defines the `MANIFEST_UNKNOWN` error +/// code (code-7) that registries MUST return when a manifest is absent: +/// +/// +/// **However the spec does not mandate this stderr string.** The spec only +/// mandates the uppercase `code` field in the registry's JSON response; +/// the human-readable `message` field is OPTIONAL and its content is +/// unspecified. The lowercase `"manifest unknown"` substring this constant +/// matches is a de-facto convention that docker, podman, and skopeo all +/// happen to use when rendering the error to stderr. If a future CLI +/// version changes its wording, this constant must be updated and a +/// corresponding test will break. +const MANIFEST_UNKNOWN_STDERR_SIGNAL: &str = "manifest unknown"; + +#[derive(Debug, thiserror::Error)] +pub enum PullError { + // The `reference` field is boxed because `image::Reference` is ~176 + // bytes (Name + Vec of PathComponents + Tag + Digest), and + // `clippy::result-large-err` trips on anything >128 bytes inside a + // `Result::Err`. + #[error("image not found in registry: {reference}")] + NotFound { + reference: Box, + }, + #[error("pull failed for {reference}: {message}")] + Other { + reference: Box, + message: String, + }, +} + +/// Turn a pull subprocess exit status + captured stderr into a +/// [`PullError`] (or [`Ok`] on success). +/// +/// Split out from [`Backend::pull_image`] so it can be unit-tested with +/// canned stderr bytes — no network, no daemon, no registry. +fn classify_pull_result( + reference: &crate::image::Reference, + success: bool, + stderr: &[u8], +) -> Result<(), PullError> { + if success { + return Ok(()); + } + + let stderr = String::from_utf8_lossy(stderr); + if stderr.contains(MANIFEST_UNKNOWN_STDERR_SIGNAL) { + Err(PullError::NotFound { + reference: Box::new(reference.clone()), + }) + } else { + Err(PullError::Other { + reference: Box::new(reference.clone()), + message: stderr.trim().to_string(), + }) + } +} + +#[derive(Debug, thiserror::Error)] +#[error("push failed for {reference}: {message}")] +pub struct PushError { + // Boxed for the same reason as `PullError`'s reference fields — see + // that type's comment. + pub reference: Box, + pub message: String, +} + +/// Turn a push subprocess exit status + captured stderr into a +/// [`PushError`] (or [`Ok`] on success). +/// +/// Split out from [`Backend::push_image`] for the same reason as +/// [`classify_pull_result`] — unit-testable without a network or daemon. +fn classify_push_result( + reference: &crate::image::Reference, + success: bool, + stderr: &[u8], +) -> Result<(), PushError> { + if success { + return Ok(()); + } + + Err(PushError { + reference: Box::new(reference.clone()), + message: String::from_utf8_lossy(stderr).trim().to_string(), + }) +} + #[derive(Copy, Clone, Debug, Eq, PartialEq, serde::Deserialize, clap::ValueEnum)] #[serde(rename_all = "snake_case")] pub enum Selection { @@ -73,29 +187,54 @@ impl Backend { .unwrap(); } - /// Pull an image from a registry - pub async fn pull_image(&self, reference: &crate::image::Reference) { - self.command() + /// Pull an image from a registry. + /// + /// Stdout streams to the parent so users see layer progress. Stderr is + /// captured and parsed to distinguish [`PullError::NotFound`] (registry + /// reports `manifest unknown`) from other failures. + pub async fn pull_image(&self, reference: &crate::image::Reference) -> Result<(), PullError> { + let output = self + .command() .arguments(["pull", &reference.to_string()]) - .status() + .stderr_capture() + .accept_nonzero_exit() + .run() .await .unwrap(); + + classify_pull_result(reference, output.status.success(), &output.bytes) } - /// Pull an image only if it's not already present - pub async fn pull_image_if_absent(&self, reference: &crate::image::Reference) { - if !self.is_image_present(reference).await { - self.pull_image(reference).await; + /// Pull an image only if it's not already present locally. + pub async fn pull_image_if_absent( + &self, + reference: &crate::image::Reference, + ) -> Result<(), PullError> { + if self.is_image_present(reference).await { + Ok(()) + } else { + self.pull_image(reference).await } } - /// Push an image to a registry - pub async fn push_image(&self, reference: &crate::image::Reference) { - self.command() + /// Push an image to a registry. + /// + /// Stdout streams to the parent so users see upload progress. Stderr + /// is captured and surfaced as [`PushError`] on non-zero exit. Unlike + /// pull, there's no useful sub-discrimination here: every push failure + /// (auth, network, rate limit, missing local image) collapses into the + /// same "it didn't upload" outcome as far as callers are concerned. + pub async fn push_image(&self, reference: &crate::image::Reference) -> Result<(), PushError> { + let output = self + .command() .arguments(["push", &reference.to_string()]) - .status() + .stderr_capture() + .accept_nonzero_exit() + .run() .await .unwrap(); + + classify_push_result(reference, output.status.success(), &output.bytes) } pub async fn remove_image(&self, reference: &crate::image::Reference) { @@ -539,6 +678,85 @@ pub mod resolve { mod tests { use super::*; + fn pull_test_reference() -> crate::image::Reference { + "ghcr.io/myorg/pg-ephemeral/main:abc123".parse().unwrap() + } + + #[test] + fn test_classify_pull_result_success() { + let reference = pull_test_reference(); + assert!(classify_pull_result(&reference, true, b"").is_ok()); + } + + #[test] + fn test_classify_pull_result_not_found_podman() { + let reference = pull_test_reference(); + // Representative podman stderr for a non-existent tag. + let stderr = b"Error: initializing source docker://ghcr.io/myorg/pg-ephemeral/main:abc123: reading manifest abc123 in ghcr.io/myorg/pg-ephemeral/main: manifest unknown"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::NotFound { reference: r }) => assert_eq!(*r, reference), + other => panic!("expected PullError::NotFound, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_not_found_docker() { + let reference = pull_test_reference(); + // Representative docker stderr for a non-existent tag. + let stderr = b"Error response from daemon: manifest for ghcr.io/myorg/pg-ephemeral/main:abc123 not found: manifest unknown: manifest unknown"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::NotFound { reference: r }) => assert_eq!(*r, reference), + other => panic!("expected PullError::NotFound, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_auth_failure_is_other() { + let reference = pull_test_reference(); + // Auth failure must NOT be misclassified as NotFound. + let stderr = b"Error response from daemon: pull access denied for ghcr.io/myorg/pg-ephemeral/main, repository does not exist or may require 'docker login': denied: requested access to the resource is denied"; + match classify_pull_result(&reference, false, stderr) { + Err(PullError::Other { + reference: r, + message, + }) => { + assert_eq!(*r, reference); + assert!(message.contains("denied")); + } + other => panic!("expected PullError::Other, got {other:?}"), + } + } + + #[test] + fn test_classify_pull_result_network_error_is_other() { + let reference = pull_test_reference(); + let stderr = b"Error response from daemon: Get https://ghcr.io/v2/: dial tcp: lookup ghcr.io: no such host"; + let result = classify_pull_result(&reference, false, stderr); + assert!(matches!(result, Err(PullError::Other { .. }))); + } + + #[test] + fn test_classify_push_result_success() { + let reference = pull_test_reference(); + assert!(classify_push_result(&reference, true, b"").is_ok()); + } + + #[test] + fn test_classify_push_result_failure_captures_stderr() { + let reference = pull_test_reference(); + let stderr = b"unauthorized: authentication required\n"; + match classify_push_result(&reference, false, stderr) { + Err(PushError { + reference: r, + message, + }) => { + assert_eq!(*r, reference); + assert_eq!(message, "unauthorized: authentication required"); + } + Ok(()) => panic!("expected PushError"), + } + } + #[tokio::test] async fn test_container_resolver_localhost() { let backend = crate::test_backend_setup!(); @@ -674,7 +892,7 @@ mod tests { // Create test images by tagging alpine let source = crate::testing::ALPINE_LATEST_IMAGE.clone(); - backend.pull_image_if_absent(&source).await; + backend.pull_image_if_absent(&source).await.unwrap(); let target_a: crate::image::Reference = "localhost/ociman-test/image-references-by-name:a" .parse() diff --git a/ociman/src/reference.rs b/ociman/src/reference.rs index 1f501cf7..240dd00a 100644 --- a/ociman/src/reference.rs +++ b/ociman/src/reference.rs @@ -47,7 +47,7 @@ use nom::{ IResult, Parser, branch::alt, - bytes::complete::{tag, take_while, take_while_m_n, take_while1}, + bytes::complete::{take_while, take_while_m_n, take_while1}, character::complete::{char, digit1, u16}, combinator::{all_consuming, opt, recognize, verify}, multi::{many0, separated_list1}, @@ -508,13 +508,94 @@ impl std::fmt::Display for Domain { /// ); /// ``` #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct PathComponent(String); +pub struct PathComponent(std::borrow::Cow<'static, str>); + +/// Const-friendly consumer for [`PathComponent`]. +/// +/// Implements the full grammar +/// `path-component := alpha-numeric (separator alpha-numeric)*` +/// where `alpha-numeric := [a-z0-9]+` and +/// `separator := [_.] | __ | [-]*`. +/// +/// Returns the number of bytes consumed from the start of `input`. The +/// callers split: +/// - [`PathComponent::from_static_or_panic`] requires the full input to be +/// consumed (rejects trailing data). +/// - [`PathComponent`]'s `Parse` impl uses the consumed length as the nom +/// match boundary, leaving the remainder for the next combinator. +/// +/// This is the single source of truth for the grammar — both compile-time +/// static literals and runtime nom parsing go through it. +const fn consume_path_component(input: &str) -> Result { + let bytes = input.as_bytes(); + let len = bytes.len(); + + // Initial alpha-numeric block (mandatory, at least one byte). + let mut pos = 0; + while pos < len { + let byte = bytes[pos]; + if !(byte.is_ascii_lowercase() || byte.is_ascii_digit()) { + break; + } + pos += 1; + } + if pos == 0 { + return Err("path component must start with [a-z0-9]"); + } + + // Loop: try to consume (separator + alpha-numeric). If the trailing + // alpha-numeric is missing, rewind the separator and stop. + loop { + let saved = pos; + + // Separator: __ | _ | . | [-]* + if pos + 1 < len && bytes[pos] == b'_' && bytes[pos + 1] == b'_' { + pos += 2; + } else if pos < len && (bytes[pos] == b'_' || bytes[pos] == b'.') { + pos += 1; + } else { + while pos < len && bytes[pos] == b'-' { + pos += 1; + } + } + + // Trailing alpha-numeric (mandatory after a non-empty separator + // attempt; required for the iteration to succeed). + let inner_start = pos; + while pos < len { + let byte = bytes[pos]; + if !(byte.is_ascii_lowercase() || byte.is_ascii_digit()) { + break; + } + pos += 1; + } + + if pos == inner_start { + return Ok(saved); + } + } +} impl PathComponent { #[must_use] pub fn as_str(&self) -> &str { &self.0 } + + /// Create a [`PathComponent`] from a static string at compile time. + /// + /// # Panics + /// + /// Panics if `input` is not a valid path component, or if it contains + /// trailing bytes that cannot be consumed by the grammar. + #[must_use] + pub const fn from_static_or_panic(input: &'static str) -> Self { + match consume_path_component(input) { + Ok(consumed) if consumed == input.len() => Self(std::borrow::Cow::Borrowed(input)), + Ok(_) => panic!("path component has trailing input"), + Err(message) => panic!("{}", message), + } + } } impl AsRef for PathComponent { @@ -523,27 +604,18 @@ impl AsRef for PathComponent { } } -impl PathComponent { - /// Pattern: `[a-z0-9]+` - fn parse_alpha_numeric(input: &str) -> IResult<&str, &str> { - take_while1(|character: char| character.is_ascii_lowercase() || character.is_ascii_digit()) - .parse(input) - } - - /// Pattern: `[_.]|__|[-]*` - fn parse_separator(input: &str) -> IResult<&str, &str> { - alt((tag("__"), tag("_"), tag("."), recognize(many0(char('-'))))).parse(input) - } -} - impl Parse for PathComponent { fn parse(input: &str) -> IResult<&str, Self> { - recognize(pair( - Self::parse_alpha_numeric, - many0(pair(Self::parse_separator, Self::parse_alpha_numeric)), - )) - .map(|string: &str| Self(string.to_string())) - .parse(input) + match consume_path_component(input) { + Ok(consumed) => { + let (matched, rest) = input.split_at(consumed); + Ok((rest, Self(std::borrow::Cow::Owned(matched.to_string())))) + } + Err(_) => Err(nom::Err::Error(nom::error::Error::new( + input, + nom::error::ErrorKind::TakeWhile1, + ))), + } } } @@ -557,6 +629,9 @@ impl std::fmt::Display for PathComponent { /// /// Pattern: `path-component ['/' path-component]*` /// +/// Non-emptiness is encoded in the type: a [`Path`] is `(first, rest)`, where +/// `first` is always present and `rest` may be empty. +/// /// # Examples /// /// ``` @@ -564,11 +639,11 @@ impl std::fmt::Display for PathComponent { /// /// let path: Path = "library/alpine".parse().unwrap(); /// assert_eq!(path.to_string(), "library/alpine"); -/// assert_eq!(path.components().len(), 2); +/// assert_eq!(path.iter().count(), 2); /// /// let path: Path = "owner/repo/subpath".parse().unwrap(); /// assert_eq!(path.to_string(), "owner/repo/subpath"); -/// assert_eq!(path.components().len(), 3); +/// assert_eq!(path.iter().count(), 3); /// /// // Rejects empty input /// assert_eq!( @@ -589,26 +664,48 @@ impl std::fmt::Display for PathComponent { /// ); /// ``` #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] -pub struct Path(Vec); +pub struct Path { + first: PathComponent, + rest: Vec, +} impl Path { + /// Iterate over all components, starting with the first. + pub fn iter(&self) -> impl Iterator { + std::iter::once(&self.first).chain(self.rest.iter()) + } + + /// Append a component to the path. #[must_use] - pub fn components(&self) -> &[PathComponent] { - &self.0 + pub fn extended(mut self, component: PathComponent) -> Self { + self.rest.push(component); + self + } +} + +impl From for Path { + fn from(first: PathComponent) -> Self { + Self { + first, + rest: Vec::new(), + } } } impl Parse for Path { fn parse(input: &str) -> IResult<&str, Self> { - separated_list1(char('/'), PathComponent::parse) - .map(Self) - .parse(input) + pair( + PathComponent::parse, + many0(preceded(char('/'), PathComponent::parse)), + ) + .map(|(first, rest)| Self { first, rest }) + .parse(input) } } impl std::fmt::Display for Path { fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write_interspersed(formatter, &self.0, "/") + write_interspersed(formatter, self.iter().map(PathComponent::as_str), "/") } } @@ -644,12 +741,21 @@ impl std::fmt::Display for Path { /// assert_eq!(name.domain.unwrap().to_string(), "localhost"); /// assert_eq!(name.path.to_string(), "pg-ephemeral/main"); /// ``` -#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd, serde::Deserialize)] +#[serde(try_from = "String")] pub struct Name { pub domain: Option, pub path: Path, } +impl TryFrom for Name { + type Error = String; + + fn try_from(value: String) -> Result { + value.parse() + } +} + impl Parse for Name { /// Parse a name, applying the distribution/reference convention for domain /// detection on top of the OCI spec: the segment before the first '/' is @@ -1077,4 +1183,56 @@ mod tests { assert!(name.domain.is_none()); assert_eq!(name.path.to_string(), "myorg/myproject/myimage"); } + + #[test] + fn test_name_deserialize_from_string() { + let name: Name = serde_json::from_str("\"ghcr.io/myorg\"").unwrap(); + assert_eq!(name.domain.unwrap().to_string(), "ghcr.io"); + assert_eq!(name.path.to_string(), "myorg"); + } + + #[test] + fn test_name_deserialize_invalid_string_fails() { + let result: Result = serde_json::from_str("\"bad name with spaces\""); + assert!(result.is_err()); + } + + #[test] + fn test_path_component_from_static_or_panic_const_context() { + // Exercise the const constructor in a const context — this would + // fail at compile time if the grammar rejected the input. + const PLAIN: PathComponent = PathComponent::from_static_or_panic("alpine"); + const WITH_DASH: PathComponent = PathComponent::from_static_or_panic("pg-ephemeral"); + const WITH_UNDERSCORE: PathComponent = PathComponent::from_static_or_panic("foo_bar"); + const WITH_DOT: PathComponent = PathComponent::from_static_or_panic("foo.bar"); + const WITH_DOUBLE_UNDERSCORE: PathComponent = + PathComponent::from_static_or_panic("foo__bar"); + + assert_eq!(PLAIN.as_str(), "alpine"); + assert_eq!(WITH_DASH.as_str(), "pg-ephemeral"); + assert_eq!(WITH_UNDERSCORE.as_str(), "foo_bar"); + assert_eq!(WITH_DOT.as_str(), "foo.bar"); + assert_eq!(WITH_DOUBLE_UNDERSCORE.as_str(), "foo__bar"); + } + + #[test] + #[should_panic(expected = "path component must start with [a-z0-9]")] + fn test_path_component_from_static_or_panic_rejects_leading_dash() { + let _ = PathComponent::from_static_or_panic("-alpine"); + } + + #[test] + #[should_panic(expected = "path component has trailing input")] + fn test_path_component_from_static_or_panic_rejects_trailing_dash() { + // "foo-" consumes as "foo" (grammar rewinds the orphan dash), + // leaving a non-zero trailing remainder that `from_static_or_panic` + // refuses. + let _ = PathComponent::from_static_or_panic("foo-"); + } + + #[test] + #[should_panic(expected = "path component must start with [a-z0-9]")] + fn test_path_component_from_static_or_panic_rejects_empty() { + let _ = PathComponent::from_static_or_panic(""); + } } diff --git a/ociman/tests/integration.rs b/ociman/tests/integration.rs index bf03fbc6..77c895ba 100644 --- a/ociman/tests/integration.rs +++ b/ociman/tests/integration.rs @@ -267,7 +267,7 @@ async fn test_image_tag() { let target: ociman::image::Reference = ociman::testing::test_reference("ociman-test-tagged:latest"); - backend.pull_image_if_absent(&source).await; + backend.pull_image_if_absent(&source).await.unwrap(); assert!(!backend.is_image_present(&target).await); @@ -285,7 +285,7 @@ async fn test_image_pull_if_absent() { let reference = ociman::testing::ALPINE_LATEST_IMAGE.clone(); - backend.pull_image_if_absent(&reference).await; + backend.pull_image_if_absent(&reference).await.unwrap(); assert!(backend.is_image_present(&reference).await); } diff --git a/pg-ephemeral/CHANGELOG.md b/pg-ephemeral/CHANGELOG.md index 967fb560..9cb9cbfb 100644 --- a/pg-ephemeral/CHANGELOG.md +++ b/pg-ephemeral/CHANGELOG.md @@ -2,6 +2,25 @@ ## Unreleased +### Added + +- `cache_registry` config field and `--cache-registry` CLI flag. When set, + all cache image references are prefixed with the given OCI registry name + (e.g. `ghcr.io/myorg`), so cache images can be pushed and pulled to share + warm cache state across machines. The cache key hash is unaffected — + different registries do not fragment the cache. +- `pg-ephemeral cache pull` subcommand. Walks the seed chain from tip + backwards and pulls the newest stage that exists in the configured + registry, then stops. Requires `cache_registry` to be set. +- `pg-ephemeral cache push` subcommand. Pushes every locally-cached + stage to the configured registry. Requires `cache_registry` to be set. + +### Changed + +- CLI errors are now printed using their user-facing `Display` form + (e.g. "Error: cache_registry must be set …") instead of the internal + `Debug` form (e.g. "CacheSync(RegistryNotSet)"). + ### Fixed - Suppress spurious `PID N is not a PostgreSQL backend process` warnings when diff --git a/pg-ephemeral/README.md b/pg-ephemeral/README.md index 83e87d4f..3535f30b 100644 --- a/pg-ephemeral/README.md +++ b/pg-ephemeral/README.md @@ -50,12 +50,13 @@ cache = { type = "none" } ### Top-level fields -| Field | Description | -|--------------------------|----------------------------------------------------------------------| -| `image` | PostgreSQL version / image tag (e.g. `"17.1"`) | -| `backend` | `"docker"`, `"podman"`, or omit for auto-detection (see below) | -| `ssl_config` | SSL configuration with `hostname` field | -| `wait_available_timeout` | How long to wait for PostgreSQL to accept connections (e.g. `"30s"`) | +| Field | Description | +|--------------------------|-----------------------------------------------------------------------------| +| `image` | PostgreSQL version / image tag (e.g. `"17.1"`) | +| `backend` | `"docker"`, `"podman"`, or omit for auto-detection (see below) | +| `cache_registry` | OCI registry prefix for cache images (e.g. `"ghcr.io/myorg"`). See [Sharing cache across machines](#sharing-cache-across-machines). | +| `ssl_config` | SSL configuration with `hostname` field | +| `wait_available_timeout` | How long to wait for PostgreSQL to accept connections (e.g. `"30s"`) | ### Backend selection @@ -152,6 +153,12 @@ pg-ephemeral cache status --json # Pre-populate the cache without running an interactive session pg-ephemeral cache populate +# Pull cache images from the configured registry (requires cache_registry) +pg-ephemeral cache pull + +# Push locally-cached stages to the configured registry (requires cache_registry) +pg-ephemeral cache push + # Remove cached images pg-ephemeral cache reset @@ -170,6 +177,57 @@ For `command` type seeds, the `cache` field controls how the cache key is comput | `{ type = "key-script", script = "..." }` | Run a script whose stdout is hashed as the cache key. | | `{ type = "none" }` | Disable caching. Breaks the cache chain for this and all subsequent seeds. | +### Sharing cache across machines + +By default, cache images are named `pg-ephemeral/:` and live +only in the local Docker/Podman image store. Set `cache_registry` to a remote +OCI registry prefix and every cache image gains that prefix, so references +become push/pullable addresses in a registry you can share across machines +(CI runners, developer laptops, production build hosts): + +```toml +image = "17.1" +cache_registry = "ghcr.io/myorg" + +[instances.main.seeds.schema] +type = "sql-file" +path = "schema.sql" +``` + +The `cache_registry` value can be any valid OCI registry name — just a host +(`ghcr.io`), a host plus namespace (`ghcr.io/myorg`), or a private registry +(`registry.example.com:5000/team`). `pg-ephemeral cache status` will now +report references like `ghcr.io/myorg/pg-ephemeral/main:`. + +**The cache key hash is not affected by `cache_registry`.** Two machines +pointed at different registries still compute the same hex for the same +content, and switching a project from no registry to a registry (or between +registries) does not invalidate any existing cache. + +Once `cache_registry` is set, use the two dedicated subcommands to move +cache images between the local image store and the remote registry: + +```sh +# Pull the newest cached stage from the registry that exists remotely. +# Walks the seed chain from tip backwards and stops on the first hit. +pg-ephemeral cache pull + +# Populate anything still missing locally, then push everything that's +# now cached locally to the registry. Typical CI shape: +pg-ephemeral cache pull && pg-ephemeral cache populate && pg-ephemeral cache push +``` + +Registry authentication is handled entirely by the underlying container +CLI (`docker login`, `podman login`, or cred-helper integration) — no +pg-ephemeral-specific setup required. + +You can override the registry on a single invocation with `--cache-registry` +without editing `database.toml`: + +```sh +pg-ephemeral --cache-registry ghcr.io/myorg cache pull +``` + ## Rust Library pg-ephemeral can be used as a Rust library for integration tests or any code that needs @@ -392,18 +450,19 @@ Commands: container-psql Run interactive psql inside the container container-shell Run interactive shell inside the container container-schema-dump Dump schema from the container - cache Cache management (status, populate, reset) + cache Cache management (status, populate, pull, push, reset) integration-server Run integration server (pipe-based control protocol) list List defined instances platform Platform support checks Options: - --config-file Config file path (default: database.toml) - --no-config-file Use defaults, ignore any config file - --backend Override backend (docker, podman) - --image Override PostgreSQL image - --ssl-hostname Enable SSL with the specified hostname - --instance Target instance (default: main) + --config-file Config file path (default: database.toml) + --no-config-file Use defaults, ignore any config file + --backend Override backend (docker, podman) + --cache-registry Override cache_registry from config (e.g. ghcr.io/myorg) + --image Override PostgreSQL image + --ssl-hostname Enable SSL with the specified hostname + --instance Target instance (default: main) ``` ## How it compares to testcontainers diff --git a/pg-ephemeral/src/bin/pg-ephemeral.rs b/pg-ephemeral/src/bin/pg-ephemeral.rs index 66dd7ff2..20f95fa8 100644 --- a/pg-ephemeral/src/bin/pg-ephemeral.rs +++ b/pg-ephemeral/src/bin/pg-ephemeral.rs @@ -1,10 +1,13 @@ use pg_ephemeral::cli; #[tokio::main(flavor = "current_thread")] -async fn main() -> Result<(), cli::Error> { +async fn main() { env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); use clap::Parser; - cli::App::parse().run().await + if let Err(error) = cli::App::parse().run().await { + eprintln!("Error: {error}"); + std::process::exit(1); + } } diff --git a/pg-ephemeral/src/cli.rs b/pg-ephemeral/src/cli.rs index abf04988..f03fdb6c 100644 --- a/pg-ephemeral/src/cli.rs +++ b/pg-ephemeral/src/cli.rs @@ -9,6 +9,8 @@ pub enum Error { Config(#[from] crate::config::Error), #[error(transparent)] Container(#[from] crate::container::Error), + #[error(transparent)] + CacheSync(#[from] crate::definition::CacheSyncError), #[error("Unknown instance: {0}")] UnknownInstance(InstanceName), } @@ -54,6 +56,13 @@ pub struct App { /// If the autodetection fails exits with an error. #[arg(long)] backend: Option, + /// Overwrite cache registry + /// + /// When set, all cache image references are prefixed with this registry + /// name (e.g. `ghcr.io/myorg`), enabling push/pull against a remote + /// registry. Does not affect cache key hashing. + #[arg(long)] + cache_registry: Option, /// Overwrite image #[arg(long)] image: Option, @@ -68,6 +77,7 @@ impl App { pub async fn run(&self) -> Result<(), Error> { let overwrites = crate::config::InstanceDefinition { backend: self.backend, + cache_registry: self.cache_registry.clone(), image: self.image.clone(), seeds: indexmap::IndexMap::new(), ssl_config: self @@ -132,6 +142,17 @@ pub enum CacheCommand { }, /// Populate cache by running seeds and committing at each cacheable point Populate, + /// Pull cache images from the configured registry. + /// + /// Walks the seed chain from tip backwards and pulls the newest stage + /// that exists remotely. Requires `cache_registry` to be set. + Pull, + /// Push all locally-cached stages to the configured registry. + /// + /// Pushes every stage currently stored locally (status "hit"). Stages + /// not yet populated locally are skipped. Requires `cache_registry` to + /// be set. + Push, } #[derive(Clone, Debug, clap::Parser)] @@ -304,6 +325,20 @@ impl Command { definition.populate_cache(instance_name).await?; definition.print_cache_status(instance_name, false).await?; } + CacheCommand::Pull => { + let definition = Self::get_instance(instance_map, instance_name)? + .definition(instance_name) + .await + .unwrap(); + definition.pull_cache(instance_name).await?; + } + CacheCommand::Push => { + let definition = Self::get_instance(instance_map, instance_name)? + .definition(instance_name) + .await + .unwrap(); + definition.push_cache(instance_name).await?; + } }, Self::ContainerPsql { instance_name } => { let definition = Self::get_instance(instance_map, instance_name)? diff --git a/pg-ephemeral/src/config.rs b/pg-ephemeral/src/config.rs index a2e6acf1..61f13686 100644 --- a/pg-ephemeral/src/config.rs +++ b/pg-ephemeral/src/config.rs @@ -7,6 +7,7 @@ use crate::seed::{Command, CommandCacheConfig, Seed, SeedName}; pub struct Instance { pub application_name: Option, pub backend: ociman::backend::Selection, + pub cache_registry: Option, pub database: pg_client::Database, pub seeds: indexmap::IndexMap, pub ssl_config: Option, @@ -22,6 +23,7 @@ impl Instance { Self { backend, application_name: None, + cache_registry: None, seeds: indexmap::IndexMap::new(), ssl_config: None, superuser: pg_client::User::POSTGRES, @@ -40,6 +42,7 @@ impl Instance { instance_name: instance_name.clone(), application_name: self.application_name.clone(), backend: self.backend.resolve().await?, + cache_registry: self.cache_registry.clone(), database: self.database.clone(), seeds: self.seeds.clone(), ssl_config: self.ssl_config.clone(), @@ -148,6 +151,7 @@ pub struct SslConfigDefinition { #[serde(deny_unknown_fields)] pub struct InstanceDefinition { pub backend: Option, + pub cache_registry: Option, pub image: Option, #[serde(default)] pub seeds: indexmap::IndexMap, @@ -161,6 +165,7 @@ impl InstanceDefinition { pub fn empty() -> Self { Self { backend: None, + cache_registry: None, image: None, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -195,6 +200,13 @@ impl InstanceDefinition { .or(defaults.backend) .unwrap_or(ociman::backend::Selection::Auto); + let cache_registry = overwrites + .cache_registry + .as_ref() + .or(self.cache_registry.as_ref()) + .or(defaults.cache_registry.as_ref()) + .cloned(); + let seeds = self .seeds .into_iter() @@ -219,6 +231,7 @@ impl InstanceDefinition { Ok(Instance { application_name: None, backend, + cache_registry, database: pg_client::Database::POSTGRES, seeds, ssl_config, @@ -235,6 +248,7 @@ impl InstanceDefinition { pub struct Config { image: Option, backend: Option, + cache_registry: Option, ssl_config: Option, #[serde(default, with = "humantime_serde")] wait_available_timeout: Option, @@ -246,6 +260,7 @@ impl std::default::Default for Config { Self { image: Some(Image::default()), backend: None, + cache_registry: None, ssl_config: None, wait_available_timeout: None, instances: None, @@ -331,6 +346,7 @@ impl Config { ) -> Result { let defaults = InstanceDefinition { backend: self.backend, + cache_registry: self.cache_registry.clone(), image: self.image.clone(), seeds: indexmap::IndexMap::new(), ssl_config: self.ssl_config.clone(), diff --git a/pg-ephemeral/src/definition.rs b/pg-ephemeral/src/definition.rs index e99aa1c3..91b01560 100644 --- a/pg-ephemeral/src/definition.rs +++ b/pg-ephemeral/src/definition.rs @@ -14,6 +14,22 @@ pub enum SeedApplyError { Sql(#[from] sqlx::Error), } +/// Errors from cache sync operations ([`Definition::pull_cache`] / +/// [`Definition::push_cache`]). +#[derive(Debug, thiserror::Error)] +pub enum CacheSyncError { + #[error( + "cache_registry must be set in database.toml or via --cache-registry to use this command" + )] + RegistryNotSet, + #[error(transparent)] + SeedLoad(#[from] LoadError), + #[error(transparent)] + Pull(#[from] ociman::backend::PullError), + #[error(transparent)] + Push(#[from] ociman::backend::PushError), +} + #[derive(Clone, Debug, PartialEq)] pub enum SslConfig { Generated { @@ -27,6 +43,7 @@ pub struct Definition { pub instance_name: crate::InstanceName, pub application_name: Option, pub backend: ociman::Backend, + pub cache_registry: Option, pub database: pg_client::Database, pub seeds: indexmap::IndexMap, pub ssl_config: Option, @@ -48,6 +65,7 @@ impl Definition { instance_name, backend, application_name: None, + cache_registry: None, seeds: indexmap::IndexMap::new(), ssl_config: None, superuser: pg_client::User::POSTGRES, @@ -98,6 +116,7 @@ impl Definition { &self.seeds, &self.backend, instance_name, + self.cache_registry.as_ref(), ) .await } @@ -334,6 +353,106 @@ impl Definition { Ok((previous_cache_reference.cloned(), Vec::new())) } + /// Pull cache images from the configured registry by walking the seed + /// chain from tip backwards. + /// + /// Returns as soon as any cacheable stage lands locally: + /// - Already present locally (Hit) → return immediately, nothing to pull. + /// - Missing locally (Miss) → attempt to pull from the registry; on + /// success return, on [`PullError::NotFound`] walk to the next older + /// stage, on [`PullError::Other`] abort. + /// - Uncacheable → skip and continue walking. + /// + /// Returns `Ok(())` even if no cached stage was found in the registry — + /// the caller can tell the difference via logs. + /// + /// # Errors + /// + /// Returns [`CacheSyncError::RegistryNotSet`] if the definition has no + /// `cache_registry` configured. + pub async fn pull_cache( + &self, + instance_name: &crate::InstanceName, + ) -> Result<(), CacheSyncError> { + if self.cache_registry.is_none() { + return Err(CacheSyncError::RegistryNotSet); + } + + let loaded_seeds = self.load_seeds(instance_name).await?; + let seeds: Vec<&LoadedSeed> = loaded_seeds.iter_seeds().collect(); + + for seed in seeds.iter().rev() { + use crate::seed::CacheStatus; + match seed.cache_status() { + CacheStatus::Uncacheable => { + log::debug!("cache pull: skipping uncacheable seed {}", seed.name()); + continue; + } + CacheStatus::Hit { reference } => { + log::info!( + "cache pull: {} already present locally at {reference}", + seed.name() + ); + return Ok(()); + } + CacheStatus::Miss { reference } => { + log::info!("cache pull: attempting {reference}"); + match self.backend.pull_image(reference).await { + Ok(()) => { + log::info!("cache pull: pulled {reference}"); + return Ok(()); + } + Err(ociman::backend::PullError::NotFound { .. }) => { + log::debug!("cache pull: {reference} not in registry, walking back"); + continue; + } + Err(error) => return Err(error.into()), + } + } + } + } + + log::info!("cache pull: no cached stage found in registry"); + Ok(()) + } + + /// Push all locally-cached stages to the configured registry. + /// + /// Iterates the seed chain in order and pushes every stage whose local + /// cache status is [`CacheStatus::Hit`](crate::seed::CacheStatus::Hit). + /// [`CacheStatus::Miss`](crate::seed::CacheStatus::Miss) and + /// [`CacheStatus::Uncacheable`](crate::seed::CacheStatus::Uncacheable) + /// stages are skipped. Aborts on the first push failure. + /// + /// # Errors + /// + /// Returns [`CacheSyncError::RegistryNotSet`] if the definition has no + /// `cache_registry` configured. + pub async fn push_cache( + &self, + instance_name: &crate::InstanceName, + ) -> Result<(), CacheSyncError> { + if self.cache_registry.is_none() { + return Err(CacheSyncError::RegistryNotSet); + } + + let loaded_seeds = self.load_seeds(instance_name).await?; + let mut pushed_any = false; + + for seed in loaded_seeds.iter_seeds() { + if let crate::seed::CacheStatus::Hit { reference } = seed.cache_status() { + log::info!("cache push: pushing {reference}"); + self.backend.push_image(reference).await?; + pushed_any = true; + } + } + + if !pushed_any { + log::info!("cache push: no locally cached stages to push"); + } + Ok(()) + } + pub async fn run_integration_server( &self, result_fd: std::os::fd::RawFd, diff --git a/pg-ephemeral/src/seed.rs b/pg-ephemeral/src/seed.rs index c73ed85b..19fe3822 100644 --- a/pg-ephemeral/src/seed.rs +++ b/pg-ephemeral/src/seed.rs @@ -1,6 +1,40 @@ use git_proc::Build; -type CacheKey = [u8; 32]; +type CacheKey = sha2::digest::Output; + +const PG_EPHEMERAL_COMPONENT: ociman::reference::PathComponent = + ociman::reference::PathComponent::from_static_or_panic("pg-ephemeral"); + +fn build_cache_reference( + cache_registry: Option<&ociman::reference::Name>, + instance_name: &crate::InstanceName, + cache_key: CacheKey, +) -> ociman::Reference { + let instance_component: ociman::reference::PathComponent = + instance_name.as_str().parse().unwrap(); + + let (domain, path) = match cache_registry { + Some(registry) => ( + registry.domain.clone(), + registry + .path + .clone() + .extended(PG_EPHEMERAL_COMPONENT.clone()) + .extended(instance_component), + ), + None => ( + None, + ociman::reference::Path::from(PG_EPHEMERAL_COMPONENT.clone()) + .extended(instance_component), + ), + }; + + ociman::Reference { + name: ociman::reference::Name { domain, path }, + tag: Some(cache_key.into()), + digest: None, + } +} #[derive(Clone, Debug, PartialEq)] pub enum CacheStatus { @@ -14,12 +48,11 @@ impl CacheStatus { cache_key: Option, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Self { match cache_key { Some(key) => { - let reference = format!("pg-ephemeral/{}:{}", instance_name, hex::encode(key)) - .parse() - .unwrap(); + let reference = build_cache_reference(cache_registry, instance_name, key); if backend.is_image_present(&reference).await { Self::Hit { reference } } else { @@ -259,6 +292,7 @@ impl Seed { hash_chain: &mut HashChain, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Result { match self { Seed::SqlFile { path } => { @@ -276,6 +310,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -316,6 +351,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -422,6 +458,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, cache_key_output, @@ -437,6 +474,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -451,6 +489,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -478,6 +517,7 @@ impl Seed { hash_chain.cache_key(), backend, instance_name, + cache_registry, ) .await, name, @@ -620,9 +660,7 @@ impl HashChain { fn cache_key(&self) -> Option { use sha2::Digest; - self.hasher - .as_ref() - .map(|hasher| hasher.clone().finalize().into()) + self.hasher.as_ref().map(|hasher| hasher.clone().finalize()) } fn stop(&mut self) { @@ -643,6 +681,7 @@ impl<'a> LoadedSeeds<'a> { seeds: &indexmap::IndexMap, backend: &ociman::Backend, instance_name: &crate::InstanceName, + cache_registry: Option<&ociman::reference::Name>, ) -> Result { let mut hash_chain = HashChain::new(); let mut loaded_seeds = Vec::new(); @@ -662,7 +701,13 @@ impl<'a> LoadedSeeds<'a> { for (name, seed) in seeds { let loaded_seed = seed - .load(name.clone(), &mut hash_chain, backend, instance_name) + .load( + name.clone(), + &mut hash_chain, + backend, + instance_name, + cache_registry, + ) .await?; loaded_seeds.push(loaded_seed); } diff --git a/pg-ephemeral/tests/base.rs b/pg-ephemeral/tests/base.rs index f73709d3..f517531b 100644 --- a/pg-ephemeral/tests/base.rs +++ b/pg-ephemeral/tests/base.rs @@ -5,7 +5,7 @@ async fn pull_test_images() { let backend = ociman::test_backend_setup!(); let default_image: ociman::image::Reference = (&pg_ephemeral::Image::default()).into(); - backend.pull_image(&default_image).await; + backend.pull_image(&default_image).await.unwrap(); for image in [ &*common::POSTGRES_IMAGE, @@ -13,7 +13,7 @@ async fn pull_test_images() { &*common::NODE_IMAGE, &*ociman::testing::ALPINE_LATEST_IMAGE, ] { - backend.pull_image(image).await; + backend.pull_image(image).await.unwrap(); } } @@ -69,6 +69,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -83,6 +84,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Podman, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -107,6 +109,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -121,6 +124,7 @@ fn test_config_file() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -135,6 +139,7 @@ fn test_config_file() { "tests/database.toml", &pg_ephemeral::config::InstanceDefinition { backend: Some(ociman::backend::Selection::Docker), + cache_registry: None, image: Some("18.0".parse().unwrap()), seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -153,6 +158,7 @@ fn test_config_file_no_explicit_instance() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -175,6 +181,7 @@ fn test_config_file_no_explicit_instance() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Podman, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -188,6 +195,7 @@ fn test_config_file_no_explicit_instance() { "tests/database_no_explicit_instance.toml", &pg_ephemeral::config::InstanceDefinition { backend: Some(ociman::backend::Selection::Podman), + cache_registry: None, image: Some("18.0".parse().unwrap()), seeds: indexmap::IndexMap::new(), ssl_config: None, @@ -218,6 +226,7 @@ fn test_config_ssl() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: Some(pg_ephemeral::definition::SslConfig::Generated { @@ -576,6 +585,7 @@ fn test_config_image_with_sha256_digest() { pg_ephemeral::Instance { application_name: None, backend: ociman::backend::Selection::Docker, + cache_registry: None, database: pg_client::Database::POSTGRES, seeds: indexmap::IndexMap::new(), ssl_config: None, diff --git a/pg-ephemeral/tests/cache.rs b/pg-ephemeral/tests/cache.rs index b6b31e85..113d899a 100644 --- a/pg-ephemeral/tests/cache.rs +++ b/pg-ephemeral/tests/cache.rs @@ -303,6 +303,129 @@ async fn test_cache_status_change_with_image() { assert_ne!(stdout2, stdout1); } +#[tokio::test] +async fn test_cache_registry_prefixes_reference_without_changing_hash() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-registry-test"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + + // Baseline: no cache_registry. + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + let baseline = run_pg_ephemeral(&["cache", "status", "--json"], &dir.path).await; + let baseline: serde_json::Value = serde_json::from_str(&baseline).unwrap(); + let baseline_reference = baseline["seeds"][0]["reference"].as_str().unwrap(); + assert!(baseline_reference.starts_with("pg-ephemeral/main:")); + + // Same config plus cache_registry. + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + cache_registry = "ghcr.io/mbj" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + let prefixed = run_pg_ephemeral(&["cache", "status", "--json"], &dir.path).await; + let prefixed: serde_json::Value = serde_json::from_str(&prefixed).unwrap(); + let prefixed_reference = prefixed["seeds"][0]["reference"].as_str().unwrap(); + + // The prefixed reference should be the baseline reference with the registry prepended, + // proving (a) the registry prefix is applied and (b) the hash is unaffected. + assert_eq!( + prefixed_reference, + format!("ghcr.io/mbj/{baseline_reference}") + ); +} + +async fn run_pg_ephemeral_expect_failure( + args: &[&str], + current_dir: &std::path::Path, +) -> (String, String) { + let pg_ephemeral_bin = env!("CARGO_BIN_EXE_pg-ephemeral"); + let output = cmd_proc::Command::new(pg_ephemeral_bin) + .arguments(args) + .working_directory(current_dir) + .stdout_capture() + .stderr_capture() + .accept_nonzero_exit() + .run() + .await + .unwrap(); + + assert!( + !output.status.success(), + "expected pg-ephemeral {} to fail, but it succeeded\nstdout:\n{}", + args.join(" "), + String::from_utf8_lossy(&output.stdout) + ); + + ( + String::from_utf8_lossy(&output.stdout).into_owned(), + String::from_utf8_lossy(&output.stderr).into_owned(), + ) +} + +#[tokio::test] +async fn test_cache_pull_without_registry_errors() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-pull-no-registry"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + + let (_stdout, stderr) = run_pg_ephemeral_expect_failure(&["cache", "pull"], &dir.path).await; + assert!( + stderr.contains("cache_registry must be set"), + "expected registry-not-set error, got stderr:\n{stderr}" + ); +} + +#[tokio::test] +async fn test_cache_push_without_registry_errors() { + let _backend = ociman::test_backend_setup!(); + let dir = TestDir::new("cache-push-no-registry"); + + dir.write_file("schema.sql", "CREATE TABLE users (id INTEGER PRIMARY KEY);"); + dir.write_file( + "database.toml", + indoc::indoc! {r#" + image = "17.1" + + [instances.main.seeds.schema] + type = "sql-file" + path = "schema.sql" + "#}, + ); + + let (_stdout, stderr) = run_pg_ephemeral_expect_failure(&["cache", "push"], &dir.path).await; + assert!( + stderr.contains("cache_registry must be set"), + "expected registry-not-set error, got stderr:\n{stderr}" + ); +} + #[tokio::test] async fn test_cache_status_chain_propagates() { let _backend = ociman::test_backend_setup!();