Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 144 additions & 2 deletions crates/pikahut/src/testing/scenarios/openclaw.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use std::fs;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use anyhow::{Context, Result, anyhow, bail};
Expand Down Expand Up @@ -142,6 +142,42 @@ fn write_openclaw_buildstamp(openclaw_dir: &Path, git_head: Option<&str>) -> Res
Ok(())
}

fn write_pnpm_shim(bin_dir: &Path) -> Result<PathBuf> {
fs::create_dir_all(bin_dir)
.with_context(|| format!("create OpenClaw tool shim dir {}", bin_dir.display()))?;
let shim_path = bin_dir.join("pnpm");
let script = "#!/bin/sh\nset -eu\nexec npx --yes pnpm@10 \"$@\"\n";
fs::write(&shim_path, script)
.with_context(|| format!("write OpenClaw pnpm shim {}", shim_path.display()))?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;

let mut perms = fs::metadata(&shim_path)?.permissions();
perms.set_mode(0o755);
fs::set_permissions(&shim_path, perms)?;
}
Ok(shim_path)
}

fn openclaw_runtime_env(context: &TestContext) -> Result<BTreeMap<String, String>> {
let mut env = BTreeMap::new();
if super::common::command_exists("pnpm") {
return Ok(env);
}

let shim_dir = context.state_dir().join("openclaw-tools/bin");
let _shim = write_pnpm_shim(&shim_dir)?;
let path = match std::env::var_os("PATH") {
Some(existing) if !existing.is_empty() => {
format!("{}:{}", shim_dir.display(), existing.to_string_lossy())
}
_ => shim_dir.to_string_lossy().to_string(),
};
env.insert("PATH".to_string(), path);
Ok(env)
}

fn ensure_openclaw_runtime_ready(
openclaw_dir: &Path,
runner: &CommandRunner,
Expand Down Expand Up @@ -294,6 +330,7 @@ pub async fn run_openclaw_e2e(args: OpenclawE2eRequest) -> Result<ScenarioRunOut

let runner = CommandRunner::new(&context);
let mut command_outcomes = Vec::new();
let openclaw_runtime_env = openclaw_runtime_env(&context)?;

let sidecar_cmd = if let Ok(binary) = std::env::var(PIKACHAT_BIN_ENV) {
binary
Expand Down Expand Up @@ -398,6 +435,7 @@ pub async fn run_openclaw_e2e(args: OpenclawE2eRequest) -> Result<ScenarioRunOut
let mut gateway = runner.spawn(
&CommandSpec::node()
.cwd(&openclaw_dir)
.envs(openclaw_runtime_env)
.env(
"OPENCLAW_STATE_DIR",
openclaw_state_dir.to_string_lossy().to_string(),
Expand Down Expand Up @@ -493,7 +531,27 @@ pub async fn run_openclaw_e2e(args: OpenclawE2eRequest) -> Result<ScenarioRunOut

#[cfg(test)]
mod tests {
use super::openclaw_runtime_needs_prebuild;
use std::fs;
use std::process::Command;

use super::{openclaw_runtime_needs_prebuild, write_pnpm_shim};

fn has_node() -> bool {
Command::new("node")
.arg("--version")
.status()
.map(|status| status.success())
.unwrap_or(false)
}

fn node_bin() -> String {
let output = Command::new("sh")
.args(["-c", "command -v node"])
.output()
.unwrap();
assert!(output.status.success());
String::from_utf8(output.stdout).unwrap().trim().to_string()
}

#[test]
fn runtime_needs_prebuild_when_dist_entry_is_missing() {
Expand Down Expand Up @@ -531,4 +589,88 @@ mod tests {
Some("new-head")
));
}

#[cfg(unix)]
#[test]
fn pnpm_shim_makes_openclaw_tsdown_build_fallback_work_without_real_pnpm() {
if !has_node() {
return;
}

let dir = tempfile::tempdir().unwrap();
let repo_dir = dir.path().join("openclaw");
let scripts_dir = repo_dir.join("scripts");
let fake_bin_dir = dir.path().join("fake-bin");
let shim_bin_dir = dir.path().join("shim-bin");
let npx_log = dir.path().join("npx.log");
let node_bin = node_bin();

fs::create_dir_all(&scripts_dir).unwrap();
fs::create_dir_all(&fake_bin_dir).unwrap();

fs::write(
scripts_dir.join("tsdown-build.mjs"),
r#"#!/usr/bin/env node
import { spawnSync } from "node:child_process";
const result = spawnSync(
"pnpm",
["exec", "tsdown", "--config-loader", "unrun", "--logLevel", "warn", ...process.argv.slice(2)],
{ stdio: "inherit", shell: process.platform === "win32" },
);
if (typeof result.status === "number") {
process.exit(result.status);
}
process.exit(1);
"#,
)
.unwrap();

fs::write(
fake_bin_dir.join("npx"),
format!(
"#!/bin/sh\nset -eu\nprintf '%s\n' \"$@\" > {}\n",
npx_log.display()
),
)
.unwrap();
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;

let mut perms = fs::metadata(fake_bin_dir.join("npx"))
.unwrap()
.permissions();
perms.set_mode(0o755);
fs::set_permissions(fake_bin_dir.join("npx"), perms).unwrap();
}

let failed = Command::new(&node_bin)
.arg("scripts/tsdown-build.mjs")
.arg("--no-clean")
.current_dir(&repo_dir)
.env("PATH", &fake_bin_dir)
.output()
.unwrap();
assert!(!failed.status.success());

write_pnpm_shim(&shim_bin_dir).unwrap();
let success = Command::new(&node_bin)
.arg("scripts/tsdown-build.mjs")
.arg("--no-clean")
.current_dir(&repo_dir)
.env(
"PATH",
format!("{}:{}", shim_bin_dir.display(), fake_bin_dir.display()),
)
.output()
.unwrap();
assert!(success.status.success());

let recorded = fs::read_to_string(&npx_log).unwrap();
assert!(recorded.contains("--yes"));
assert!(recorded.contains("pnpm@10"));
assert!(recorded.contains("exec"));
assert!(recorded.contains("tsdown"));
assert!(recorded.contains("--no-clean"));
}
}
18 changes: 13 additions & 5 deletions todos/tests-living-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,18 @@ Working assumptions:
- `crates/pikahut/tests/support.rs` needed only a small enabling seam for this: a backend-fixture config writer that points `FfiApp` at the local agent API with the allowlist probe enabled, plus a narrow selector-only DB rewrite that maps the mocked provisioned agent identity onto a real relay-backed peer so the success path can open a real chat without hosted microVM infra
- the remaining lower/unowned edges in this family are the true guest-backed post-launch behavior after the chat opens: actual managed-agent key-package publication, first agent-authored reply semantics, and any hosted/runtime-specific launch failures beyond the mocked local spawner/control-plane contract
27. Slice 28 started the same ownership cleanup on startup/router/deep-link behavior:
- `crates/pikahut/tests/integration_deterministic.rs::chat_deep_link_opens_note_to_self_boundary` now gives this family one readable deterministic selector-owned contract: from the signed-in shell, a raw `pika://chat/<npub>` payload normalizes through Rust and lands in the intended note-to-self chat state
- `crates/pikahut/tests/support.rs` now drives that selector directly through `FfiApp`: create account, confirm the signed-in chat-list route, feed the raw deep-link payload through `normalize_peer_key`, open the chat, and verify the resulting composer/chat preview path works
- `rust/tests/app_flows.rs` now reads more honestly underneath that contract: `create_account_sets_logged_in_auth_and_chat_list_route` keeps the lower-level single-app auth/router state owner, while `create_chat_rejects_invalid_peer_key_without_navigation` keeps the invalid peer-key mapping/no-navigation semantics
- native overlap is now called out more honestly for this path: iOS/Android deep-link tests own URL/intent parsing and platform text-entry/scanner glue, not the canonical Rust-owned normalized deep-link chat-state contract
- the startup/router/deep-link family is materially cleaner now; the remaining caveat is intentional rather than blurry: cold-start OS delivery and callback plumbing still belong to native glue, not `pikahut`
- `crates/pikahut/tests/integration_deterministic.rs::chat_deep_link_opens_note_to_self_boundary` now gives this family one readable deterministic selector-owned contract: from the signed-in shell, a raw `pika://chat/<npub>` payload normalizes through Rust and lands in the intended note-to-self chat state
- `crates/pikahut/tests/support.rs` now drives that selector directly through `FfiApp`: create account, confirm the signed-in chat-list route, feed the raw deep-link payload through `normalize_peer_key`, open the chat, and verify the resulting composer/chat preview path works
- `rust/tests/app_flows.rs` now reads more honestly underneath that contract: `create_account_sets_logged_in_auth_and_chat_list_route` keeps the lower-level single-app auth/router state owner, while `create_chat_rejects_invalid_peer_key_without_navigation` keeps the invalid peer-key mapping/no-navigation semantics
- native overlap is now called out more honestly for this path: iOS/Android deep-link tests own URL/intent parsing and platform text-entry/scanner glue, not the canonical Rust-owned normalized deep-link chat-state contract
- the startup/router/deep-link family is materially cleaner now; the remaining caveat is intentional rather than blurry: cold-start OS delivery and callback plumbing still belong to native glue, not `pikahut`
28. Slice 29 used fresh GitHub Actions evidence to take the next live reliability problem instead of another ownership-only pass:
- the newest `check-fixture` failure on `ci-cleanup` was not a test flake at all; it failed immediately because the workflow was missing `PIKA_BUILD_SSH_HOST` / `PIKA_BUILD_SSH_KEY`, so it was excluded as config drift rather than a suite-quality target
- the recurring current test failure was `check-pikachat-openclaw-e2e`: recent failed runs (`gh run` IDs `23122543679` and `23122576279`) both died inside `integration_openclaw::openclaw_gateway_e2e` with the same bare `node scripts/tsdown-build.mjs --no-clean` exit and no captured stdout/stderr
- the actual mechanism was an environment split inside the OpenClaw gateway bootstrap, not a random relay or keypackage race: our scenario already falls back to `npx --yes pnpm@10 install/build` when `pnpm` is missing, but the later runtime build path re-entered upstream OpenClaw's `scripts/tsdown-build.mjs`, which hardcodes `pnpm exec tsdown ...` and exits `1` silently when bare `pnpm` is absent
- `crates/pikahut/src/testing/scenarios/openclaw.rs` now injects a tiny checked-in `pnpm` shim into the gateway environment only when the runner lacks real `pnpm`; the shim delegates to `npx --yes pnpm@10 "$@"`, so the later runtime build path uses the same fallback contract as the earlier scenario prep
- the new focused unit test `pnpm_shim_makes_openclaw_tsdown_build_fallback_work_without_real_pnpm` reproduces the exact silent failure shape under a PATH that has `npx` but no `pnpm`, then proves the shim makes the same `node scripts/tsdown-build.mjs --no-clean` call succeed
- this should materially reduce the recurring `check-pikachat-openclaw-e2e` instability without a broad OpenClaw harness rewrite; if the lane still flakes after this, the next likely issue is higher-level gateway/plugin startup timing rather than the current missing-`pnpm` runtime-build trap
## Progress Update

Completed on 2026-03-10:
Expand Down Expand Up @@ -413,6 +420,7 @@ Verified in the repo today:
- `rust/tests/app_flows.rs::paging_loads_older_messages_in_pages` has been replaced by the narrower `paging_reveals_older_messages_until_history_is_exhausted` smoke because the old exact-count assertion was the flaky part.
- We still need to watch for any renewed paging flakes below the `FfiApp` layer; if they recur, the next step should be the shared-runtime/core pagination owner, not another broad app-layer timeout tweak.
- `crates/pika-agent-microvm/src/lib.rs::openclaw_autostart_reports_keypackage_publish_timeout_separately_from_service_timeout` now stays on the real OpenClaw health-probe path with a short deterministic keypackage timeout window; if it recurs, the remaining issue is likely generic autostart-script test harness timing, not the OpenClaw-specific timeout-kind split.
- `crates/pikahut/tests/integration_openclaw.rs::openclaw_gateway_e2e` had a distinct live CI failure mode after the autostart-timeout fix: on runners without bare `pnpm`, the gateway's later `node scripts/tsdown-build.mjs --no-clean` runtime-build path could still die silently even though our earlier prep used `npx pnpm@10`. Slice 29 now injects a checked-in `pnpm` shim into the gateway PATH to close that gap; if the lane still flakes, the next suspect is gateway/plugin startup timing above the build-tool layer.

11. We now have a canonical fast local smoke layer for catching common CI failures before full lanes run.
- The supported habitual command is `just pre-commit`.
Expand Down
Loading