Files
openclaw/extensions/qa-lab/src/multipass.runtime.test.ts
Peter Steinberger bb46b79d3c refactor: internalize OpenClaw agent runtime (#85341)
* refactor: extract agent core package

Introduce packages/agent-core as the OpenClaw-owned home for reusable agent loop, harness, session, prompt, and runtime dependency contracts.

* refactor: extract shared llm runtime

Move provider model registries, stream wrappers, OAuth helpers, and LLM utilities into src/llm with plugin-sdk barrels instead of depending on the old embedded runtime layout.

* refactor: remove pi runtime internals

Rename remaining Pi-shaped agent surfaces to OpenClaw agent runtime names, delete obsolete Pi docs and package graph checks, and add the third-party notice for incorporated code.

* refactor: tighten agent session runtime

Make agent-core/runtime dependencies explicit, consolidate compaction and session transcript helpers, and move model/session helpers behind OpenClaw-owned contracts.

* refactor: remove static model and pi auth paths

Drop static model catalogs and Pi auth bridges, move model/provider facts to manifest-owned runtime contracts, and harden internal embedded-agent utilities.

* refactor: remove legacy provider compat paths

* docs: remove agent parity notes

* fix: skip provider wildcard metadata parsing

* refactor: share session extension sdk loading

* refactor: inline acpx proxy error formatter

* refactor: fold edit recovery into edit tool

* fix: accept extension batch separator

* test: align startup provider plugin expectations

* fix: restore provider-scoped release discovery

* test: align static asset packaging expectations

* fix: run static provider catalogs during scoped discovery

* fix: add provider entry catalogs for scoped live discovery

* fix: load lightweight provider catalog entries

* fix: refresh provider-scoped plugin metadata

* fix: keep provider catalog entries on release live path

* fix: keep static manifest models in release live checks

* fix: harden release model discovery

* fix: reduce OpenAI live cache probe reasoning

* fix: disable OpenAI cache probe reasoning

* ci: extend OpenAI gateway live timeout

* fix: extend live gateway model budget

* fix: stabilize release validation regressions

* fix: honor provider aliases in model rows

* fix: stabilize release validation lanes

* fix: stabilize release memory qa

* ci: stabilize release validation lanes

* ci: prefer ipv4 for live docker node calls

* fix: restore shared tool-call stream wrapper

* ci: remove legacy pi test shard alias

* fix: clean up embedded agent test drift

* fix: stabilize runtime alias status

* fix: clean up embedded agent ci drift

* fix: restore release ci invariants

* fix: clean up post-rebase runtime drift

* fix: restore release ci checks

* fix: restore release ci after rebase

* fix: remove stale pi runtime path

* test: align compaction runtime expectations

* test: update plugin prerelease expectations

* fix: handle claude live tool approvals

* fix: stabilize release validation gates

* fix: finish agent runtime import

* test: finish post-rebase agent runtime mocks

* fix: keep codex compaction native

* fix: stabilize codex app-server hook tests

* test: isolate codex diagnostic active run

* test: remove codex diagnostic completion race

# Conflicts:
#	extensions/codex/src/app-server/run-attempt.test.ts

* ci: fix full release manifest performance run id

* refactor: narrow llm plugin sdk boundary

* chore: drop generated google boundary stamps

* fix: repair rebase fallout

* fix: clean up rebased runtime references

* fix: decode codex jwt payloads as base64url

* fix: preserve shipped pi runtime alias

* fix: add scoped sdk virtual modules

* fix: decode llm codex oauth jwt as base64url

* fix: avoid stale vertex adc negative cache

* fix: harden tool arg decoding and codeql path

* fix: keep vertex adc negative checks live

* refactor: consolidate codex jwt and edit helpers

* fix: await codex oauth node runtime imports

* fix: preserve sdk tool and notice contracts

* fix: preserve shipped compat config boundaries

* fix: align codex oauth callback host

* fix: terminate agent-core loop streams on failure

* fix: keep codex oauth callback alive during fallback

* ci: include session tools in critical codeql scans

* fix: keep Cloudflare Anthropic provider auth header

* docs: redirect legacy pi runtime pages

* fix: honor bundled web provider compat discovery

* fix: protect session output spill files

* fix: keep legacy agent dir env blocked

* fix: contain auto-discovered skill symlinks

* fix: harden agent core sdk proxy surfaces

* fix: restore approval reaction sdk compat

* fix: keep live docker runs bounded

* fix: keep codex oauth redirect host aligned

* fix: resolve post-rebase agent runtime drift

* fix: redact anthropic oauth parse failures

* fix: preserve responses strict tool shaping

* fix: repair agent runtime rebase cleanup

* docs: redirect retired parity pages

* fix: bound auto-discovered resources to roots

* fix: repair post-rebase agent test drift

* fix: preserve bundled provider allowlist migration

* fix: preserve manifest-owned provider aliases

* fix: declare photon image dependency

* fix: keep provider headers out of proxy body

* fix: preserve shipped env aliases

* fix: refresh control ui i18n generated state

* fix: quote read fallback paths

* fix: preview edits through configured backend

* test: satisfy core test typecheck

* fix: preserve ZAI usage auth fallback

* test: repair codex diagnostic test

* fix: repair agent runtime rebase drift

* test: finish embedded runner import rename

* fix: repair agent runtime rebase integrations

* test: align compaction oauth fallback expectations

* fix: allow sdk-auth session models

* fix: update doctor tool schema import

* fix: preserve bedrock plugin region

* fix: stream harmony-like prose immediately

* ci: include session runtime in codeql shards

* fix: repair latest rebase integrations

* fix: honor explicit codex websocket transport

* fix: keep openai-compatible credentials provider-scoped

* fix: refresh sdk api baseline after rebase

* fix: route cli runtime aliases through openclaw harness

* test: rename stale harness mock expectation

* test: rename embedded agent overflow calls

* test: clean embedded auth test wording

* test: use openclaw stream types in deepinfra cache test

* fix: refresh sdk api baseline on latest main

* fix: honor bundled discovery compat allowlists

* fix: refresh sdk api baseline after latest rebase

* fix: remove stale rebase imports

* test: rename stale model catalog mock

* test: mock renamed doctor runtime modules

* fix: map canonical kimi env auth

* fix: use internal model registry in bench script

* fix: migrate deepinfra provider catalog entry

* fix: enforce builtin tool suppression

* fix: route compaction auth and proxy payloads safely

* refactor: prune unused llm registry leftovers

* test: update codex hooks session import

* test: fix model picker ci coverage

* test: align model picker auth mock types
2026-05-27 19:24:04 +01:00

293 lines
11 KiB
TypeScript

import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
import { afterEach, beforeEach, describe, expect, it, vi, type Mock } from "vitest";
// Shared stand-in for `child_process.execFile`. It is created through
// `vi.hoisted` so that it already exists when the `vi.mock` factory for
// "node:child_process" further down in this file references it.
const execFileMock = vi.hoisted(() => vi.fn());
/**
 * Reads the `packageManager` field from the `package.json` located in the
 * current working directory.
 *
 * @returns The `packageManager` value, or `undefined` when the field is absent.
 * @throws If the manifest cannot be read or is not valid JSON.
 */
function readRootPackageManager() {
  const manifestPath = path.join(process.cwd(), "package.json");
  const manifestText = fs.readFileSync(manifestPath, "utf8");
  const { packageManager } = JSON.parse(manifestText) as { packageManager?: string };
  return packageManager;
}
// Partially mock "node:child_process": every real export is kept via
// `vi.importActual`, and only `execFile` is swapped for `execFileMock` so the
// tests can script how the code under test sees child-process invocations.
vi.mock("node:child_process", async () => {
const actual = await vi.importActual<typeof import("node:child_process")>("node:child_process");
return {
...actual,
// Route execFile through the shared mock declared above.
execFile: execFileMock,
};
});
import {
createQaMultipassPlan,
renderQaMultipassGuestScript,
runQaMultipass,
} from "./multipass.runtime.js";
describe("qa multipass runtime", () => {
// Start every test from clean mock bookkeeping.
beforeEach(() => {
vi.clearAllMocks();
});
// Undo per-test environment stubs (`vi.stubEnv`) and spies (`vi.spyOn`) so
// state set up by one test is not carried into the next.
afterEach(() => {
vi.unstubAllEnvs();
vi.restoreAllMocks();
});
it("rejects output directories outside the mounted repo root", () => {
  // Planning with an output directory that is not under the repo root is
  // expected to throw.
  const planOutsideRepo = () =>
    createQaMultipassPlan({
      repoRoot: process.cwd(),
      outputDir: "/tmp/qa-out",
    });

  expect(planOutsideRepo).toThrow(
    "qa suite --runner multipass requires --output-dir to stay under the repo root",
  );
});
it("rejects repo-local symlink output directories that escape the repo root", () => {
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-multipass-"));
  // All fixture setup lives inside the try block so the temp directory is
  // removed even when a setup step (for example `symlinkSync`) throws.
  try {
    const repoRoot = path.join(tempRoot, "repo");
    const outsideRoot = path.join(tempRoot, "outside");
    // A link that lives inside the repo but points at a sibling directory
    // outside of it.
    const symlinkPath = path.join(repoRoot, "artifacts-link");
    fs.mkdirSync(repoRoot, { recursive: true });
    fs.mkdirSync(outsideRoot, { recursive: true });
    fs.writeFileSync(
      path.join(repoRoot, "package.json"),
      JSON.stringify({ packageManager: "pnpm@10.32.1" }),
      "utf8",
    );
    fs.symlinkSync(outsideRoot, symlinkPath);

    expect(() =>
      createQaMultipassPlan({
        repoRoot,
        outputDir: path.join(symlinkPath, "qa-out"),
      }),
    ).toThrow("qa suite --runner multipass requires --output-dir to stay under the repo root");
  } finally {
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
});
it("reuses suite scenario semantics and resolves mounted artifact paths", () => {
  const hostRepoRoot = process.cwd();
  const hostOutputDir = path.join(hostRepoRoot, ".artifacts", "qa-e2e", "multipass-test");

  const mountedPlan = createQaMultipassPlan({
    repoRoot: hostRepoRoot,
    outputDir: hostOutputDir,
  });

  // Host-side paths are kept as given.
  expect(mountedPlan.outputDir).toBe(hostOutputDir);
  // No scenarios were requested, so none should be planned or passed along.
  expect(mountedPlan.scenarioIds).toStrictEqual([]);
  expect(mountedPlan.qaCommand).not.toContain("--scenario");
  // Guest-side path is the expected mount-relative location.
  expect(mountedPlan.guestOutputDir).toBe(
    "/workspace/openclaw-host/.artifacts/qa-e2e/multipass-test",
  );
  // Report artifacts sit directly inside the host output directory.
  expect(mountedPlan.reportPath).toBe(path.join(hostOutputDir, "qa-suite-report.md"));
  expect(mountedPlan.summaryPath).toBe(path.join(hostOutputDir, "qa-suite-summary.json"));
});
it("renders a guest script that runs the live qa suite by default", () => {
  const repoRoot = process.cwd();
  const guestScript = renderQaMultipassGuestScript(
    createQaMultipassPlan({
      repoRoot,
      outputDir: path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-test"),
      scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
    }),
  );

  // Every fragment below must appear somewhere in the rendered script.
  const expectedFragments = [
    "pnpm install --frozen-lockfile",
    "pnpm build",
    `corepack prepare '${readRootPackageManager()}' --activate`,
    "'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel'",
    "'--provider-mode' 'live-frontier'",
    "'--scenario' 'channel-chat-baseline'",
    "'--scenario' 'thread-follow-up'",
    "/workspace/openclaw-host/.artifacts/qa-e2e/multipass-test",
  ];
  for (const fragment of expectedFragments) {
    expect(guestScript).toContain(fragment);
  }
});
it("carries live suite flags and forwarded auth env into the guest command", () => {
  vi.stubEnv("OPENAI_API_KEY", "test-openai-key");
  const repoRoot = process.cwd();
  const livePlan = createQaMultipassPlan({
    repoRoot,
    outputDir: path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-live-test"),
    providerMode: "live-frontier",
    primaryModel: "openai/gpt-5.5",
    alternateModel: "openai/gpt-5.5",
    fastMode: true,
    scenarioIds: ["channel-chat-baseline"],
  });
  const guestScript = renderQaMultipassGuestScript(livePlan);

  // The planned command must carry each requested live-suite flag and value.
  const expectedCommandTokens = [
    "--provider-mode",
    "live-frontier",
    "--model",
    "openai/gpt-5.5",
    "--alt-model",
    "--fast",
  ];
  for (const token of expectedCommandTokens) {
    expect(livePlan.qaCommand).toContain(token);
  }

  // The stubbed key is forwarded on the plan and rendered into the script.
  expect(livePlan.forwardedEnv.OPENAI_API_KEY).toBe("test-openai-key");
  const expectedScriptFragments = [
    "OPENAI_API_KEY='test-openai-key'",
    "'pnpm' 'openclaw' 'qa' 'suite' '--transport' 'qa-channel'",
    "'--provider-mode' 'live-frontier'",
  ];
  for (const fragment of expectedScriptFragments) {
    expect(guestScript).toContain(fragment);
  }
});
it("forwards --allow-failures into the guest qa suite command when requested", () => {
  const repoRoot = process.cwd();
  const artifactDir = path.join(
    repoRoot,
    ".artifacts",
    "qa-e2e",
    "multipass-allow-failures-test",
  );

  const { qaCommand } = createQaMultipassPlan({
    repoRoot,
    outputDir: artifactDir,
    allowFailures: true,
    scenarioIds: ["channel-chat-baseline"],
  });

  // Opting in on the plan must surface as the CLI flag.
  expect(qaCommand).toContain("--allow-failures");
});
it("forwards --runtime-pair into the guest qa suite command when requested", () => {
  const repoRoot = process.cwd();
  const artifactDir = path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-runtime-pair-test");

  const { qaCommand } = createQaMultipassPlan({
    repoRoot,
    outputDir: artifactDir,
    runtimePair: ["openclaw", "codex"],
    scenarioIds: ["channel-chat-baseline"],
  });

  // The pair is expected as a single comma-joined argument next to its flag.
  const expectedArgs = ["--runtime-pair", "openclaw,codex"];
  expect(qaCommand).toEqual(expect.arrayContaining(expectedArgs));
});
it("redacts forwarded live secrets in the persisted artifact script", () => {
  vi.stubEnv("OPENAI_API_KEY", "test-openai-key");
  const repoRoot = process.cwd();
  const livePlan = createQaMultipassPlan({
    repoRoot,
    outputDir: path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-live-test"),
    providerMode: "live-frontier",
    scenarioIds: ["channel-chat-baseline"],
  });

  const scriptForArtifacts = renderQaMultipassGuestScript(livePlan, { redactSecrets: true });

  // The placeholder must be present and the real key value must not be.
  expect(scriptForArtifacts).toContain("OPENAI_API_KEY='<redacted>'");
  expect(scriptForArtifacts).not.toContain("OPENAI_API_KEY='test-openai-key'");
});
it("forwards live key list and numbered key env shapes", () => {
  // One entry per env naming shape under test: key list, live-prefixed key,
  // direct key, and numbered keys.
  const stubbedEnv: Array<[string, string]> = [
    ["OPENCLAW_LIVE_ANTHROPIC_KEYS", "anthropic-a anthropic-b"],
    ["OPENCLAW_LIVE_CODEX_API_KEY", "codex-live"],
    ["CODEX_API_KEY", "codex-direct"],
    ["OPENAI_API_KEY_1", "openai-one"],
    ["GEMINI_API_KEY_2", "gemini-two"],
  ];
  for (const [envName, envValue] of stubbedEnv) {
    vi.stubEnv(envName, envValue);
  }

  const repoRoot = process.cwd();
  const { forwardedEnv } = createQaMultipassPlan({
    repoRoot,
    outputDir: path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-live-test"),
    providerMode: "live-frontier",
    scenarioIds: ["channel-chat-baseline"],
  });

  expect(forwardedEnv.OPENCLAW_LIVE_ANTHROPIC_KEYS).toBe("anthropic-a anthropic-b");
  expect(forwardedEnv.OPENCLAW_LIVE_CODEX_API_KEY).toBe("codex-live");
  expect(forwardedEnv.CODEX_API_KEY).toBe("codex-direct");
  expect(forwardedEnv.OPENAI_API_KEY_1).toBe("openai-one");
  expect(forwardedEnv.GEMINI_API_KEY_2).toBe("gemini-two");
});
it("skips stale CODEX_HOME values that do not exist on the host", () => {
  // Point CODEX_HOME at a path the test expects to be missing on the host.
  vi.stubEnv("CODEX_HOME", "/tmp/does-not-exist-openclaw-codex-home");
  const repoRoot = process.cwd();

  const stalePlan = createQaMultipassPlan({
    repoRoot,
    outputDir: path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-live-test"),
    providerMode: "live-frontier",
  });

  // Nothing Codex-home related should be forwarded or recorded on the plan.
  expect(stalePlan.forwardedEnv.CODEX_HOME).toBeUndefined();
  expect(stalePlan.hostCodexHomePath).toBeUndefined();
  expect(stalePlan.guestCodexHomePath).toBeUndefined();
});
it("falls back to os.homedir() when HOME is unset for CODEX_HOME discovery", () => {
  const sandboxRoot = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-multipass-home-"));
  const homeDir = path.join(sandboxRoot, "home");
  const codexHomeDir = path.join(homeDir, ".codex");
  fs.mkdirSync(codexHomeDir, { recursive: true });

  // Blank out both env hints so `os.homedir()` (spied to return the sandbox
  // home) is the only remaining source for locating `.codex`.
  vi.stubEnv("HOME", "");
  vi.stubEnv("CODEX_HOME", "");
  vi.spyOn(os, "homedir").mockReturnValue(homeDir);

  try {
    const repoRoot = process.cwd();
    const discoveredPlan = createQaMultipassPlan({
      repoRoot,
      outputDir: path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-live-test"),
      providerMode: "live-frontier",
    });

    expect(discoveredPlan.forwardedEnv.CODEX_HOME).toBe(codexHomeDir);
    expect(discoveredPlan.hostCodexHomePath).toBe(codexHomeDir);
    expect(discoveredPlan.guestCodexHomePath).toBe("/workspace/openclaw-codex-home");
  } finally {
    fs.rmSync(sandboxRoot, { recursive: true, force: true });
  }
});
it("does not leave a temp guest transfer script behind when multipass is missing", async () => {
  const outputDir = path.join(process.cwd(), ".artifacts", "qa-e2e", "multipass-missing-test");
  // Freeze the clock and RNG so the plan built in this test and the one built
  // inside `runQaMultipass` agree (presumably the VM name is derived from
  // them — confirm against the runtime).
  vi.spyOn(Date, "now").mockReturnValue(1_717_171_717_171);
  vi.spyOn(Math, "random").mockReturnValue(0.123456789);
  // Simulate a host without the multipass binary: every execFile call reports
  // ENOENT through its callback (fourth positional argument).
  (execFileMock as unknown as Mock).mockImplementation((...args: unknown[]) => {
    const callback = args[3] as (error: Error | null, stdout: string, stderr: string) => void;
    const error = new Error("spawn multipass ENOENT") as NodeJS.ErrnoException;
    error.code = "ENOENT";
    callback(error, "", "");
  });

  // Cleanup runs in `finally` so a failing expectation cannot leave the
  // output directory behind for later tests or runs.
  try {
    const expectedVmName = createQaMultipassPlan({
      repoRoot: process.cwd(),
      outputDir,
      scenarioIds: ["channel-chat-baseline"],
    }).vmName;
    const expectedTransferDir = path.join(
      resolvePreferredOpenClawTmpDir(),
      `${expectedVmName}-qa-suite-`,
    );

    await expect(
      runQaMultipass({
        repoRoot: process.cwd(),
        outputDir,
        scenarioIds: ["channel-chat-baseline"],
      }),
    ).rejects.toThrow("Multipass is not installed on this host.");

    // No entry in the temp dir may start with the transfer-dir prefix.
    const tempEntries = fs
      .readdirSync(resolvePreferredOpenClawTmpDir())
      .filter((entry) => entry.startsWith(path.basename(expectedTransferDir)));
    expect(tempEntries).toStrictEqual([]);
  } finally {
    fs.rmSync(outputDir, { recursive: true, force: true });
  }
});
it("preserves non-install multipass probe failures", async () => {
  const outputDir = path.join(
    process.cwd(),
    ".artifacts",
    "qa-e2e",
    "multipass-probe-error-test",
  );
  // Simulate a probe that fails for a reason other than a missing binary:
  // the error code is EACCES and stderr carries the daemon message.
  (execFileMock as unknown as Mock).mockImplementation((...args: unknown[]) => {
    const callback = args[3] as (error: Error | null, stdout: string, stderr: string) => void;
    const error = new Error("multipassd is not running") as NodeJS.ErrnoException;
    error.code = "EACCES";
    callback(error, "", "multipassd is not running");
  });

  // Cleanup runs in `finally` so a failing expectation cannot leave the
  // output directory behind for later tests or runs.
  try {
    await expect(
      runQaMultipass({
        repoRoot: process.cwd(),
        outputDir,
        scenarioIds: ["channel-chat-baseline"],
      }),
    ).rejects.toThrow("Unable to verify Multipass availability: multipassd is not running.");
  } finally {
    fs.rmSync(outputDir, { recursive: true, force: true });
  }
});
});