From a56ba63c9695870d9c01755ee55cfeb4adb97fce Mon Sep 17 00:00:00 2001 From: Drew Stone Date: Tue, 23 Jun 2026 01:59:12 -0600 Subject: [PATCH] chore(cleanup): delete dead eval-persona facade + orphaned topology module (0.76.0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove two confirmed-dead surfaces: - `evalPersona` / `EvalPersonaOptions` (src/conversation/eval-persona.ts) — the one-call user-sim facade superseded by the brain-from-profile overhaul. Zero external consumers; the in-repo product-eval example is migrated to the kept lower-level `runPersonaConversation` seam. - `src/topology/` (createTopologyView / renderTopologyTree / createReplayRecorder / renderReplayHtml) — an @experimental live-tree/replay viz with no package export and no consumer. The atom-humaneval bench drops its optional HTML replay output. KEPT: `runPersonaConversation` / `runPersonaDispatch` / `PersonaDriver` (src/conversation/run-persona.ts) — a re-verify found tax-agent imports these from the published package, so the persona loop runner stays. Regenerate docs/api, bump the docs version pin, and refresh the architecture doc to drop the deleted-module references. 
--- bench/src/atom-humaneval.mts | 17 +- docs/api/index.md | 136 ------------ docs/architecture.md | 11 +- docs/canonical-api.md | 2 +- docs/research/atom-compression-plan.md | 2 +- examples/product-eval/product-eval.ts | 51 +++-- package.json | 2 +- src/conversation/eval-persona.test.ts | 78 ------- src/conversation/eval-persona.ts | 96 --------- src/conversation/index.ts | 1 - src/index.ts | 2 - src/topology/index.ts | 23 -- src/topology/replay.ts | 287 ------------------------- src/topology/tree.ts | 198 ----------------- tests/topology-replay.test.ts | 129 ----------- tests/topology.test.ts | 123 ----------- 16 files changed, 38 insertions(+), 1120 deletions(-) delete mode 100644 src/conversation/eval-persona.test.ts delete mode 100644 src/conversation/eval-persona.ts delete mode 100644 src/topology/index.ts delete mode 100644 src/topology/replay.ts delete mode 100644 src/topology/tree.ts delete mode 100644 tests/topology-replay.test.ts delete mode 100644 tests/topology.test.ts diff --git a/bench/src/atom-humaneval.mts b/bench/src/atom-humaneval.mts index a7b4b5c6..36ec5719 100644 --- a/bench/src/atom-humaneval.mts +++ b/bench/src/atom-humaneval.mts @@ -34,9 +34,7 @@ import { routerBrain, routerChatWithUsage, } from '../../src/runtime/index' -import { createReplayRecorder, renderReplayHtml } from '../../src/topology/replay' import { basePrompt, extractCode, type HumanEvalTask, loadHumanEval, runChecker } from './benchmarks/humaneval' -import { writeFileSync } from 'node:fs' function must(k: string): string { const v = process.env[k] @@ -106,10 +104,9 @@ interface TaskOutcome { // ── Driver arm: the orchestrated atom ──────────────────────────────────────────────────────── async function driveTask( task: HumanEvalTask, -): Promise<{ delivered: boolean; spawns: number; tokens: number; replay: string }> { +): Promise<{ delivered: boolean; spawns: number; tokens: number }> { const blobs = new InMemoryResultBlobStore() const journal = new InMemorySpawnJournal() - 
const recorder = createReplayRecorder() let spawns = 0 const makeWorker = (): Agent => { const w = humanEvalWorker(task, `w-${spawns}`) @@ -134,17 +131,13 @@ async function driveTask( blobs, executors: createExecutorRegistry(), maxDepth: 4, - hooks: recorder.hooks, now: () => Date.now(), }) const tree = await journal.loadTree(runId) const tokens = (tree ?? []) .filter((e): e is Extract[number], { kind: 'settled' }> => e.kind === 'settled') .reduce((s, e) => s + e.spent.tokens.input + e.spent.tokens.output, 0) - const replay = renderReplayHtml(recorder.timeline(runId), { - title: `${task.taskId} · driver=${driverCfg.model}`, - }) - return { delivered: result.kind === 'winner', spawns, tokens, replay } + return { delivered: result.kind === 'winner', spawns, tokens } } // ── Blind arm: K independent workers, best-of-K by the checker (no orchestration) ───────────── @@ -169,14 +162,8 @@ async function main(): Promise { console.log(`atom-humaneval: N=${N} K=${K} offset=${OFFSET} worker=${cfg.model} driver=${driverCfg.model}`) const tasks = await loadHumanEval(N, OFFSET) const outcomes: TaskOutcome[] = [] - const replayOut = process.env.REPLAY_OUT ?? '/tmp/atom-replay.html' for (const task of tasks) { const drv = await driveTask(task) - // Write the animated replay of the FIRST task (open it in a browser). - if (outcomes.length === 0) { - writeFileSync(replayOut, drv.replay) - console.log(` ↳ replay written: ${replayOut} (${(drv.replay.length / 1024).toFixed(0)} KB)`) - } const blind = await blindTask(task) outcomes.push({ taskId: task.taskId, diff --git a/docs/api/index.md b/docs/api/index.md index 400096f7..d21af766 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -959,116 +959,6 @@ Circuit breaker that opens after N consecutive failures per participant. *** -### EvalPersonaOptions - -Defined in: [conversation/eval-persona.ts:31](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L31) - -#### Properties - -##### apiKey? 
- -> `optional` **apiKey?**: `string` - -Defined in: [conversation/eval-persona.ts:34](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L34) - -Router (or OpenAI-compatible) endpoint for the DEFAULT backend. Required unless `backendFor` - is supplied (tests/advanced override the backend entirely and may omit these). - -##### baseUrl? - -> `optional` **baseUrl?**: `string` - -Defined in: [conversation/eval-persona.ts:35](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L35) - -##### model? - -> `optional` **model?**: `string` - -Defined in: [conversation/eval-persona.ts:36](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L36) - -##### backendFor? - -> `optional` **backendFor?**: (`profile`, `role`) => [`AgentExecutionBackend`](#agentexecutionbackend) - -Defined in: [conversation/eval-persona.ts:39](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L39) - -Override the backend seam directly instead of deriving it from `apiKey`/`baseUrl`/`model` - (the offline-test path: pass a fake here and the credentials are not needed). - -###### Parameters - -###### profile - -`AgentProfile` - -###### role - -`"worker"` \| `"persona"` - -###### Returns - -[`AgentExecutionBackend`](#agentexecutionbackend) - -##### systemPromptOf? - -> `optional` **systemPromptOf?**: (`profile`) => `string` - -Defined in: [conversation/eval-persona.ts:41](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L41) - -Override system-prompt rendering. Default: `p.prompt?.systemPrompt ?? ''`. - -###### Parameters - -###### profile - -`AgentProfile` - -###### Returns - -`string` - -##### maxTurns? 
- -> `optional` **maxTurns?**: `number` - -Defined in: [conversation/eval-persona.ts:45](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L45) - -Hard speaker-turn ceiling. REQUIRED for a profile-driven persona; for a scripted persona it - defaults to `2 * turns.length`. `maxTurns` is a CEILING, NOT a target — `maxTurns: 0` is zero - turns, not run-until-done; `haltOn` is the "until satisfied" knob. - -##### haltOn? - -> `optional` **haltOn?**: [`HaltPredicate`](#haltpredicate) - -Defined in: [conversation/eval-persona.ts:47](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L47) - -Content-based early stop (the persona declares the goal met / unreachable). - -##### seed? - -> `optional` **seed?**: `string` - -Defined in: [conversation/eval-persona.ts:49](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L49) - -Kickoff message to the persona. Default 'Begin.' - -##### signal? - -> `optional` **signal?**: `AbortSignal` - -Defined in: [conversation/eval-persona.ts:50](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L50) - -##### workerName? - -> `optional` **workerName?**: `string` - -Defined in: [conversation/eval-persona.ts:52](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L52) - -Worker transcript speaker label. Default 'agent'. 
- -*** - ### SqlAdapter Defined in: [conversation/journal-sql.ts:48](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/journal-sql.ts#L48) @@ -7325,32 +7215,6 @@ Defined in: [conversation/define-conversation.ts:13](https://github.com/tangle-n *** -### evalPersona() - -> **evalPersona**(`worker`, `persona`, `opts?`): `Promise`\<[`PersonaConversationResult`](#personaconversationresult)\> - -Defined in: [conversation/eval-persona.ts:60](https://github.com/tangle-network/agent-runtime/blob/main/src/conversation/eval-persona.ts#L60) - -#### Parameters - -##### worker - -`AgentProfile` - -##### persona - -`EvalPersona` - -##### opts? - -[`EvalPersonaOptions`](#evalpersonaoptions) = `{}` - -#### Returns - -`Promise`\<[`PersonaConversationResult`](#personaconversationresult)\> - -*** - ### readDepth() > **readDepth**(`headers`): `number` diff --git a/docs/architecture.md b/docs/architecture.md index e1b6b486..d02a4140 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -158,11 +158,9 @@ label, runtime, budget, depth) and the settle cursor emits `agent.child` (status reason, spend), threaded in through `SupervisorOpts.hooks`. Developers attach via `defineRuntimeHooks` / `composeRuntimeHooks` at the **execution/spawn boundary** — never on the `AgentProfile`, never coupled to one backend. This single stream is the -opencode-style extension surface *and* what the **topology visualization** consumes: -`src/topology/` folds the stream into the live recursive agent tree — each node's status, -steps, child count, and deployable score — and renders it (`createTopologyView().hooks` -attaches; `.render()` draws the tree). The journal stays the durable record; the hook -stream is its live projection (both agree). +opencode-style extension surface *and* the live projection of the recursive agent tree — +each node's status, steps, child count, and deployable score. 
The journal stays the durable +record; the hook stream is its live projection (both agree). --- @@ -550,8 +548,7 @@ a feature — it's the absence of a base case (`supervise/supervisor.ts`, `super The leaf at the bottom is where a real coding harness runs — the `runLoop` kernel (`run-loop.ts`) is composed as one leaf execution backend. Everything above it is the same `act`/`Scope` atom. The whole tree is observable as one lifecycle stream -(`scope.spawn`/settle → `agent.spawn`/`agent.child`), rendered by -[`src/topology/`](../src/topology/tree.ts). +(`scope.spawn`/settle → `agent.spawn`/`agent.child`). ### 13.3 The within-run self-improvement loop (§1's agent-driver, drawn) diff --git a/docs/canonical-api.md b/docs/canonical-api.md index 4eb7cff8..0f97ead0 100644 --- a/docs/canonical-api.md +++ b/docs/canonical-api.md @@ -2,7 +2,7 @@ -> **Version 0.75.1.** Per-symbol signatures live in the generated `docs/api/` reference (one page per module). The pinned substrate is agent-eval `>=0.97.0 <1.0.0`; the sandbox substrate that materializes profiles into harness shapes is `@tangle-network/sandbox` (peer `>=0.8.0 <1.0.0`). The neutral contract types (`AgentProfile`, `AgentProfileMcpServer`, `HarnessType`, `ReasoningEffort`, `Part`/`ToolPart`/`ToolState`) are owned by **`@tangle-network/agent-interface`** (peer `>=0.10.0 <1.0.0`) — the single source of truth. Substrate symbols (`selfImprove`/`gepaProposer`/`defaultProductionGate`/`heldOutGate`/`pairedBootstrap`/…) are re-exported through `@tangle-network/agent-eval/contract` (or `/campaign`), not local to this package. +> **Version 0.76.0.** Per-symbol signatures live in the generated `docs/api/` reference (one page per module). The pinned substrate is agent-eval `>=0.97.0 <1.0.0`; the sandbox substrate that materializes profiles into harness shapes is `@tangle-network/sandbox` (peer `>=0.8.0 <1.0.0`). 
The neutral contract types (`AgentProfile`, `AgentProfileMcpServer`, `HarnessType`, `ReasoningEffort`, `Part`/`ToolPart`/`ToolState`) are owned by **`@tangle-network/agent-interface`** (peer `>=0.10.0 <1.0.0`) — the single source of truth. Substrate symbols (`selfImprove`/`gepaProposer`/`defaultProductionGate`/`heldOutGate`/`pairedBootstrap`/…) are re-exported through `@tangle-network/agent-eval/contract` (or `/campaign`), not local to this package. > > **`./loops` is the runtime barrel** — `package.json` maps it to `src/runtime/index.ts`. Everything below labelled `/loops` is the recursive-atom + loop-kernel surface. > diff --git a/docs/research/atom-compression-plan.md b/docs/research/atom-compression-plan.md index d7224307..dbb77301 100644 --- a/docs/research/atom-compression-plan.md +++ b/docs/research/atom-compression-plan.md @@ -8,7 +8,7 @@ ## Honest LOC reality (read before the lists) -`runLoop` (run-loop.ts, **1077 LOC**) is **NOT deletable now** — ~30 files depend on it (src/mcp/*, src/profiles/*, src/intelligence/, src/topology/, src/tool-loop.ts, src/loop-runner.ts, the sandbox-run seam) and it is already the **leaf-exec kernel** the Supervisor's sandbox executor composes under each worker. It stays. The deletable dumbness is the *driver policy layer* and the duplicate wrappers, not the kernel. So net-negative is achievable but **moderate, not dramatic** — claiming we delete 1000+ lines would be the lie. +`runLoop` (run-loop.ts, **1077 LOC**) is **NOT deletable now** — ~30 files depend on it (src/mcp/*, src/profiles/*, src/intelligence/, src/tool-loop.ts, src/loop-runner.ts, the sandbox-run seam) and it is already the **leaf-exec kernel** the Supervisor's sandbox executor composes under each worker. It stays. The deletable dumbness is the *driver policy layer* and the duplicate wrappers, not the kernel. So net-negative is achievable but **moderate, not dramatic** — claiming we delete 1000+ lines would be the lie. 
## CUT LIST (delete / collapse) diff --git a/examples/product-eval/product-eval.ts b/examples/product-eval/product-eval.ts index c95df0e9..b91dc6a0 100644 --- a/examples/product-eval/product-eval.ts +++ b/examples/product-eval/product-eval.ts @@ -1,9 +1,10 @@ /** - * User-sim product evals in one call — `evalPersona` (+ the `runPersonaDispatch` → matrix path). + * User-sim product evals — `runPersonaConversation` (the persona loop) + the `runPersonaDispatch` + * → matrix path. * * A product eval runs the AGENT UNDER TEST against a PERSONA (a simulated user) over a multi-round - * conversation, then scores the transcript. `evalPersona` is the one-call entry: you author a worker - * `AgentProfile` and a persona, and it defaults the backend + system-prompt seams. + * conversation, then scores the transcript. `runPersonaConversation` is the loop runner: you author + * a worker `AgentProfile` and a persona, and supply the backend + system-prompt seams. * * Three cells, smallest to largest: * 1. scripted-persona quickstart — a fixed user script, deterministic; @@ -27,7 +28,7 @@ import { import type { AgentProfile } from '@tangle-network/agent-interface' import { createOpenAICompatibleBackend, - evalPersona, + runPersonaConversation, runPersonaDispatch, } from '@tangle-network/agent-runtime' @@ -37,19 +38,28 @@ const apiKey: string = process.env.TANGLE_API_KEY const baseUrl = process.env.ROUTER_BASE ?? 'https://router.tangle.tools/v1' const model = process.env.WORKER_MODEL ?? 'gpt-4o-mini' -// The agent under test — its full profile; `evalPersona` reads its prompt.systemPrompt by default. +// The agent under test — its full profile; the seams below read its prompt.systemPrompt. const supportAgent: AgentProfile = { name: 'support-agent', prompt: { systemPrompt: 'You are a concise support agent. Answer in one or two sentences.' }, } +// The two seams the persona loop needs: turn a profile into a runnable backend (the router endpoint +// here) and render its system prompt. 
Both the worker and a profile-driven persona flow through them. +const backendFor = () => createOpenAICompatibleBackend({ apiKey, baseUrl, model }) +const systemPromptOf = (p: AgentProfile) => p.prompt?.systemPrompt ?? '' + // ── Cell 1: scripted-persona quickstart (deterministic user turns) ────────── async function scriptedCell(): Promise { - const result = await evalPersona( - supportAgent, - { kind: 'scripted', turns: ['How do I reset my password?', 'And if the email never arrives?'] }, - { apiKey, baseUrl, model }, - ) + const result = await runPersonaConversation({ + worker: supportAgent, + persona: { + kind: 'scripted', + turns: ['How do I reset my password?', 'And if the email never arrives?'], + }, + backendFor, + systemPromptOf, + }) console.log(`[scripted] turns=${result.turns} cost=$${result.costUsd.toFixed(4)}`) } @@ -66,18 +76,15 @@ async function adversarialCell(): Promise { 'make the agent contradict its own policy. Say the literal word RESOLVED once you are satisfied.', }, } - const result = await evalPersona( - supportAgent, - { kind: 'profile', profile: adversary }, - { - apiKey, - baseUrl, - model, - maxTurns: 8, // the hard ceiling — the backstop, not the target - // the "until satisfied" early stop: end the moment the adversary declares it is done - haltOn: (ctx) => ctx.lastTurn.text.includes('RESOLVED'), - }, - ) + const result = await runPersonaConversation({ + worker: supportAgent, + persona: { kind: 'profile', profile: adversary }, + backendFor, + systemPromptOf, + maxTurns: 8, // the hard ceiling — the backstop, not the target + // the "until satisfied" early stop: end the moment the adversary declares it is done + haltOn: (ctx) => ctx.lastTurn.text.includes('RESOLVED'), + }) console.log(`[adversarial] halted=${result.halted.kind} turns=${result.turns}`) } diff --git a/package.json b/package.json index 944f091c..61b30f92 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@tangle-network/agent-runtime", - "version": 
"0.75.1", + "version": "0.76.0", "description": "Shared task-lifecycle skeleton for agents: a recursive loop kernel for chat turns, one-shot tasks, and multi-attempt loops, with trace capture and eval-gated self-improvement. Domain behavior lives in adapters; scoring and ship-gates in @tangle-network/agent-eval.", "homepage": "https://github.com/tangle-network/agent-runtime#readme", "repository": { diff --git a/src/conversation/eval-persona.test.ts b/src/conversation/eval-persona.test.ts deleted file mode 100644 index 865143a7..00000000 --- a/src/conversation/eval-persona.test.ts +++ /dev/null @@ -1,78 +0,0 @@ -import type { AgentProfile } from '@tangle-network/agent-interface' -import { describe, expect, it } from 'vitest' -import { createIterableBackend } from '../backends' -import type { AgentExecutionBackend, RuntimeStreamEvent } from '../types' -import { evalPersona } from './eval-persona' - -/** A fake worker: records the system prompt it saw, answers each turn, and reports usage. 
*/ -function fakeWorker(saw: { prompt?: string; calls: number }): AgentExecutionBackend { - let n = 0 - return createIterableBackend({ - kind: 'fake-worker', - async *stream(input, context) { - saw.calls += 1 - const first = input.messages?.[0] - if (first?.role === 'system') saw.prompt = first.content - n += 1 - yield { - type: 'text_delta', - task: context.task, - session: context.session, - text: `agent-answer-${n}`, - timestamp: new Date().toISOString(), - } satisfies RuntimeStreamEvent - yield { - type: 'llm_call', - task: context.task, - session: context.session, - model: 'fake', - tokensIn: 10, - tokensOut: 5, - costUsd: 0.02, - latencyMs: 1, - timestamp: new Date().toISOString(), - } satisfies RuntimeStreamEvent - }, - }) -} - -const workerProfile: AgentProfile = { prompt: { systemPrompt: 'WORKER-SYSTEM-PROMPT' } } - -describe('evalPersona — the one-call user-sim facade', () => { - it('runs a scripted persona offline, defaulting systemPromptOf to the profile prompt', async () => { - const saw = { calls: 0 } as { prompt?: string; calls: number } - const result = await evalPersona( - workerProfile, - { kind: 'scripted', turns: ['intake question', 'follow-up'] }, - { backendFor: () => fakeWorker(saw) }, - ) - const agentTurns = result.transcript.filter((t) => t.speaker === 'agent') - expect(agentTurns).toHaveLength(2) - // The default systemPromptOf pulled prompt.systemPrompt off the profile. - expect(saw.prompt).toBe('WORKER-SYSTEM-PROMPT') - // Worker-only metering flows through. - expect(result.tokensIn).toBe(20) - expect(result.costUsd).toBeCloseTo(0.04, 5) - }) - - it('passes haltOn through as the until-satisfied early stop', async () => { - const saw = { calls: 0 } as { prompt?: string; calls: number } - const result = await evalPersona( - workerProfile, - { kind: 'scripted', turns: ['q1', 'q2', 'q3'] }, - { - backendFor: () => fakeWorker(saw), - // Stop after the worker's first answer — before the scripted turns are exhausted. 
- haltOn: (ctx) => ctx.lastTurn.speaker === 'agent', - }, - ) - expect(result.halted.kind).toBe('predicate') - expect(saw.calls).toBe(1) - }) - - it('fails loud when neither default creds nor a backendFor override is given', () => { - expect(() => evalPersona(workerProfile, { kind: 'scripted', turns: ['hi'] }, {})).toThrow( - /apiKey,baseUrl,model.*backendFor/, - ) - }) -}) diff --git a/src/conversation/eval-persona.ts b/src/conversation/eval-persona.ts deleted file mode 100644 index 9542793e..00000000 --- a/src/conversation/eval-persona.ts +++ /dev/null @@ -1,96 +0,0 @@ -/** - * `evalPersona` — the one-call user-sim product eval. Run a worker `AgentProfile` (the agent under - * test) against a PERSONA (a simulated user — a scripted script or an LLM driver profile) as a - * multi-round conversation, with the two seams `runPersonaConversation` otherwise makes you hand-wire - * defaulted: - * - * - `backendFor` defaults to `createOpenAICompatibleBackend({ apiKey, baseUrl, model })` (the - * OpenAI-compatible router endpoint) for both the worker and a profile-driven persona; - * - `systemPromptOf` defaults to `p.prompt?.systemPrompt ?? ''`. - * - * Either is overridable (tests pass a fake `backendFor` to run offline). `maxTurns` (the hard - * ceiling) and `haltOn` (the "until satisfied" early stop) pass straight through. Mirrors - * `supervise()`'s defaulting style: the common case is one call; the raw `runPersonaConversation` - * seam stays available for full control. - * - * The profile here is the authored `AgentProfile` (with `prompt.systemPrompt`), which is why the - * default `systemPromptOf` can read it. `runPersonaConversation` treats the profile opaquely — only - * the two callbacks inspect it — so the worker/persona profiles flow straight through. 
- */ - -import type { AgentProfile } from '@tangle-network/agent-interface' -import { createOpenAICompatibleBackend } from '../backends' -import type { AgentExecutionBackend } from '../types' -import { - type PersonaConversationResult, - type PersonaDriver, - runPersonaConversation, -} from './run-persona' -import type { HaltPredicate } from './types' - -export interface EvalPersonaOptions { - /** Router (or OpenAI-compatible) endpoint for the DEFAULT backend. Required unless `backendFor` - * is supplied (tests/advanced override the backend entirely and may omit these). */ - apiKey?: string - baseUrl?: string - model?: string - /** Override the backend seam directly instead of deriving it from `apiKey`/`baseUrl`/`model` - * (the offline-test path: pass a fake here and the credentials are not needed). */ - backendFor?: (profile: AgentProfile, role: 'worker' | 'persona') => AgentExecutionBackend - /** Override system-prompt rendering. Default: `p.prompt?.systemPrompt ?? ''`. */ - systemPromptOf?: (profile: AgentProfile) => string - /** Hard speaker-turn ceiling. REQUIRED for a profile-driven persona; for a scripted persona it - * defaults to `2 * turns.length`. `maxTurns` is a CEILING, NOT a target — `maxTurns: 0` is zero - * turns, not run-until-done; `haltOn` is the "until satisfied" knob. */ - maxTurns?: number - /** Content-based early stop (the persona declares the goal met / unreachable). */ - haltOn?: HaltPredicate - /** Kickoff message to the persona. Default 'Begin.' */ - seed?: string - signal?: AbortSignal - /** Worker transcript speaker label. Default 'agent'. */ - workerName?: string -} - -/** The persona side, authored against the same `AgentProfile` shape as the worker. 
*/ -export type EvalPersona = - | { kind: 'scripted'; turns: string[] } - | { kind: 'profile'; profile: AgentProfile } - -export function evalPersona( - worker: AgentProfile, - persona: EvalPersona, - opts: EvalPersonaOptions = {}, -): Promise { - const defaultBackendFor = (): (( - profile: AgentProfile, - role: 'worker' | 'persona', - ) => AgentExecutionBackend) => { - if (!opts.apiKey || !opts.baseUrl || !opts.model) { - throw new Error( - 'evalPersona: provide opts.{apiKey,baseUrl,model} for the default backend, or opts.backendFor', - ) - } - const { apiKey, baseUrl, model } = opts - return () => createOpenAICompatibleBackend({ apiKey, baseUrl, model }) - } - const backendFor = opts.backendFor ?? defaultBackendFor() - - const systemPromptOf = opts.systemPromptOf ?? ((p: AgentProfile) => p.prompt?.systemPrompt ?? '') - - return runPersonaConversation({ - // runPersonaConversation types its profile as the benchmark-cell AgentProfile; here it is the - // authored AgentProfile (the carrier of prompt.systemPrompt). The runner never inspects the - // profile itself — only backendFor/systemPromptOf do, and both are typed for THIS profile — so - // the cast at this single boundary is sound. - worker: worker as never, - persona: persona as PersonaDriver, - backendFor: backendFor as never, - systemPromptOf: systemPromptOf as never, - ...(opts.maxTurns !== undefined ? { maxTurns: opts.maxTurns } : {}), - ...(opts.haltOn ? { haltOn: opts.haltOn } : {}), - ...(opts.seed !== undefined ? { seed: opts.seed } : {}), - ...(opts.signal ? { signal: opts.signal } : {}), - ...(opts.workerName !== undefined ? 
{ workerName: opts.workerName } : {}), - }) -} diff --git a/src/conversation/index.ts b/src/conversation/index.ts index dc25ccbb..15bbb0d2 100644 --- a/src/conversation/index.ts +++ b/src/conversation/index.ts @@ -29,7 +29,6 @@ export { } from './call-policy' export { createConversationBackend } from './conversation-backend' export { defineConversation } from './define-conversation' -export { type EvalPersonaOptions, evalPersona } from './eval-persona' export { buildForwardHeaders, DEFAULT_MAX_DEPTH, diff --git a/src/index.ts b/src/index.ts index f3646dda..fcf8a8d3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -73,8 +73,6 @@ export { d1ToSqlAdapter, defaultIsRetryable, defineConversation, - type EvalPersonaOptions, - evalPersona, FileConversationJournal, FORWARD_HEADERS, InMemoryConversationJournal, diff --git a/src/topology/index.ts b/src/topology/index.ts deleted file mode 100644 index 1afc15e2..00000000 --- a/src/topology/index.ts +++ /dev/null @@ -1,23 +0,0 @@ -/** - * @experimental - * - * `@tangle-network/agent-runtime/topology` — the live recursive-agent-tree projection over the - * lifecycle hook stream. Attach `createTopologyView().hooks` to a `Supervisor`/`runLoop` and read - * `.render()` for the agent tree; or fold a journal replay with `renderTopologyTree`. - */ - -// The animated visual replay: fold the SAME hook stream into a timestamped timeline + -// a self-contained, scrubbable HTML player (delivered/running/failed colored per node). 
-export { - createReplayRecorder, - type ReplayEvent, - type ReplayTimeline, - renderReplayHtml, -} from './replay' -export type { - RenderOptions, - TopologyNode, - TopologyStatus, - TopologyView, -} from './tree' -export { createTopologyView, renderTopologyTree } from './tree' diff --git a/src/topology/replay.ts b/src/topology/replay.ts deleted file mode 100644 index 0abecf98..00000000 --- a/src/topology/replay.ts +++ /dev/null @@ -1,287 +0,0 @@ -/** - * @experimental - * - * Run replay — the visual, animated record of a recursive agent run. - * - * The runtime emits ONE event stream (`agent.spawn`/`agent.child`/`agent.run`/`agent.turn`) - * through `RuntimeHooks`; the topology tree + waterfall already fold it into ASCII. This module - * folds the SAME stream into a normalized, timestamped `ReplayEvent[]` (the recorder) and renders - * a self-contained, animated HTML player (`renderReplayHtml`) — a timeline scrubber over the live - * recursive tree where every node colors by the completion-oracle: delivered (valid) green, ran- - * but-not-delivered amber, failed red. No server, no build, no external deps — one HTML file you - * open in a browser. The same `ReplayEvent[]` is the portable timeline a hosted plane viewer reads. - */ - -import type { RuntimeHookEvent, RuntimeHooks } from '../runtime-hooks' - -/** One normalized animation frame — a node appearing, settling, or stepping, at a wall-clock ms. */ -export interface ReplayEvent { - t: number - kind: 'root' | 'spawn' | 'settle' | 'step' - id: string - parentId?: string - label?: string - runtime?: string - depth?: number - status?: 'running' | 'done' | 'down' - /** The completion-oracle signal: delivered ⟺ a deployable check passed (not self-report). */ - valid?: boolean - score?: number - reason?: string - tokens?: number - usd?: number -} - -export interface ReplayTimeline { - runId: string - events: ReplayEvent[] - /** Wall-clock window [t0, t1] the player scrubs over. 
*/ - t0: number - t1: number -} - -interface RecordedSpend { - tokens?: { input?: number; output?: number } - usd?: number -} - -function spendTokens(s: RecordedSpend | undefined): number { - if (!s?.tokens) return 0 - return (s.tokens.input ?? 0) + (s.tokens.output ?? 0) -} - -/** - * A `RuntimeHooks` sink that records every lifecycle event in arrival order as `ReplayEvent`s. - * Attach it to `SupervisorOpts.hooks` (or merge with another hooks object) and read `timeline()` - * after the run. Pure capture — no I/O, no throwing; an unrecognized event is ignored. - */ -export function createReplayRecorder(): { - hooks: RuntimeHooks - events: ReplayEvent[] - timeline(runId?: string): ReplayTimeline -} { - const events: ReplayEvent[] = [] - let runId = 'run' - - const onEvent = (e: RuntimeHookEvent): void => { - if (e.runId) runId = e.runId - const t = e.timestamp - const p = (e.payload ?? {}) as Record - switch (e.target) { - case 'agent.run': { - // The root driver's lifecycle. `before` plants the root node; `after`/`error` settle it. - if (e.phase === 'before') { - events.push({ - t, - kind: 'root', - id: e.runId, - label: String(p.driver ?? 'root'), - depth: 0, - status: 'running', - }) - } else if (e.phase === 'after' || e.phase === 'error') { - events.push({ - t, - kind: 'settle', - id: e.runId, - status: e.phase === 'error' ? 'down' : 'done', - }) - } - break - } - case 'agent.spawn': { - events.push({ - t, - kind: 'spawn', - id: String(p.childId ?? e.id), - ...(e.parentId ? { parentId: e.parentId } : {}), - label: String(p.label ?? p.childId ?? '?'), - ...(p.runtime ? { runtime: String(p.runtime) } : {}), - ...(typeof p.depth === 'number' ? { depth: p.depth } : {}), - status: 'running', - }) - break - } - case 'agent.child': { - const spent = p.spent as RecordedSpend | undefined - events.push({ - t, - kind: 'settle', - id: String(p.childId ?? e.id), - status: (p.status as 'done' | 'down') ?? 'done', - ...(typeof p.valid === 'boolean' ? 
{ valid: p.valid } : {}), - ...(typeof p.score === 'number' ? { score: p.score } : {}), - ...(p.reason ? { reason: String(p.reason) } : {}), - tokens: spendTokens(spent), - usd: spent?.usd ?? 0, - }) - break - } - default: { - // agent.turn / agent.plan / agent.decision / agent.tool_call → a step pulse on the owner. - const owner = e.parentId ?? e.runId - if (owner) events.push({ t, kind: 'step', id: owner }) - } - } - } - - return { - hooks: { onEvent }, - events, - timeline(rid?: string): ReplayTimeline { - const ts = events.map((e) => e.t) - const t0 = ts.length ? Math.min(...ts) : 0 - const t1 = ts.length ? Math.max(...ts) : 0 - // Synthesize any node referenced as a parent but never spawned (the supervisor's root - // driver, and each nested driver's tree root, run via `act`, not `spawn`) so the player - // renders the WHOLE recursion — driver → worker — not just the spawned leaves. - const defined = new Set( - events.filter((e) => e.kind === 'spawn' || e.kind === 'root').map((e) => e.id), - ) - const synthetic: ReplayEvent[] = [] - for (const id of new Set(events.map((e) => e.parentId).filter((p): p is string => !!p))) { - if (!defined.has(id)) - synthetic.push({ - t: t0, - kind: 'root', - id, - label: shortRoot(id), - depth: 0, - status: 'running', - }) - } - return { runId: rid ?? runId, events: [...synthetic, ...events], t0, t1 } - }, - } -} - -/** Render a self-contained animated HTML replay player for a timeline. Open the file in a browser. */ -export function renderReplayHtml(timeline: ReplayTimeline, opts?: { title?: string }): string { - const title = opts?.title ?? `agent replay · ${timeline.runId}` - const data = JSON.stringify(timeline) - return ` - -${escapeHtml(title)} - -
- ${escapeHtml(title)} - runningdeliveredfailed - t 0.0s - nodes 0 - delivered 0 - tokens 0 - $0.000 -
-
-

node

hover a node…
-
- - - - -
-` -} - -/** A readable label for a synthesized root node (the last path segment of a nested tree key). */ -function shortRoot(id: string): string { - const seg = id.split('/').pop() ?? id - return seg.length > 22 ? `…${seg.slice(-21)}` : seg -} - -function escapeHtml(s: string): string { - return s.replace( - /[<>&"]/g, - (c) => ({ '<': '<', '>': '>', '&': '&', '"': '"' })[c] ?? c, - ) -} diff --git a/src/topology/tree.ts b/src/topology/tree.ts deleted file mode 100644 index a97267e7..00000000 --- a/src/topology/tree.ts +++ /dev/null @@ -1,198 +0,0 @@ -/** - * @experimental - * - * TOPOLOGY VIEW — the live recursive agent tree, folded from the ONE lifecycle stream - * (`src/runtime-hooks.ts`). It is a pure projection: feed it the `RuntimeHookEvent`s that - * `runLoop`, `toolLoop`, and the keystone `Scope` already emit, and it maintains the tree of - * agents + renders it. Attach `view.hooks` to a `Supervisor`/`runLoop` and the tree updates live. - * - * Two node sources, ONE stream: - * - an agent node is born from `agent.spawn` (id = `childId`, parent = the spawner) or the root - * `agent.run` (id = `runId`); `agent.child`/`agent.run:after` settle it (status + score/reason). - * - a STEP (`agent.{turn,tool_call,plan,decision}`) is not a node — it advances the step count of - * the agent it belongs to (matched by `runId`/`parentId`). - * - * No I/O, no timers, no backend coupling — the same projection drives a CLI render, a TUI, or a - * web tree. Rendering is deterministic given the event order (the stream is the source of truth). - */ - -import type { RuntimeHookEvent, RuntimeHooks } from '../runtime-hooks' - -export type TopologyStatus = 'running' | 'done' | 'down' - -/** One agent in the tree. A leaf never spawns; a driver's `childIds` is non-empty. */ -export interface TopologyNode { - readonly id: string - /** Display label (spawn `label`, or the driver name on the root). */ - label: string - /** Leaf runtime (`router`/`sandbox`/`cli`) when known. 
*/ - runtime?: string - /** Parent agent id; undefined ⇒ a root. */ - parentId?: string - /** Recursion depth (root = 0). */ - depth: number - status: TopologyStatus - /** Count of in-agent steps (turns + tool calls + plan/decision rounds) folded so far. */ - steps: number - /** Deployable score in [0,1] once settled `done`. */ - score?: number - /** Failure reason once settled `down`. */ - reason?: string - /** Children in spawn order. */ - readonly childIds: string[] -} - -export interface RenderOptions { - /** Cap the rendered depth (deeper nodes collapse to a `… N more` line). Default: no cap. */ - readonly maxDepth?: number - /** Drop the per-node detail suffix (steps/children/score) — labels only. Default: false. */ - readonly compact?: boolean -} - -export interface TopologyView { - /** The `RuntimeHooks` sink — attach to `SupervisorOpts.hooks` / `runLoop` options. */ - readonly hooks: RuntimeHooks - /** Fold one event into the tree (the same call `hooks.onEvent` makes — exposed for replay). */ - ingest(event: RuntimeHookEvent): void - /** Every node, insertion order. */ - nodes(): TopologyNode[] - /** Nodes with no in-tree parent (the run roots). */ - roots(): TopologyNode[] - /** One node by id. */ - node(id: string): TopologyNode | undefined - /** Render the tree as an aligned ASCII forest. */ - render(opts?: RenderOptions): string -} - -const stepTargets = new Set(['agent.turn', 'agent.tool_call', 'agent.plan', 'agent.decision']) - -/** Build a live topology view. Stateful — one per run (or per replay). */ -export function createTopologyView(): TopologyView { - // Insertion-ordered so render is stable and roots() reflects spawn order. - const byId = new Map() - - const ensure = (id: string, seed: Partial = {}): TopologyNode => { - const existing = byId.get(id) - if (existing) return existing - const node: TopologyNode = { - id, - label: seed.label ?? id, - depth: seed.depth ?? 
0, - status: 'running', - steps: 0, - childIds: [], - ...seed, - } - byId.set(id, node) - return node - } - - const str = (v: unknown): string | undefined => (typeof v === 'string' ? v : undefined) - const num = (v: unknown): number | undefined => (typeof v === 'number' ? v : undefined) - - const ingest = (event: RuntimeHookEvent): void => { - const p = (event.payload ?? {}) as Record - - if (event.target === 'agent.spawn' && event.phase === 'after') { - const id = str(p.childId) - if (!id) return - const parent = event.parentId ? ensure(event.parentId) : undefined - const node = ensure(id, { - label: str(p.label) ?? id, - runtime: str(p.runtime), - parentId: event.parentId, - depth: num(p.depth) ?? (parent ? parent.depth + 1 : 0), - }) - if (parent && !parent.childIds.includes(id)) parent.childIds.push(id) - node.status = 'running' - return - } - - if (event.target === 'agent.child' && event.phase === 'after') { - const id = str(p.childId) - if (!id) return - const node = ensure(id, { parentId: event.parentId }) - node.status = str(p.status) === 'down' ? 'down' : 'done' - node.score = num(p.score) - node.reason = str(p.reason) - return - } - - if (event.target === 'agent.run') { - const node = ensure(event.runId, { - label: str(p.driver) ?? event.runId, - parentId: event.parentId, - }) - if (event.phase === 'before') node.status = 'running' - else if (event.phase === 'error') node.status = 'down' - else if (event.phase === 'after' && node.status === 'running') node.status = 'done' - return - } - - // A step advances the agent it belongs to: tool-loop/run-loop key the agent by `runId`, - // a within-agent sub-event names it via `parentId`. Only count `after`/`event` phases so a - // before/after pair is one step, not two. An unknown owner is dropped (no phantom node). 
- if (stepTargets.has(event.target) && (event.phase === 'after' || event.phase === 'event')) { - const owner = - (event.parentId && byId.get(event.parentId)) || byId.get(event.runId) || undefined - if (owner) owner.steps += 1 - } - } - - const nodes = (): TopologyNode[] => [...byId.values()] - const roots = (): TopologyNode[] => - nodes().filter((n) => n.parentId === undefined || !byId.has(n.parentId)) - - return { - hooks: { onEvent: (e) => ingest(e) }, - ingest, - nodes, - roots, - node: (id) => byId.get(id), - render: (opts) => renderTopologyTree({ roots: roots(), node: (id) => byId.get(id) }, opts), - } -} - -const glyph: Record = { running: '◐', done: '✓', down: '✗' } - -/** Render a forest of `TopologyNode`s to an aligned ASCII tree. Pure — given the same roots + - * node lookup it returns the same string. Exposed so a caller can render a tree it folded - * itself (e.g. from a journal replay) without the live view. */ -export function renderTopologyTree( - tree: { roots: TopologyNode[]; node: (id: string) => TopologyNode | undefined }, - opts: RenderOptions = {}, -): string { - const lines: string[] = [] - - const detail = (n: TopologyNode): string => { - if (opts.compact) return '' - const parts: string[] = [] - if (n.runtime) parts.push(n.runtime) - if (n.steps > 0) parts.push(`${n.steps} ${n.steps === 1 ? 'step' : 'steps'}`) - if (n.childIds.length > 0) parts.push(`${n.childIds.length} children`) - if (n.status === 'done' && n.score !== undefined) parts.push(`score ${n.score.toFixed(2)}`) - if (n.status === 'down' && n.reason) parts.push(`down: ${n.reason}`) - return parts.length ? ` (${parts.join(' · ')})` : '' - } - - const walk = (n: TopologyNode, prefix: string, isLast: boolean, depth: number): void => { - const branch = depth === 0 ? '' : isLast ? 
'└─ ' : '├─ ' - lines.push(`${prefix}${branch}${glyph[n.status]} ${n.label}${detail(n)}`) - - if (opts.maxDepth !== undefined && depth >= opts.maxDepth && n.childIds.length > 0) { - const childPrefix = prefix + (depth === 0 ? '' : isLast ? ' ' : '│ ') - lines.push(`${childPrefix}└─ … ${n.childIds.length} more`) - return - } - const childPrefix = prefix + (depth === 0 ? '' : isLast ? ' ' : '│ ') - const kids = n.childIds.map((id) => tree.node(id)).filter((c): c is TopologyNode => !!c) - kids.forEach((c, i) => { - walk(c, childPrefix, i === kids.length - 1, depth + 1) - }) - } - - tree.roots.forEach((r, i) => { - walk(r, '', i === tree.roots.length - 1, 0) - }) - return lines.join('\n') -} diff --git a/tests/topology-replay.test.ts b/tests/topology-replay.test.ts deleted file mode 100644 index 017cfd75..00000000 --- a/tests/topology-replay.test.ts +++ /dev/null @@ -1,129 +0,0 @@ -import { describe, expect, it } from 'vitest' -import type { RuntimeHookEvent } from '../src/runtime-hooks' -import { createReplayRecorder, renderReplayHtml } from '../src/topology/replay' - -function ev( - p: Partial & { target: string; timestamp: number }, -): RuntimeHookEvent { - return { - id: p.id ?? `${p.target}:${p.timestamp}`, - runId: p.runId ?? 'run', - phase: p.phase ?? 
'after', - ...p, - } as RuntimeHookEvent -} - -describe('createReplayRecorder — folds the hook stream into a timestamped timeline', () => { - it('captures spawn + settle, carrying the completion-oracle `valid` signal', () => { - const r = createReplayRecorder() - r.hooks.onEvent?.( - ev({ - target: 'agent.spawn', - timestamp: 100, - parentId: 'run', - payload: { childId: 'run:s0', label: 'worker', runtime: 'router', depth: 0 }, - }), - {}, - ) - r.hooks.onEvent?.( - ev({ - target: 'agent.child', - timestamp: 250, - parentId: 'run', - payload: { - childId: 'run:s0', - status: 'done', - valid: true, - score: 1, - spent: { tokens: { input: 10, output: 20 }, usd: 0.001 }, - }, - }), - {}, - ) - const tl = r.timeline('run') - const spawn = tl.events.find((e) => e.kind === 'spawn' && e.id === 'run:s0') - const settle = tl.events.find((e) => e.kind === 'settle' && e.id === 'run:s0') - expect(spawn?.label).toBe('worker') - expect(spawn?.runtime).toBe('router') - expect(settle?.status).toBe('done') - expect(settle?.valid).toBe(true) // delivered — the oracle signal survives into the timeline - expect(settle?.score).toBe(1) - expect(settle?.tokens).toBe(30) - expect(tl.t0).toBe(100) - expect(tl.t1).toBe(250) - }) - - it('synthesizes the unspawned root driver so the whole recursion renders', () => { - const r = createReplayRecorder() - // A worker whose parent (`run`, the root driver run via act) never emitted a spawn event. 
- r.hooks.onEvent?.( - ev({ - target: 'agent.spawn', - timestamp: 10, - parentId: 'run', - payload: { childId: 'run:s0', label: 'w' }, - }), - {}, - ) - const tl = r.timeline('run') - const root = tl.events.find((e) => e.kind === 'root' && e.id === 'run') - expect(root).toBeDefined() // a synthetic root node, so the worker isn't an orphan - expect(tl.events.indexOf(root!)).toBe(0) // prepended before the events that reference it - }) - - it('marks a ran-but-not-delivered child distinctly from a delivered one', () => { - const r = createReplayRecorder() - r.hooks.onEvent?.( - ev({ - target: 'agent.spawn', - timestamp: 1, - parentId: 'run', - payload: { childId: 'run:s0', label: 'a' }, - }), - {}, - ) - r.hooks.onEvent?.( - ev({ - target: 'agent.child', - timestamp: 2, - parentId: 'run', - payload: { childId: 'run:s0', status: 'done', valid: false, score: 0 }, - }), - {}, - ) - const settle = r.timeline('run').events.find((e) => e.kind === 'settle') - expect(settle?.status).toBe('done') - expect(settle?.valid).toBe(false) // ran, produced output, but did NOT deliver - }) -}) - -describe('renderReplayHtml — a self-contained animated player', () => { - it('emits standalone HTML embedding the timeline + the player scaffold', () => { - const r = createReplayRecorder() - r.hooks.onEvent?.( - ev({ - target: 'agent.spawn', - timestamp: 0, - parentId: 'run', - payload: { childId: 'run:s0', label: 'worker' }, - }), - {}, - ) - r.hooks.onEvent?.( - ev({ - target: 'agent.child', - timestamp: 5, - parentId: 'run', - payload: { childId: 'run:s0', status: 'done', valid: true }, - }), - {}, - ) - const html = renderReplayHtml(r.timeline('run'), { title: 'unit' }) - expect(html.startsWith('')).toBe(true) - expect(html).toContain('const TL = {') - expect(html).toContain('"id":"run:s0"') - expect(html).toContain('id="scrub"') // the timeline scrubber - expect(html).toContain('') // the tree stage - expect(html).not.toContain('') // no injection from the data - }) -}) diff --git 
a/tests/topology.test.ts b/tests/topology.test.ts deleted file mode 100644 index a7898347..00000000 --- a/tests/topology.test.ts +++ /dev/null @@ -1,123 +0,0 @@ -import { describe, expect, it } from 'vitest' -import type { RuntimeHookEvent, RuntimeHookTarget } from '../src/runtime-hooks' -import { createTopologyView, renderTopologyTree } from '../src/topology/tree' - -// The events here mirror EXACTLY what the keystone emits — `agent.spawn`/`agent.child` from -// `Scope` (proven real by tests/loops/supervise.test.ts §9) and `agent.turn` from the tool loop. -// The view is a pure projection of that contract, so folding the contract is the honest test. - -let seq = 0 -function ev( - target: RuntimeHookTarget, - phase: RuntimeHookEvent['phase'], - over: Partial & { payload?: Record }, -): RuntimeHookEvent { - seq += 1 - return { id: `e${seq}`, runId: 'run', target, phase, timestamp: seq, ...over } -} - -/** A two-level tree: root `run` drives `planner` (itself a sub-driver) + `coder`; planner drives - * `subtask`. coder takes 2 turns then fails its tests; planner + subtask succeed. */ -function buildSampleRun() { - const view = createTopologyView() - const e = (event: RuntimeHookEvent) => view.ingest(event) - - e(ev('agent.run', 'before', { runId: 'run', payload: { driver: 'supervisor' } })) - e( - ev('agent.spawn', 'after', { - parentId: 'run', - payload: { childId: 'run:s0', label: 'planner', runtime: 'router', depth: 1 }, - }), - ) - e( - ev('agent.spawn', 'after', { - parentId: 'run', - payload: { childId: 'run:s1', label: 'coder', runtime: 'sandbox', depth: 1 }, - }), - ) - e( - ev('agent.spawn', 'after', { - parentId: 'run:s0', - payload: { childId: 'run:s0:s0', label: 'subtask', runtime: 'router', depth: 2 }, - }), - ) - // coder works two turns (tool-loop keys the agent by runId), then fails its deployable check. 
- e(ev('agent.turn', 'after', { runId: 'run:s1', payload: {} })) - e(ev('agent.turn', 'after', { runId: 'run:s1', payload: {} })) - e( - ev('agent.child', 'after', { - parentId: 'run:s0', - payload: { childId: 'run:s0:s0', status: 'done', score: 0.91, valid: true }, - }), - ) - e( - ev('agent.child', 'after', { - parentId: 'run', - payload: { childId: 'run:s0', status: 'done', score: 0.8, valid: true }, - }), - ) - e( - ev('agent.child', 'after', { - parentId: 'run', - payload: { childId: 'run:s1', status: 'down', reason: 'tests failed' }, - }), - ) - e(ev('agent.run', 'after', { runId: 'run', payload: {} })) - return view -} - -describe('topology view — folds the lifecycle stream into the recursive agent tree', () => { - it('builds the parent/child structure with status, score, and step counts', () => { - const view = buildSampleRun() - - expect(view.roots().map((n) => n.id)).toEqual(['run']) - expect(view.node('run')?.status).toBe('done') - expect(view.node('run')?.childIds).toEqual(['run:s0', 'run:s1']) - expect(view.node('run:s0')?.childIds).toEqual(['run:s0:s0']) - - // Depth flows from the spawn payload. - expect(view.node('run:s0:s0')?.depth).toBe(2) - - // Terminal status + the deployable verdict the viewer colors by. - expect(view.node('run:s0')).toMatchObject({ status: 'done', score: 0.8 }) - expect(view.node('run:s1')).toMatchObject({ status: 'down', reason: 'tests failed' }) - - // Steps attribute to the agent that ran them (coder took 2 turns; nobody else stepped). - expect(view.node('run:s1')?.steps).toBe(2) - expect(view.node('run:s0')?.steps).toBe(0) - }) - - it('renders an aligned ASCII forest with glyphs + per-node detail', () => { - const out = buildSampleRun().render() - const lines = out.split('\n') - - // Root first, then its children indented under tree branches. 
- expect(lines[0]).toBe('✓ supervisor (2 children)') - expect(out).toContain('├─ ✓ planner (router · 1 children · score 0.80)') - expect(out).toContain('│ └─ ✓ subtask (router · score 0.91)') - expect(out).toContain('└─ ✗ coder (sandbox · 2 steps · down: tests failed)') - }) - - it('compact mode drops detail; maxDepth collapses deep subtrees', () => { - const view = buildSampleRun() - expect(view.render({ compact: true })).toContain('├─ ✓ planner') - expect(view.render({ compact: true })).not.toContain('children') - - const capped = view.render({ maxDepth: 1 }) - expect(capped).toContain('└─ … 1 more') // subtask collapses under planner at depth 1 - expect(capped).not.toContain('subtask') - }) - - it('a step or settle for an unknown agent is dropped — no phantom node', () => { - const view = createTopologyView() - view.ingest(ev('agent.turn', 'after', { runId: 'ghost', payload: {} })) - expect(view.nodes()).toHaveLength(0) - }) - - it('renderTopologyTree is pure over a folded tree (replay-friendly)', () => { - const view = buildSampleRun() - const a = renderTopologyTree({ roots: view.roots(), node: (id) => view.node(id) }) - const b = view.render() - expect(a).toBe(b) - }) -})