diff --git a/.github/actions/bencher-track/action.yml b/.github/actions/bencher-track/action.yml
new file mode 100644
index 00000000..93d6dc25
--- /dev/null
+++ b/.github/actions/bencher-track/action.yml
@@ -0,0 +1,82 @@
+name: Track benchmarks on bencher
+description: >-
+  Run `bencher run` for the `ix` project, windowing deterministic "stepping"
+  measures to commits since the last lean-toolchain bump (and dropping them on
+  the bump commit via --thresholds-reset, so a toolchain step lands without a
+  spurious alert). The calling job must check out with fetch-depth: 0.
+
+inputs:
+  testbed:
+    description: Bencher testbed name.
+    required: true
+  file:
+    description: Bencher Metric Format JSON file to upload.
+    required: true
+  key:
+    description: BENCHER_API_KEY (project-scoped `bencher_run_*` or user-scoped `bencher_user_*`).
+    required: true
+  github-token:
+    description: GITHUB_TOKEN, for the --github-actions comment.
+    required: true
+  always-thresholds:
+    description: >-
+      Raw `--threshold-measure …` flags for measures tracked on every commit
+      (non-stepping). Word-split, so keep each flag a single token.
+    required: false
+    default: ""
+  stepping-measures:
+    description: >-
+      Space-separated measure names that step on a toolchain bump (e.g.
+      "file-size constants" or "fft-cost"). Windowed to commits-since-bump and
+      omitted on the bump commit.
+    required: false
+    default: ""
+
+runs:
+  using: composite
+  steps:
+    - uses: bencherdev/bencher@v0.6.7
+    - shell: bash
+      env:
+        BENCHER_KEY: ${{ inputs.key }}
+        GH_TOKEN: ${{ inputs.github-token }}
+        TESTBED: ${{ inputs.testbed }}
+        FILE: ${{ inputs.file }}
+        ALWAYS_THRESHOLDS: ${{ inputs.always-thresholds }}
+        STEPPING_MEASURES: ${{ inputs.stepping-measures }}
+      run: |
+        # Sample only commits since lean-toolchain last changed, so a stepping
+        # measure's baseline window never straddles a toolchain bump.
+        last_bump=$(git log -1 --format=%H -- lean-toolchain)
+        sample=$(git rev-list --count "${last_bump}..HEAD")
+        [ "$sample" -gt 64 ] && sample=64
+        echo "commits since last lean-toolchain bump (capped 64): $sample"
+        # On the bump commit (sample=0) omit the stepping measures entirely;
+        # --thresholds-reset then clears their models so the step doesn't alert.
+        # Between bumps these measures are deterministic, so pin them exactly
+        # (0% on both sides): any change in either direction is a real circuit
+        # or closure change, not noise, and should alert.
+        step_thresholds=()
+        if [ "$sample" -gt 0 ]; then
+          for m in $STEPPING_MEASURES; do
+            step_thresholds+=(
+              --threshold-measure "$m"
+              --threshold-test percentage
+              --threshold-max-sample-size "$sample"
+              --threshold-upper-boundary 0
+              --threshold-lower-boundary 0
+            )
+          done
+        fi
+        bencher run \
+          --project ix \
+          --key "$BENCHER_KEY" \
+          --branch "$GITHUB_REF_NAME" \
+          --hash "$GITHUB_SHA" \
+          --testbed "$TESTBED" \
+          --adapter json \
+          --github-actions "$GH_TOKEN" \
+          $ALWAYS_THRESHOLDS \
+          "${step_thresholds[@]}" \
+          --thresholds-reset \
+          --file "$FILE"
diff --git a/.github/workflows/aiur-bench.yml b/.github/workflows/aiur-bench.yml
new file mode 100644
index 00000000..5f94ff99
--- /dev/null
+++ b/.github/workflows/aiur-bench.yml
@@ -0,0 +1,259 @@
+name: Aiur benchmarks
+
+# One workflow, two benchmarks per library env, on every push to main:
+#   1. compile job — `ix compile` the Lean env to a `.ixe` (compile-throughput
+#                    metrics) and cache the `.ixe`.
+#   2. prove job   — restore that `.ixe` from the cache (no recompile) and
+#                    STARK-check selected constants over it via bench-typecheck
+#                    (Aiur execute + prove metrics).
+# The prove job reuses the exact `.ixe` the compile job built, so the compiler
+# runs once. Compile and prove report to separate bencher testbeds so each one's
+# `--thresholds-reset` only touches its own measures.
+
+on:
+  push:
+    branches: main
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  checks: write
+
+env:
+  COMPILE_DIR: Benchmarks/Compile
+
+jobs:
+  # Build + stage the `ix` and `bench-typecheck` binaries once, then restore
+  # them on the (more expensive) matrix runners.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      # Pinned Rust toolchain + cargo cache (~/.cargo + target/, via the action's
+      # built-in rust-cache), so `lake build`'s cargo step doesn't recompile the
+      # Plonky3/multi-stark deps from scratch on every run.
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      # `.cargo/config.toml` sets `-Ctarget-cpu=native`, and the binary is built
+      # once here (then only restored on the warp runners), so this build host's
+      # CPU fixes the instruction set baked in — incl. AVX-512, a big Plonky3
+      # speedup. Log it so a benchmark shift can be traced to a build-CPU change.
+      - name: Log build CPU
+        run: |
+          lscpu
+          grep -qw avx512f /proc/cpuinfo \
+            && echo "AVX-512F: present (compiled into the binary)" \
+            || echo "AVX-512F: absent"
+      - uses: leanprover/lean-action@v1
+        with:
+          auto-config: false
+          build: true
+          build-args: "ix --wfail -v"
+      - run: |
+          mkdir -p ~/.local/bin
+          echo | lake run install            # copies ix -> ~/.local/bin/ix
+          lake build bench-typecheck
+          cp .lake/build/bin/bench-typecheck ~/.local/bin/bench-typecheck
+          chmod +x ~/.local/bin/bench-typecheck
+      - uses: actions/cache/save@v5
+        with:
+          path: ~/.local/bin
+          key: aiur-bench-bins-${{ github.sha }}
+
+  # Compile each library env to a `.ixe` and track compile throughput. Caches
+  # the `.ixe` (keyed by sha + matrix job) for the prove job to consume.
+  compile:
+    needs: build
+    runs-on: warp-ubuntu-latest-x64-32x
+    strategy:
+      fail-fast: false
+      matrix:
+        bench: [InitStd, Lean, Mathlib, FLT] # Add FC if updated to latest Lean
+        include:
+          - bench: Mathlib
+            mathlib: true
+          - bench: FLT
+            cache_pkg: flt
+            mathlib: true
+          # - bench: FC
+          #   cache_pkg: formal_conjectures
+          #   mathlib: true
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+      - uses: actions/cache/restore@v5
+        with:
+          path: ~/.local/bin
+          key: aiur-bench-bins-${{ github.sha }}
+      - run: echo "$HOME/.local/bin" >> $GITHUB_PATH
+      # FC's library env lives in a sibling `${COMPILE_DIR}FC` package dir, so
+      # point COMPILE_DIR there for the FC matrix job.
+      # - if: matrix.bench == 'FC'
+      #   run: echo "COMPILE_DIR=${{ env.COMPILE_DIR }}FC" | tee -a $GITHUB_ENV
+      # Download elan; fetch the mathlib cache only for benches that import
+      # Mathlib (Mathlib, FLT) — InitStd/Lean would otherwise pull it for
+      # nothing, since the shared Benchmarks/Compile package depends on mathlib.
+      - uses: leanprover/lean-action@v1
+        with:
+          lake-package-directory: ${{ env.COMPILE_DIR }}
+          auto-config: false
+          use-github-cache: false
+          use-mathlib-cache: ${{ matrix.mathlib && 'true' || 'false' }}
+      # FLT and FC take a few minutes to rebuild, so cache their build artifacts.
+      - if: matrix.cache_pkg
+        uses: actions/cache@v5
+        with:
+          path: ${{ env.COMPILE_DIR }}/.lake/packages/${{ matrix.cache_pkg }}/.lake/build
+          key: ${{ matrix.cache_pkg }}-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles(format('{0}/lean-toolchain', env.COMPILE_DIR)) }}-${{ hashFiles(format('{0}/lake-manifest.json', env.COMPILE_DIR)) }}
+      # No `--wfail` here: formal-conjectures (FC) emits a copyright-notice
+      # warning that must not fail the build.
+      - run: lake build Compile${{ matrix.bench }}
+        working-directory: ${{ env.COMPILE_DIR }}
+      # Serialize the env to a `.ixe` and emit the `##benchmark##` line.
+      - name: Run ix compile
+        run: |
+          ix compile ${{ env.COMPILE_DIR }}/Compile${{ matrix.bench }}.lean \
+            --out ${{ matrix.bench }}.ixe 2>&1 | tee output.txt
+      # Cache the `.ixe` for the prove job (reused, never recompiled there).
+      # Only the matrix jobs the prove job consumes, to stay under the repo cache limit.
+      - if: matrix.bench == 'InitStd' || matrix.bench == 'Mathlib'
+        uses: actions/cache/save@v5
+        with:
+          path: ${{ matrix.bench }}.ixe
+          key: aiur-ixe-${{ github.sha }}-${{ matrix.bench }}
+      - name: Generate compile benchmark JSON
+        run: |
+          line=$(grep '^##benchmark##' output.txt)
+          elapsed_s=$(echo "$line" | awk '{printf "%.3f", $2 / 1000}')
+          bytes=$(echo "$line" | awk '{print $3}')
+          constants=$(echo "$line" | awk '{print $4}')
+          throughput=$(echo "$line" | awk '{if ($2 > 0) printf "%.2f", $4 * 1000 / $2; else print 0}')
+          cat > benchmark.json <<EOF
+          {
+            "${{ matrix.bench }}": {
+              "compile-time": {"value": ${elapsed_s}},
+              "file-size": {"value": ${bytes}},
+              "throughput": {"value": ${throughput}},
+              "constants": {"value": ${constants}}
+            }
+          }
+          EOF
+      # Upload compile metrics. file-size/constants step on a toolchain bump, so
+      # they're the bump-windowed stepping measures; compile-time/throughput ride
+      # their normal 64-window (throughput's regression is a drop, hence its
+      # lower boundary).
+      - uses: ./.github/actions/bencher-track
+        with:
+          testbed: warp-ubuntu-x64-32x
+          file: benchmark.json
+          key: ${{ secrets.BENCHER_API_KEY }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          stepping-measures: file-size constants
+          always-thresholds: |
+            --threshold-measure compile-time --threshold-test percentage
+            --threshold-max-sample-size 64 --threshold-upper-boundary 0.05
+            --threshold-lower-boundary _
+            --threshold-measure throughput --threshold-test percentage
+            --threshold-max-sample-size 64 --threshold-upper-boundary _
+            --threshold-lower-boundary 0.05
+
+  # Restore each matrix job's `.ixe` from the cache and run the Aiur execute + prove
+  # benchmark over selected constants. No compiler run here.
+  prove:
+    needs: compile
+    runs-on: warp-ubuntu-latest-x64-32x
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - bench: InitStd
+            consts: Nat.add_comm Nat.sub_le_of_le_add String.append Array.append_assoc
+          - bench: Mathlib
+            consts: Nat.factorial Nat.Coprime Nat.Prime.two_le
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0   # full history for the toolchain-bump sample-size query
+      - uses: actions/cache/restore@v5
+        with:
+          path: ~/.local/bin
+          key: aiur-bench-bins-${{ github.sha }}
+      - run: echo "$HOME/.local/bin" >> $GITHUB_PATH
+      # Provision the toolchain so the bench-typecheck binary finds libleanshared
+      # (no package build). use-github-cache off: nothing to cache here, and
+      # parallel matrix jobs share a key and would race the cache save.
+      - uses: leanprover/lean-action@v1
+        with:
+          auto-config: false
+          build: false
+          use-github-cache: false
+      # Pull the `.ixe` the compile job built — do NOT recompile it here.
+      - uses: actions/cache/restore@v5
+        with:
+          path: ${{ matrix.bench }}.ixe
+          key: aiur-ixe-${{ github.sha }}-${{ matrix.bench }}
+          fail-on-cache-miss: true
+      # Run each constant in its own process so a clean failure or timeout drops
+      # only that constant from the report. NB: a constant heavy enough to OOM
+      # the runner host still cancels the whole job (an OOM SIGKILL of the host
+      # is uncatchable here), so every listed constant must fit in runner RAM.
+      # RAM is tracked via tracing-texray's machine-readable streaming lines
+      # (`[texray] <span> peak-rss-bytes=<N> (<X.YZ MiB>)`, emitted as each
+      # span closes): we parse the raw byte integer (awk's `$2+0` stops at
+      # the first non-digit, ignoring the parenthesized human suffix) and
+      # fold the max over spans in as `peak-rss` (bytes), the proving RSS
+      # high-water mark.
+      - name: Run Aiur typecheck benchmark
+        run: |
+          measure() {
+            local c="$1" rss
+            timeout 20m bench-typecheck --ixe ${{ matrix.bench }}.ixe "$c" \
+              --json "res-$c.json" --texray 2>"tx-$c.log" \
+              || echo "warning: $c failed (OOM/timeout); dropping it from this report"
+            rss=$(awk -F'peak-rss-bytes=' 'NF>1 && $2+0>max {max=$2+0} END {if (max>0) print max}' "tx-$c.log")
+            if [ -f "res-$c.json" ] && [ -n "$rss" ] && [ "$rss" -gt 0 ]; then
+              jq --argjson rss "$rss" 'map_values(. + {"peak-rss": $rss})' \
+                "res-$c.json" > "res-$c.json.tmp" && mv "res-$c.json.tmp" "res-$c.json" || true
+            fi
+          }
+          for c in ${{ matrix.consts }}; do measure "$c"; done
+          # Merge the per-constant results; if none produced anything, emit `{}`.
+          jq -s 'reduce .[] as $o ({}; . + $o)' res-*.json > results.json 2>/dev/null \
+            || echo '{}' > results.json
+          [ -s results.json ] || echo '{}' > results.json
+          # Wrap each metric value as { "value": v } for Bencher Metric Format.
+          # bench-typecheck already emits slug keys (constants, fft-cost,
+          # execute-time, prove-time, throughput = constants/prove-time); peak-rss
+          # is injected above.
+          jq '
+            map_values(to_entries | map({(.key): {value: .value}}) | add)
+          ' results.json > aiur.json
+          cat aiur.json
+      # Upload Aiur metrics. fft-cost and constants are deterministic and step on a
+      # toolchain bump (a different stdlib changes the constants' circuits and
+      # closure sizes), so they're the bump-windowed stepping measures — same
+      # treatment compile gives file-size/constants. prove-time/execute-time,
+      # peak-rss (texray's proving RSS high-water mark), and throughput
+      # (constants/prove-time, like compile's, where a drop is the regression)
+      # are noisy wall-clock and ride their normal 64-window.
+      - uses: ./.github/actions/bencher-track
+        with:
+          testbed: aiur-typecheck-x64-32x
+          file: aiur.json
+          key: ${{ secrets.BENCHER_API_KEY }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          stepping-measures: fft-cost constants
+          always-thresholds: |
+            --threshold-measure prove-time --threshold-test percentage
+            --threshold-max-sample-size 64 --threshold-upper-boundary 0.10
+            --threshold-lower-boundary _
+            --threshold-measure execute-time --threshold-test percentage
+            --threshold-max-sample-size 64 --threshold-upper-boundary 0.10
+            --threshold-lower-boundary _
+            --threshold-measure peak-rss --threshold-test percentage
+            --threshold-max-sample-size 64 --threshold-upper-boundary 0.10
+            --threshold-lower-boundary _
+            --threshold-measure throughput --threshold-test percentage
+            --threshold-max-sample-size 64 --threshold-upper-boundary _
+            --threshold-lower-boundary 0.10
diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
deleted file mode 100644
index b003b39f..00000000
--- a/.github/workflows/compile.yml
+++ /dev/null
@@ -1,145 +0,0 @@
-name: Benchmark Ix compiler
-
-on:
-  push:
-    branches: main
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  checks: write
-
-env:
-  COMPILE_DIR: Benchmarks/Compile
-
-jobs:
-  # First build and cache the `ix` binary for use in each matrix job
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-      - uses: leanprover/lean-action@v1
-        with:
-          auto-config: false
-          build: true
-          build-args: "ix --wfail -v"
-      - run: |
-          mkdir -p ~/.local/bin
-          echo | lake run install
-      - uses: actions/cache/save@v5
-        with:
-          path: ~/.local/bin/ix
-          key: ix-${{ github.sha }}
-
-  # Run the Ix compiler over each library env in Benchmarks/Compile
-  run-compiler:
-    needs: build
-    strategy:
-      matrix:
-        bench: [InitStd, Lean, Mathlib, FLT] # Add FC if updated to latest Lean
-        include:
-          - bench: FLT
-            cache_pkg: flt
-          # - bench: FC
-          #   cache_pkg: formal_conjectures
-    runs-on: warp-ubuntu-latest-x64-32x
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-      # Restore cached `ix` binary
-      - uses: actions/cache/restore@v5
-        with:
-          path: ~/.local/bin/ix
-          key: ix-${{ github.sha }}
-      - run: echo "$HOME/.local/bin" >> $GITHUB_PATH
-      - if: matrix.bench == 'FC'
-        run: echo "COMPILE_DIR=${{env.COMPILE_DIR}}FC" | tee -a $GITHUB_ENV
-      # Download elan and fetch mathlib cache
-      - uses: leanprover/lean-action@v1
-        with:
-          lake-package-directory: ${{ env.COMPILE_DIR }}
-          auto-config: false
-          use-github-cache: false
-      # FLT and FC take a few minutes to rebuild, so we cache the build artifacts
-      - if: matrix.cache_pkg
-        uses: actions/cache@v5
-        with:
-          path: ${{ env.COMPILE_DIR }}/.lake/packages/${{ matrix.cache_pkg }}/.lake/build
-          key: ${{ matrix.cache_pkg }}-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles(format('{0}/lean-toolchain', env.COMPILE_DIR)) }}-${{ hashFiles(format('{0}/lake-manifest.json', env.COMPILE_DIR)) }}
-      # Build the input library
-      # Allow warnings due to copyright notice in formal-conjectures
-      - run: lake build Compile${{ matrix.bench }}
-        working-directory: ${{ env.COMPILE_DIR }}
-      # Run the `ix` compiler over the input library env
-      - name: Run ix compile
-        run: ix compile ${{ env.COMPILE_DIR }}/Compile${{ matrix.bench }}.lean 2>&1 | tee output.txt
-      # Parse the ##benchmark## line into Bencher Metric Format JSON
-      - name: Generate benchmark JSON
-        run: |
-          line=$(grep '^##benchmark##' output.txt)
-          elapsed_s=$(echo "$line" | awk '{printf "%.3f", $2 / 1000}')
-          bytes=$(echo "$line" | awk '{print $3}')
-          constants=$(echo "$line" | awk '{print $4}')
-          throughput=$(echo "$line" | awk '{if ($2 > 0) printf "%.2f", $4 * 1000 / $2; else print 0}')
-          cat > benchmark.json <<EOF
-          {
-            "${{ matrix.bench }}": {
-              "build-time": {"value": ${elapsed_s}},
-              "file-size": {"value": ${bytes}},
-              "throughput": {"value": ${throughput}},
-              "constants": {"value": ${constants}}
-            }
-          }
-          EOF
-      # Rebase file-size and constants thresholds on toolchain bumps:
-      # sample only the metrics recorded since lean-toolchain last changed.
-      - name: Compute sample size for stepping measures
-        id: thresholds
-        run: |
-          last_bump=$(git log -1 --format=%H -- lean-toolchain)
-          sample=$(git rev-list --count "${last_bump}..HEAD")
-          [ "$sample" -gt 64 ] && sample=64
-          echo "sample=$sample" | tee -a "$GITHUB_OUTPUT"
-      # Deploy benchmark results to https://bencher.dev/console/projects/ix/plots
-      - uses: bencherdev/bencher@main
-      - name: Track benchmarks
-        run: |
-          # On a lean-toolchain bump (sample=0), omit file-size and constants
-          # thresholds entirely so the step change lands without an alert.
-          # --thresholds-reset deletes their models for this run; they're
-          # recreated on the next commit when sample >= 1.
-          sample="${{ steps.thresholds.outputs.sample }}"
-          step_thresholds=()
-          if [ "$sample" -gt 0 ]; then
-            for m in file-size constants; do
-              step_thresholds+=(
-                --threshold-measure "$m"
-                --threshold-test percentage
-                --threshold-max-sample-size "$sample"
-                --threshold-upper-boundary 0.10
-                --threshold-lower-boundary _
-              )
-            done
-          fi
-          bencher run \
-            --project ix \
-            --token '${{ secrets.BENCHER_API_TOKEN }}' \
-            --branch '${{ github.ref_name }}' \
-            --hash '${{ github.sha }}' \
-            --testbed warp-ubuntu-x64-32x \
-            --adapter json \
-            --github-actions '${{ secrets.GITHUB_TOKEN }}' \
-            --threshold-measure build-time \
-            --threshold-test percentage \
-            --threshold-max-sample-size 64 \
-            --threshold-upper-boundary 0.05 \
-            --threshold-lower-boundary _ \
-            --threshold-measure throughput \
-            --threshold-test percentage \
-            --threshold-max-sample-size 64 \
-            --threshold-upper-boundary _ \
-            --threshold-lower-boundary 0.05 \
-            "${step_thresholds[@]}" \
-            --thresholds-reset \
-            --file benchmark.json
diff --git a/Benchmarks/CheckNatAddComm.lean b/Benchmarks/CheckNatAddComm.lean
deleted file mode 100644
index f5748e05..00000000
--- a/Benchmarks/CheckNatAddComm.lean
+++ /dev/null
@@ -1,43 +0,0 @@
-import Ix.Meta
-import Ix.IxVM
-import Ix.Aiur.Protocol
-import Ix.Aiur.Compiler
-import Ix.Benchmark.Bench
-
-open BgroupM
-
-def commitmentParameters : Aiur.CommitmentParameters := {
-  logBlowup := 1
-  capHeight := 0
-}
-
-def friParameters : Aiur.FriParameters := {
-  logFinalPolyLen := 0
-  maxLogArity := 1
-  numQueries := 100
-  commitProofOfWorkBits := 20
-  queryProofOfWorkBits := 0
-}
-
-def main : IO Unit := do
-  let .ok toplevel := IxVM.ixVM
-    | throw (IO.userError "Merging failed")
-  let .ok compiled := toplevel.compile
-    | throw (IO.userError "Compilation failed")
-  let some funIdx := compiled.getFuncIdx `verify_claim
-    | throw (IO.userError "verify_claim entrypoint missing")
-  let aiurSystem := Aiur.AiurSystem.build compiled.bytecode commitmentParameters
-
-  let env ← get_env!
-  let ixonEnv ← IxVM.ClaimHarness.loadIxonEnv ``Nat.add_comm env
-  let target ← IxVM.ClaimHarness.lookupAddr ixonEnv ``Nat.add_comm
-  -- `assumptions = none` = full transitive typecheck.
-  let witness ← IO.ofExcept <| IxVM.ClaimHarness.buildClaimWitness ixonEnv
-    (Ix.Claim.check target none)
-
-  let _ ← bgroup "Kernel typechecking" { oneShot := true } do
-    throughput (.Elements ixonEnv.consts.size.toUInt64 "consts")
-    bench "check Nat.add_comm"
-      (aiurSystem.prove friParameters funIdx witness.input)
-      witness.inputIOBuffer
-  return
diff --git a/Benchmarks/Typecheck.lean b/Benchmarks/Typecheck.lean
new file mode 100644
index 00000000..ce4c64ca
--- /dev/null
+++ b/Benchmarks/Typecheck.lean
@@ -0,0 +1,261 @@
+import Cli
+import Ix.IxVM
+import Ix.Aiur.Protocol
+import Ix.Aiur.Compiler
+import Ix.Aiur.Statistics
+import Ix.TracingTexray
+import Ix.Benchmark.Bench
+import Ix.Cli.NameResolve
+
+/-!
+# Aiur typecheck benchmark
+
+Out-of-circuit **execution** and in-circuit **proving** benchmark for the IxVM
+kernel's `verify_claim` entrypoint, run over constants from a serialized
+`Ixon.Env` (`.ixe`). Reading the env from a `.ixe` keeps this on Ix's critical
+path (same load `ix check --ixe` uses) and avoids importing Lean modules at
+runtime. Useful standalone (per-constant timeline + RAM breakdown via
+tracing-texray) and as a machine source (neutral results JSON).
+
+```
+lake exe bench-typecheck --ixe <path> [names…] [flags]
+
+  --ixe <path>       serialized `Ixon.Env`, e.g. from `ix compile Foo.lean`
+                     (writes `foo.ixe`). Required.
+  [names…]           zero or more fully-qualified constant names to benchmark,
+                     e.g. `Nat.add_comm String.append`.
+  --manifest <path>  additionally read names from a file: one per line, blank
+                     lines and `#` comments ignored. Unions with [names…].
+
+  Names (from either source) resolve against the env's named map via
+  `String.toName` plus a `toString` fallback (mirrors `ix check --ixe`), so
+  numeric / private components round-trip (`Foo.0.Bar`, `_private.M.0.foo`).
+  Pass at least one name or a `--manifest`.
+
+  --json <path>  write per-constant results JSON to <path>. Off by default:
+                 normal CLI usage prints only the human-readable summary.
+  --texray       force the tracing-texray timeline + RAM breakdown on.
+  --no-texray    force it off. Default: on iff `--json` was NOT given, so a
+                 plain local run gets the breakdown while a JSON run stays quiet.
+```
+
+For each constant the harness STARK-checks `Ix.Claim.check addr none` (the full
+transitive typecheck) in two phases:
+
+1. **Execute** (every constant): run the bytecode out-of-circuit. Cheap and
+   deterministic, so we always record `constants` (closure size), `fft-cost`
+   (Σ width·height·log2(height) over circuits — the proving-cost proxy), and
+   `execute-time`.
+2. **Prove** (cheap→expensive by measured fft-cost): the end-to-end STARK prove,
+   recording `prove-time`. With texray on, each prove emits a per-span timeline
+   (`aiur/execute`, `aiur/witness`, `stark/...`) with RAM Δ/peak to stderr.
+
+When `--json` is set the file is rewritten after every prove, so an external
+`timeout` still leaves a complete file of the results collected so far (cheapest
+first). A name absent from the env or whose execution errors is skipped with a
+warning, so a single bad name never fails the run. The harness imposes no time
+limit; bound a run with an external `timeout` if needed.
+
+The JSON is a neutral, flat shape (`{ "<name>": { "constants": …, "fft-cost": …,
+"execute-time": …, "prove-time": …, "throughput": … } }`, where `prove-time` and
+`throughput` appear only for proven constants); any bencher-specific reshaping
+is the caller's job (see `.github/workflows/aiur-bench.yml`).
+-/
+
+open Lean (Json Name)
+
+def commitmentParameters : Aiur.CommitmentParameters := {
+  logBlowup := 1
+  capHeight := 0
+}
+
+def friParameters : Aiur.FriParameters := {
+  logFinalPolyLen := 0
+  maxLogArity := 1
+  numQueries := 100
+  commitProofOfWorkBits := 20
+  queryProofOfWorkBits := 0
+}
+
+/-- Manifest lines as raw strings: one name per line. Everything from a `#` to
+    end of line is a comment (whole-line or inline); blank lines are dropped.
+    `#` never appears in a Lean name, so splitting on it is safe. Resolution
+    against the env happens later (so the `toString` fallback can see the
+    displayed form the user wrote). -/
+def parseManifest (contents : String) : Array String :=
+  (contents.splitOn "\n").filterMap (fun line =>
+    let s := ((line.splitOn "#").head?.getD "").trimAscii
+    if s.isEmpty then none else some s.toString) |>.toArray
+
+
+/-- Per-constant measurements. `proveSec` is `none` when the constant was
+    executed but not (yet) proven. -/
+structure Result where
+  name : String
+  constants : Nat
+  fftCost : Float
+  executeSec : Float
+  proveSec : Option Float := none
+  deriving Inhabited
+
+/-- Round a Float to `d` decimal places, to keep the emitted JSON readable. -/
+def roundTo (d : Nat) (f : Float) : Float :=
+  let scale := (10.0 : Float) ^ d.toFloat
+  (f * scale).round / scale
+
+/-- Neutral, flat results object: `name → { constants, fft-cost, execute-time,
+    prove-time?, throughput? }`. No bencher-specific shaping. -/
+def Result.toJsonEntry (r : Result) : String × Json :=
+  let base : List (String × Json) :=
+    [ ("constants", Lean.toJson r.constants)
+    , ("fft-cost", Lean.toJson (roundTo 0 r.fftCost))
+    , ("execute-time", Lean.toJson (roundTo 6 r.executeSec)) ]
+  -- prove-time and the derived proving throughput (constants/prove-time, the
+  -- proving analog of compile's constants/sec) are present only once proven.
+  let fields := match r.proveSec with
+    | some p => base ++ [ ("prove-time", Lean.toJson (roundTo 6 p))
+                        , ("throughput", Lean.toJson (roundTo 2 (r.constants.toFloat / p))) ]
+    | none => base
+  (r.name, Json.mkObj fields)
+
+/-- Time a thunk, returning its value and the elapsed seconds. The result is
+    forced by `blackBoxIO` so a pure computation isn't optimized away. -/
+def timed (f : Unit → α) : IO (α × Float) := do
+  let t0 ← IO.monoNanosNow
+  let a ← blackBoxIO f ()
+  let t1 ← IO.monoNanosNow
+  return (a, (t1 - t0).toFloat / 1e9)
+
+def runTypecheckCmd (p : Cli.Parsed) : IO UInt32 := do
+  let some ixeArg := p.flag? "ixe"
+    | IO.eprintln "error: --ixe <path> is required"; return 1
+  let ixePath := ixeArg.as! String
+  -- Names come from the variadic positional args and/or a `--manifest` file.
+  let cliNames := p.variableArgsAs! String
+  let fileNames ← match p.flag? "manifest" with
+    | some f => pure (parseManifest (← IO.FS.readFile (f.as! String)))
+    | none => pure #[]
+  -- Union, preserving first-seen order, so the same const isn't proven twice.
+  let nameArgs := Id.run do
+    let mut seen : Std.HashSet String := {}
+    let mut acc : Array String := #[]
+    for n in cliNames ++ fileNames do
+      if !seen.contains n then seen := seen.insert n; acc := acc.push n
+    return acc
+  if nameArgs.isEmpty then
+    IO.eprintln "error: provide one or more constant names and/or --manifest <path>"
+    return 1
+  let jsonOut : Option String := (p.flag? "json").map (·.as! String)
+  -- subject-only: check just the target (`verify_const`, trusting its deps)
+  -- instead of re-checking the whole transitive closure (`verify_claim`).
+  let subjectOnly := p.hasFlag "subject-only"
+  -- Default: trace iff we're not in JSON/bencher mode.
+  let useTexray :=
+    if p.hasFlag "texray" then true
+    else if p.hasFlag "no-texray" then false
+    else jsonOut.isNone
+
+  -- Compile the IxVM kernel once; build the prover system once.
+  let .ok toplevel := IxVM.ixVM
+    | throw (IO.userError "Merging IxVM kernel failed")
+  let .ok compiled := toplevel.compile
+    | throw (IO.userError "Compilation of IxVM kernel failed")
+  let entrypoint := if subjectOnly then `verify_const else `verify_claim
+  let some funIdx := compiled.getFuncIdx entrypoint
+    | throw (IO.userError s!"{entrypoint} entrypoint missing")
+  let aiurSystem := Aiur.AiurSystem.build compiled.bytecode commitmentParameters
+
+  -- Load the serialized env lazily (the `ix check --ixe` path, #445): byte-window
+  -- constants over the backing buffer, so only the checked closure is ever
+  -- materialized — no whole-env (100+GB on mathlib) load just to check a few.
+  let ixonEnv ← match Ixon.deEnvAnon (← IO.FS.readBinFile ixePath) with
+    | .error e => IO.eprintln s!"deserialize {ixePath} failed: {e}"; return 1
+    | .ok env => pure env
+  IO.println s!"Loaded {ixePath}: {ixonEnv.namedCount} named, {ixonEnv.constCount} consts"
+  -- Build the witness for `addr`: subject-only (`verify_const`) trusts deps;
+  -- otherwise the full-closure check (`verify_claim`, `check addr none`).
+  let mkWitness (addr : Address) : IO IxVM.ClaimHarness.ClaimWitness :=
+    if subjectOnly then pure (IxVM.ClaimHarness.buildVerifyConst ixonEnv addr)
+    else IO.ofExcept (IxVM.ClaimHarness.buildClaimWitness ixonEnv (Ix.Claim.check addr none))
+  let mut targets : Array (String × Address) := #[]
+  for arg in nameArgs do
+    match Ix.Cli.NameResolve.resolveIxeAddr ixonEnv arg with
+    | some addr => targets := targets.push (arg, addr)
+    | none => IO.eprintln s!"warning: {arg} not found in {ixePath}; skipping"
+  if targets.isEmpty then
+    IO.eprintln "no requested constants were found in the env"
+    return 1
+
+  -- Phase 1: execute every constant (cheap, deterministic structural metrics).
+  -- Carry each target's address through so phase 2 can rebuild its witness.
+  IO.println "── Phase 1: execute (witness generation) ──"
+  let mut execed : Array (Result × Address) := #[]
+  for (label, addr) in targets do
+    try
+      let witness ← mkWitness addr
+      let (res, execSec) ← timed fun _ =>
+        Aiur.Bytecode.Toplevel.execute compiled.bytecode funIdx
+          witness.input witness.inputIOBuffer
+      match res with
+      | .error e => IO.eprintln s!"  execute {label} failed: {e}"
+      | .ok (_, _, queryCounts) =>
+        let stats := Aiur.computeStats compiled queryCounts
+        let constants := (IxVM.ClaimHarness.closureFrom ixonEnv addr).size
+        IO.println s!"  {label}: constants={constants} fft-cost={stats.totalFftCost} \
+          execute={execSec}s"
+        execed := execed.push
+          ({ name := label, constants, fftCost := stats.totalFftCost, executeSec := execSec }, addr)
+    catch e =>
+      IO.eprintln s!"  execute {label} threw: {e}"
+
+  -- Write the neutral results JSON, but only when `--json` was given. Rewritten
+  -- after each prove so a `timeout` kill still leaves a complete file.
+  let writeJson (results : Array Result) : IO Unit :=
+    match jsonOut with
+    | some path =>
+      IO.FS.writeFile path (Json.mkObj (results.map Result.toJsonEntry).toList).pretty
+    | none => pure ()
+
+  -- Phase 2: prove cheap→expensive. Refine each entry with its prove-time as it
+  -- lands. Install texray first so the prove spans (timeline + RAM Δ/peak) render.
+  if useTexray then TracingTexray.init {}
+  IO.println "── Phase 2: prove ──"
+  let mut ordered := execed.qsort (·.1.fftCost < ·.1.fftCost)
+  writeJson (ordered.map (·.1))
+  let mut spent : Float := 0.0
+  for i in [:ordered.size] do
+    let (r, addr) := ordered[i]!
+    try
+      let witness ← mkWitness addr
+      let (_, proveSec) ← timed fun _ =>
+        aiurSystem.prove friParameters funIdx witness.input witness.inputIOBuffer
+      spent := spent + proveSec
+      IO.println s!"  {r.name}: prove={proveSec}s (cumulative {spent}s)"
+      ordered := ordered.set! i ({ r with proveSec := some proveSec }, addr)
+      writeJson (ordered.map (·.1))
+    catch e =>
+      IO.eprintln s!"  prove {r.name} threw: {e}"
+
+  match jsonOut with
+  | some path => IO.println s!"wrote {ordered.size} benchmarks to {path} ({spent}s proving)"
+  | none => IO.println s!"proved {ordered.size} constants ({spent}s); pass --json <path> to emit results"
+  return 0
+
+def typecheckCmd : Cli.Cmd := `[Cli|
+  typecheck VIA runTypecheckCmd;
+  "Benchmark IxVM-kernel execution + proving of `Ix.Claim.check` over `.ixe` constants"
+
+  FLAGS:
+    "ixe"       : String; "Path to a serialized `Ixon.Env` (e.g. produced by `ix compile`). Required."
+    "manifest"  : String; "Additionally read constant names from a file (one per line; `#` comments and blank lines ignored). Unions with the positional names."
+    "json"      : String; "Write per-constant results JSON to this path. Off by default; normal CLI usage prints only the human-readable summary."
+    "subject-only";       "Check only each target itself (verify_const, trusting its deps) instead of re-checking its whole transitive closure (verify_claim)."
+    texray;               "Force the tracing-texray timeline + RAM breakdown on (per-prove spans on stderr)."
+    "no-texray";          "Force the breakdown off. Default: on iff --json was not given."
+
+  ARGS:
+    ...names : String;   "Fully-qualified constant name(s) to benchmark (e.g. `Nat.add_comm String.append`). Optional if `--manifest` is given."
+]
+
+def main (args : List String) : IO UInt32 :=
+  typecheckCmd.validate args
diff --git a/Cargo.lock b/Cargo.lock
index 1d573036..78ba8070 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3886,7 +3886,7 @@ dependencies = [
 [[package]]
 name = "tracing-texray"
 version = "0.2.0"
-source = "git+https://github.com/argumentcomputer/tracing-texray?rev=8ce04e3422cd48e68ef47fab95dba7d06b8c368c#8ce04e3422cd48e68ef47fab95dba7d06b8c368c"
+source = "git+https://github.com/argumentcomputer/tracing-texray?rev=31d194dd1bc50458d26f77c89bb68f67e5d1c149#31d194dd1bc50458d26f77c89bb68f67e5d1c149"
 dependencies = [
  "loom",
  "parking_lot",
diff --git a/Cargo.toml b/Cargo.toml
index e83b8384..d1ee422a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,7 +31,7 @@ n0-error = { version = "0.1", optional = true }
 getrandom = { version = "0.3", optional = true }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
-tracing-texray = { git = "https://github.com/argumentcomputer/tracing-texray", rev = "8ce04e3422cd48e68ef47fab95dba7d06b8c368c" }
+tracing-texray = { git = "https://github.com/argumentcomputer/tracing-texray", rev = "31d194dd1bc50458d26f77c89bb68f67e5d1c149" }
 bincode = { version = "2.0.1", optional = true }
 serde = { version = "1.0.219", features = ["derive"], optional = true }
 
diff --git a/Ix/Cli/AddrOfCmd.lean b/Ix/Cli/AddrOfCmd.lean
index b21803fc..da70104e 100644
--- a/Ix/Cli/AddrOfCmd.lean
+++ b/Ix/Cli/AddrOfCmd.lean
@@ -17,16 +17,13 @@ public import Ix.Environment
 public import Ix.IxVM.ClaimHarness
 public import Ix.Ixon
 public import Ix.Meta
+public import Ix.Cli.NameResolve
 
 public section
 
-namespace Ix.Cli.AddrOfCmd
+open Ix.Cli.NameResolve
 
-private def parseName (arg : String) : Lean.Name :=
-  arg.splitOn "." |>.foldl (init := .anonymous)
-    fun acc s => match s.toNat? with
-      | some n => Lean.Name.mkNum acc n
-      | none   => Lean.Name.mkStr acc s
+namespace Ix.Cli.AddrOfCmd
 
 def runAddrOfCmd (p : Cli.Parsed) : IO UInt32 := do
   -- Suppress Rust-side `[compile_env]` scheduler noise; the only
@@ -34,7 +31,8 @@ def runAddrOfCmd (p : Cli.Parsed) : IO UInt32 := do
   Std.Internal.UV.System.osSetenv "IX_QUIET" "1"
   let some nameArg := p.positionalArg? "name"
     | p.printError "error: must specify <Lean.Name>"; return 1
-  let name := parseName (nameArg.as! String)
+  let argStr := nameArg.as! String
+  let name := parseName argStr
   let ixePath : Option String :=
     (p.flag? "ixe").map (·.as! String)
   match ixePath with
@@ -44,7 +42,7 @@ def runAddrOfCmd (p : Cli.Parsed) : IO UInt32 := do
       | .error e =>
         IO.eprintln s!"error: failed to deserialize {path}: {e}"; return 1
       | .ok env => pure env
-    match ixonEnv.getAddr? (Ix.Name.fromLeanName name) with
+    match resolveIxeAddr ixonEnv argStr with
     | none =>
       IO.eprintln s!"error: {name} not found in {path}"; return 1
     | some addr =>
diff --git a/Ix/Cli/CheckCmd.lean b/Ix/Cli/CheckCmd.lean
index 9a2afb24..5f55d2b5 100644
--- a/Ix/Cli/CheckCmd.lean
+++ b/Ix/Cli/CheckCmd.lean
@@ -32,29 +32,15 @@ public import Ix.IxVM.ClaimHarness
 public import Ix.Ixon
 public import Ix.Meta
 public import Ix.Store
+public import Ix.Cli.NameResolve
 
 public section
 
 open IxVM.ClaimHarness
+open Ix.Cli.NameResolve
 
 namespace Ix.Cli.CheckCmd
 
-def parseName (arg : String) : Lean.Name :=
-  arg.splitOn "." |>.foldl (init := .anonymous)
-    fun acc s => match s.toNat? with
-      | some n => Lean.Name.mkNum acc n
-      | none   => Lean.Name.mkStr acc s
-
-/-- Resolve a CLI name argument against the env. `parseName` can't rebuild
-    private names (`_private.M.0.foo`) — the marker/scope-index components
-    don't round-trip through naive dot-splitting. So if the parsed name
-    isn't present, fall back to matching the arg against each constant's
-    `toString` (the displayed form the user copied). -/
-def resolveName (env : Lean.Environment) (arg : String) : Option Lean.Name :=
-  let parsed := parseName arg
-  if env.constants.contains parsed then some parsed
-  else (env.constants.toList.find? (fun (k, _) => toString k == arg)).map (·.1)
-
 def addrOfHex! (label : String) (s : String) : IO Address := do
   match Address.fromString s with
   | some a => pure a
@@ -92,22 +78,6 @@ def loadClaimAndTrees (claimHex : String) :
     trees := trees.insert r tree
   return (claim, trees)
 
-/-- Reverse of `Ix.Name.fromLeanName`. Drops the per-node hash. -/
-partial def ixNameToLeanName : Ix.Name → Lean.Name
-  | .anonymous _ => .anonymous
-  | .str p s _ => .str (ixNameToLeanName p) s
-  | .num p n _ => .num (ixNameToLeanName p) n
-
-/-- Resolve a CLI name argument against a `.ixe` env's named map. Like
-    `resolveName` for the compiled-in env: `parseName` can't rebuild private
-    names, so when the direct lookup misses, fall back to matching the arg
-    against each named constant's displayed `toString`. -/
-def resolveIxeAddr (ixonEnv : Ixon.Env) (arg : String) : Option Address :=
-  match ixonEnv.getAddr? (Ix.Name.fromLeanName (parseName arg)) with
-  | some a => some a
-  | none =>
-    (ixonEnv.named.toList.find? (fun (k, _) => toString (ixNameToLeanName k) == arg)).map (·.2.addr)
-
 /-- Build a `ClaimWitness` for the `verify_claim` entrypoint against
     `Ix.Claim.check addr none` (full transitive typecheck of the
     target's closure). -/
diff --git a/Ix/Cli/NameResolve.lean b/Ix/Cli/NameResolve.lean
new file mode 100644
index 00000000..9c8831d3
--- /dev/null
+++ b/Ix/Cli/NameResolve.lean
@@ -0,0 +1,58 @@
+/-
+  Shared name-resolution helpers for the `ix` CLI commands.
+
+  Turning a user-supplied dotted name (`Nat.add_comm`, `_private.M.0.foo`)
+  into either a `Lean.Name` or a content `Address` is needed by several
+  commands (`addr-of`, `check`, …) and by benchmark drivers. These helpers
+  live here — depending only on `Address`/`Environment`/`Ixon` — so callers
+  don't have to pull in the Aiur typecheck machinery just to resolve a name.
+-/
+module
+public import Ix.Address
+public import Ix.Environment
+public import Ix.Ixon
+
+public section
+
+namespace Ix.Cli.NameResolve
+
+/-- Parse a dotted CLI argument into a `Lean.Name`, treating all-digit
+    components as numeric (`Nat`) name parts and everything else as string
+    parts. Naive: components that don't round-trip through `.`-splitting
+    (private markers, macro scopes) won't reconstruct — see `resolveName`
+    / `resolveIxeAddr` for the `toString` fallback that covers those. -/
+def parseName (arg : String) : Lean.Name :=
+  arg.splitOn "." |>.foldl (init := .anonymous)
+    fun acc s => match s.toNat? with
+      | some n => Lean.Name.mkNum acc n
+      | none   => Lean.Name.mkStr acc s
+
+/-- Resolve a CLI name argument against the env. `parseName` can't rebuild
+    private names (`_private.M.0.foo`) — the marker/scope-index components
+    don't round-trip through naive dot-splitting. So if the parsed name
+    isn't present, fall back to matching the arg against each constant's
+    `toString` (the displayed form the user copied). -/
+def resolveName (env : Lean.Environment) (arg : String) : Option Lean.Name :=
+  let parsed := parseName arg
+  if env.constants.contains parsed then some parsed
+  else (env.constants.toList.find? (fun (k, _) => toString k == arg)).map (·.1)
+
+/-- Reverse of `Ix.Name.fromLeanName`. Drops the per-node hash. -/
+partial def ixNameToLeanName : Ix.Name → Lean.Name
+  | .anonymous _ => .anonymous
+  | .str p s _ => .str (ixNameToLeanName p) s
+  | .num p n _ => .num (ixNameToLeanName p) n
+
+/-- Resolve a CLI name argument against a `.ixe` env's named map. Like
+    `resolveName` for the compiled-in env: `parseName` can't rebuild private
+    names, so when the direct lookup misses, fall back to matching the arg
+    against each named constant's displayed `toString`. -/
+def resolveIxeAddr (ixonEnv : Ixon.Env) (arg : String) : Option Address :=
+  match ixonEnv.getAddr? (Ix.Name.fromLeanName (parseName arg)) with
+  | some a => some a
+  | none =>
+    (ixonEnv.named.toList.find? (fun (k, _) => toString (ixNameToLeanName k) == arg)).map (·.2.addr)
+
+end Ix.Cli.NameResolve
+
+end
diff --git a/Ix/TracingTexray.lean b/Ix/TracingTexray.lean
index e63ebd3f..c0c4c064 100644
--- a/Ix/TracingTexray.lean
+++ b/Ix/TracingTexray.lean
@@ -5,9 +5,11 @@
   (`aiur/execute`, `aiur/witness`, `aiur/prove`) and each STARK proving
   stage (`stark/...`). After `init` installs the subscriber:
     * a one-line `[texray] <span>: <dur>  ── RAM Δ +X peak Y` is streamed
-      to stderr as each tracked span closes (when `streaming = true`);
-    * the full texray graph (with RAM block, Linux-only) prints when
-      `aiur/prove` exits.
+      to stderr as each tracked span closes (when `streaming = true`),
+      followed by a companion `[texray] <span> peak-rss-bytes=<N>
+      (<X.YZ MiB>)` line carrying the raw peak in bytes for programmatic
+      consumers (CI bench, grep);
+    * the full texray graph prints when `aiur/prove` exits.
 -/
 
 module
diff --git a/lakefile.lean b/lakefile.lean
index 5362daa8..b192de93 100644
--- a/lakefile.lean
+++ b/lakefile.lean
@@ -118,8 +118,8 @@ lean_exe «bench-ixvm» where
 lean_exe «bench-shardmap» where
   root := `Benchmarks.ShardMap
 
-lean_exe «bench-check-nat-add-comm» where
-  root := `Benchmarks.CheckNatAddComm
+lean_exe «bench-typecheck» where
+  root := `Benchmarks.Typecheck
   supportInterpreter := true
 
 end Benchmarks
diff --git a/src/ffi/texray.rs b/src/ffi/texray.rs
index 85486129..77d91b5b 100644
--- a/src/ffi/texray.rs
+++ b/src/ffi/texray.rs
@@ -2,8 +2,10 @@
 //!
 //! With `streaming = true`, each tracked span (e.g. `aiur/execute`,
 //! `aiur/witness`, `aiur/prove`) emits a one-line `[texray] <name>: <dur>
-//!   ── RAM Δ +X peak Y` to stderr as it closes, and the full texray
-//! graph prints when the examined root span exits.
+//!   ── RAM Δ +X peak Y` to stderr as it closes, followed by a companion
+//! `[texray] <name> peak-rss-bytes=<N> (<X.YZ MiB>)` line (raw bytes for
+//! CI/grep). The full texray graph prints when the examined root span
+//! exits.
 
 use lean_ffi::object::{LeanBorrowed, LeanIOResult, LeanOwned, LeanString};
 use tracing_subscriber::{
@@ -22,8 +24,9 @@ use tracing_subscriber::{
 ///   everything, including upstream library spans.
 /// - `track_ram`: sample VmRSS/VmHWM per span. Linux-only; zeros elsewhere.
 /// - `streaming`: emit per-span `[texray] <name>: <dur>  ── RAM Δ +X peak Y`
-///   lines on stderr as each span closes. The texray graph prints either
-///   way when the examined root span exits.
+///   plus a `[texray] <name> peak-rss-bytes=<N> (<X.YZ MiB>)` companion on
+///   stderr as each span closes. The texray graph prints either way when
+///   the examined root span exits.
 #[unsafe(no_mangle)]
 extern "C" fn rs_texray_init(
   name_prefixes: LeanString<LeanBorrowed<'_>>,