diff --git a/.github/actions/bencher-track/action.yml b/.github/actions/bencher-track/action.yml new file mode 100644 index 00000000..93d6dc25 --- /dev/null +++ b/.github/actions/bencher-track/action.yml @@ -0,0 +1,82 @@ +name: Track benchmarks on bencher +description: >- + Run `bencher run` for the `ix` project, windowing deterministic "stepping" + measures to commits since the last lean-toolchain bump (and dropping them on + the bump commit via --thresholds-reset, so a toolchain step lands without a + spurious alert). The calling job must check out with fetch-depth: 0. + +inputs: + testbed: + description: Bencher testbed name. + required: true + file: + description: Bencher Metric Format JSON file to upload. + required: true + key: + description: BENCHER_API_KEY (project-scoped `bencher_run_*` or user-scoped `bencher_user_*`). + required: true + github-token: + description: GITHUB_TOKEN, for the --github-actions comment. + required: true + always-thresholds: + description: >- + Raw `--threshold-measure …` flags for measures tracked on every commit + (non-stepping). Word-split, so keep each flag a single token. + required: false + default: "" + stepping-measures: + description: >- + Space-separated measure names that step on a toolchain bump (e.g. + "file-size constants" or "fft-cost"). Windowed to commits-since-bump and + omitted on the bump commit. + required: false + default: "" + +runs: + using: composite + steps: + - uses: bencherdev/bencher@v0.6.7 + - shell: bash + env: + BENCHER_KEY: ${{ inputs.key }} + GH_TOKEN: ${{ inputs.github-token }} + TESTBED: ${{ inputs.testbed }} + FILE: ${{ inputs.file }} + ALWAYS_THRESHOLDS: ${{ inputs.always-thresholds }} + STEPPING_MEASURES: ${{ inputs.stepping-measures }} + run: | + # Sample only commits since lean-toolchain last changed, so a stepping + # measure's baseline window never straddles a toolchain bump. + last_bump=$(git log -1 --format=%H -- lean-toolchain) + sample=$(git rev-list --count "${last_bump}..HEAD") + [ "$sample" -gt 64 ] && sample=64 + echo "commits since last lean-toolchain bump (capped 64): $sample" + # On the bump commit (sample=0) omit the stepping measures entirely; + # --thresholds-reset then clears their models so the step doesn't alert. + # Between bumps these measures are deterministic, so pin them exactly + # (0% on both sides): any change in either direction is a real circuit + # or closure change, not noise, and should alert. + step_thresholds=() + if [ "$sample" -gt 0 ]; then + for m in $STEPPING_MEASURES; do + step_thresholds+=( + --threshold-measure "$m" + --threshold-test percentage + --threshold-max-sample-size "$sample" + --threshold-upper-boundary 0 + --threshold-lower-boundary 0 + ) + done + fi + bencher run \ + --project ix \ + --key "$BENCHER_KEY" \ + --branch "$GITHUB_REF_NAME" \ + --hash "$GITHUB_SHA" \ + --testbed "$TESTBED" \ + --adapter json \ + --github-actions "$GH_TOKEN" \ + $ALWAYS_THRESHOLDS \ + "${step_thresholds[@]}" \ + --thresholds-reset \ + --file "$FILE" diff --git a/.github/workflows/aiur-bench.yml b/.github/workflows/aiur-bench.yml new file mode 100644 index 00000000..5f94ff99 --- /dev/null +++ b/.github/workflows/aiur-bench.yml @@ -0,0 +1,259 @@ +name: Aiur benchmarks + +# One workflow, two benchmarks per library env, on every push to main: +# 1. compile job — `ix compile` the Lean env to a `.ixe` (compile-throughput +# metrics) and cache the `.ixe`. +# 2. prove job — restore that `.ixe` from the cache (no recompile) and +# STARK-check selected constants over it via bench-typecheck +# (Aiur execute + prove metrics). +# The prove job reuses the exact `.ixe` the compile job built, so the compiler +# runs once. Compile and prove report to separate bencher testbeds so each one's +# `--thresholds-reset` only touches its own measures. + +on: + push: + branches: main + workflow_dispatch: + +permissions: + contents: read + checks: write + +env: + COMPILE_DIR: Benchmarks/Compile + +jobs: + # Build + stage the `ix` and `bench-typecheck` binaries once, then restore + # them on the (more expensive) matrix runners. + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + # Pinned Rust toolchain + cargo cache (~/.cargo + target/, via the action's + # built-in rust-cache), so `lake build`'s cargo step doesn't recompile the + # Plonky3/multi-stark deps from scratch on every run. + - uses: actions-rust-lang/setup-rust-toolchain@v1 + # `.cargo/config.toml` sets `-Ctarget-cpu=native`, and the binary is built + # once here (then only restored on the warp runners), so this build host's + # CPU fixes the instruction set baked in — incl. AVX-512, a big Plonky3 + # speedup. Log it so a benchmark shift can be traced to a build-CPU change. + - name: Log build CPU + run: | + lscpu + grep -qw avx512f /proc/cpuinfo \ + && echo "AVX-512F: present (compiled into the binary)" \ + || echo "AVX-512F: absent" + - uses: leanprover/lean-action@v1 + with: + auto-config: false + build: true + build-args: "ix --wfail -v" + - run: | + mkdir -p ~/.local/bin + echo | lake run install # copies ix -> ~/.local/bin/ix + lake build bench-typecheck + cp .lake/build/bin/bench-typecheck ~/.local/bin/bench-typecheck + chmod +x ~/.local/bin/bench-typecheck + - uses: actions/cache/save@v5 + with: + path: ~/.local/bin + key: aiur-bench-bins-${{ github.sha }} + + # Compile each library env to a `.ixe` and track compile throughput. Caches + # the `.ixe` (keyed by sha + matrix job) for the prove job to consume. + compile: + needs: build + runs-on: warp-ubuntu-latest-x64-32x + strategy: + fail-fast: false + matrix: + bench: [InitStd, Lean, Mathlib, FLT] # Add FC if updated to latest Lean + include: + - bench: Mathlib + mathlib: true + - bench: FLT + cache_pkg: flt + mathlib: true + # - bench: FC + # cache_pkg: formal_conjectures + # mathlib: true + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: actions/cache/restore@v5 + with: + path: ~/.local/bin + key: aiur-bench-bins-${{ github.sha }} + - run: echo "$HOME/.local/bin" >> $GITHUB_PATH + # FC's library env lives in a sibling `${COMPILE_DIR}FC` package dir, so + # point COMPILE_DIR there for the FC matrix job. + # - if: matrix.bench == 'FC' + # run: echo "COMPILE_DIR=${{ env.COMPILE_DIR }}FC" | tee -a $GITHUB_ENV + # Download elan; fetch the mathlib cache only for benches that import + # Mathlib (Mathlib, FLT) — InitStd/Lean would otherwise pull it for + # nothing, since the shared Benchmarks/Compile package depends on mathlib. + - uses: leanprover/lean-action@v1 + with: + lake-package-directory: ${{ env.COMPILE_DIR }} + auto-config: false + use-github-cache: false + use-mathlib-cache: ${{ matrix.mathlib && 'true' || 'false' }} + # FLT and FC take a few minutes to rebuild, so cache their build artifacts. + - if: matrix.cache_pkg + uses: actions/cache@v5 + with: + path: ${{ env.COMPILE_DIR }}/.lake/packages/${{ matrix.cache_pkg }}/.lake/build + key: ${{ matrix.cache_pkg }}-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles(format('{0}/lean-toolchain', env.COMPILE_DIR)) }}-${{ hashFiles(format('{0}/lake-manifest.json', env.COMPILE_DIR)) }} + # No `--wfail` here: formal-conjectures (FC) emits a copyright-notice + # warning that must not fail the build. + - run: lake build Compile${{ matrix.bench }} + working-directory: ${{ env.COMPILE_DIR }} + # Serialize the env to a `.ixe` and emit the `##benchmark##` line. + - name: Run ix compile + run: | + ix compile ${{ env.COMPILE_DIR }}/Compile${{ matrix.bench }}.lean \ + --out ${{ matrix.bench }}.ixe 2>&1 | tee output.txt + # Cache the `.ixe` for the prove job (reused, never recompiled there). + # Only the matrix jobs the prove job consumes, to stay under the repo cache limit. + - if: matrix.bench == 'InitStd' || matrix.bench == 'Mathlib' + uses: actions/cache/save@v5 + with: + path: ${{ matrix.bench }}.ixe + key: aiur-ixe-${{ github.sha }}-${{ matrix.bench }} + - name: Generate compile benchmark JSON + run: | + line=$(grep '^##benchmark##' output.txt) + elapsed_s=$(echo "$line" | awk '{printf "%.3f", $2 / 1000}') + bytes=$(echo "$line" | awk '{print $3}') + constants=$(echo "$line" | awk '{print $4}') + throughput=$(echo "$line" | awk '{if ($2 > 0) printf "%.2f", $4 * 1000 / $2; else print 0}') + cat > benchmark.json <> $GITHUB_PATH + # Provision the toolchain so the bench-typecheck binary finds libleanshared + # (no package build). use-github-cache off: nothing to cache here, and + # parallel matrix jobs share a key and would race the cache save. + - uses: leanprover/lean-action@v1 + with: + auto-config: false + build: false + use-github-cache: false + # Pull the `.ixe` the compile job built — do NOT recompile it here. + - uses: actions/cache/restore@v5 + with: + path: ${{ matrix.bench }}.ixe + key: aiur-ixe-${{ github.sha }}-${{ matrix.bench }} + fail-on-cache-miss: true + # Run each constant in its own process so a clean failure or timeout drops + # only that constant from the report. NB: a constant heavy enough to OOM + # the runner host still cancels the whole job (an OOM SIGKILL of the host + # is uncatchable here), so every listed constant must fit in runner RAM. + # RAM is tracked via tracing-texray's machine-readable streaming lines + # (`[texray] peak-rss-bytes= ()`, emitted as each + # span closes): we parse the raw byte integer (awk's `$2+0` stops at + # the first non-digit, ignoring the parenthesized human suffix) and + # fold the max over spans in as `peak-rss` (bytes), the proving RSS + # high-water mark. + - name: Run Aiur typecheck benchmark + run: | + measure() { + local c="$1" rss + timeout 20m bench-typecheck --ixe ${{ matrix.bench }}.ixe "$c" \ + --json "res-$c.json" --texray 2>"tx-$c.log" \ + || echo "warning: $c failed (OOM/timeout); dropping it from this report" + rss=$(awk -F'peak-rss-bytes=' 'NF>1 && $2+0>max {max=$2+0} END {if (max>0) print max}' "tx-$c.log") + if [ -f "res-$c.json" ] && [ -n "$rss" ] && [ "$rss" -gt 0 ]; then + jq --argjson rss "$rss" 'map_values(. + {"peak-rss": $rss})' \ + "res-$c.json" > "res-$c.json.tmp" && mv "res-$c.json.tmp" "res-$c.json" || true + fi + } + for c in ${{ matrix.consts }}; do measure "$c"; done + # Merge the per-constant results; if none produced anything, emit `{}`. + jq -s 'reduce .[] as $o ({}; . + $o)' res-*.json > results.json 2>/dev/null \ + || echo '{}' > results.json + [ -s results.json ] || echo '{}' > results.json + # Wrap each metric value as { "value": v } for Bencher Metric Format. + # bench-typecheck already emits slug keys (constants, fft-cost, + # execute-time, prove-time, throughput = constants/prove-time); peak-rss + # is injected above. + jq ' + map_values(to_entries | map({(.key): {value: .value}}) | add) + ' results.json > aiur.json + cat aiur.json + # Upload Aiur metrics. fft-cost and constants are deterministic and step on a + # toolchain bump (a different stdlib changes the constants' circuits and + # closure sizes), so they're the bump-windowed stepping measures — same + # treatment compile gives file-size/constants. prove-time/execute-time, + # peak-rss (texray's proving RSS high-water mark), and throughput + # (constants/prove-time, like compile's, where a drop is the regression) + # are noisy wall-clock and ride their normal 64-window. + - uses: ./.github/actions/bencher-track + with: + testbed: aiur-typecheck-x64-32x + file: aiur.json + key: ${{ secrets.BENCHER_API_KEY }} + github-token: ${{ secrets.GITHUB_TOKEN }} + stepping-measures: fft-cost constants + always-thresholds: | + --threshold-measure prove-time --threshold-test percentage + --threshold-max-sample-size 64 --threshold-upper-boundary 0.10 + --threshold-lower-boundary _ + --threshold-measure execute-time --threshold-test percentage + --threshold-max-sample-size 64 --threshold-upper-boundary 0.10 + --threshold-lower-boundary _ + --threshold-measure peak-rss --threshold-test percentage + --threshold-max-sample-size 64 --threshold-upper-boundary 0.10 + --threshold-lower-boundary _ + --threshold-measure throughput --threshold-test percentage + --threshold-max-sample-size 64 --threshold-upper-boundary _ + --threshold-lower-boundary 0.10 diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml deleted file mode 100644 index b003b39f..00000000 --- a/.github/workflows/compile.yml +++ /dev/null @@ -1,145 +0,0 @@ -name: Benchmark Ix compiler - -on: - push: - branches: main - workflow_dispatch: - -permissions: - contents: read - checks: write - -env: - COMPILE_DIR: Benchmarks/Compile - -jobs: - # First build and cache the `ix` binary for use in each matrix job - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - uses: leanprover/lean-action@v1 - with: - auto-config: false - build: true - build-args: "ix --wfail -v" - - run: | - mkdir -p ~/.local/bin - echo | lake run install - - uses: actions/cache/save@v5 - with: - path: ~/.local/bin/ix - key: ix-${{ github.sha }} - - # Run the Ix compiler over each library env in Benchmarks/Compile - run-compiler: - needs: build - strategy: - matrix: - bench: [InitStd, Lean, Mathlib, FLT] # Add FC if updated to latest Lean - include: - - bench: FLT - cache_pkg: flt - # - bench: FC - # cache_pkg: formal_conjectures - runs-on: warp-ubuntu-latest-x64-32x - steps: - - uses: actions/checkout@v6 - with: - fetch-depth: 0 - # Restore cached `ix` binary - - uses: actions/cache/restore@v5 - with: - path: ~/.local/bin/ix - key: ix-${{ github.sha }} - - run: echo "$HOME/.local/bin" >> $GITHUB_PATH - - if: matrix.bench == 'FC' - run: echo "COMPILE_DIR=${{env.COMPILE_DIR}}FC" | tee -a $GITHUB_ENV - # Download elan and fetch mathlib cache - - uses: leanprover/lean-action@v1 - with: - lake-package-directory: ${{ env.COMPILE_DIR }} - auto-config: false - use-github-cache: false - # FLT and FC take a few minutes to rebuild, so we cache the build artifacts - - if: matrix.cache_pkg - uses: actions/cache@v5 - with: - path: ${{ env.COMPILE_DIR }}/.lake/packages/${{ matrix.cache_pkg }}/.lake/build - key: ${{ matrix.cache_pkg }}-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles(format('{0}/lean-toolchain', env.COMPILE_DIR)) }}-${{ hashFiles(format('{0}/lake-manifest.json', env.COMPILE_DIR)) }} - # Build the input library - # Allow warnings due to copyright notice in formal-conjectures - - run: lake build Compile${{ matrix.bench }} - working-directory: ${{ env.COMPILE_DIR }} - # Run the `ix` compiler over the input library env - - name: Run ix compile - run: ix compile ${{ env.COMPILE_DIR }}/Compile${{ matrix.bench }}.lean 2>&1 | tee output.txt - # Parse the ##benchmark## line into Bencher Metric Format JSON - - name: Generate benchmark JSON - run: | - line=$(grep '^##benchmark##' output.txt) - elapsed_s=$(echo "$line" | awk '{printf "%.3f", $2 / 1000}') - bytes=$(echo "$line" | awk '{print $3}') - constants=$(echo "$line" | awk '{print $4}') - throughput=$(echo "$line" | awk '{if ($2 > 0) printf "%.2f", $4 * 1000 / $2; else print 0}') - cat > benchmark.json <= 1. - sample="${{ steps.thresholds.outputs.sample }}" - step_thresholds=() - if [ "$sample" -gt 0 ]; then - for m in file-size constants; do - step_thresholds+=( - --threshold-measure "$m" - --threshold-test percentage - --threshold-max-sample-size "$sample" - --threshold-upper-boundary 0.10 - --threshold-lower-boundary _ - ) - done - fi - bencher run \ - --project ix \ - --token '${{ secrets.BENCHER_API_TOKEN }}' \ - --branch '${{ github.ref_name }}' \ - --hash '${{ github.sha }}' \ - --testbed warp-ubuntu-x64-32x \ - --adapter json \ - --github-actions '${{ secrets.GITHUB_TOKEN }}' \ - --threshold-measure build-time \ - --threshold-test percentage \ - --threshold-max-sample-size 64 \ - --threshold-upper-boundary 0.05 \ - --threshold-lower-boundary _ \ - --threshold-measure throughput \ - --threshold-test percentage \ - --threshold-max-sample-size 64 \ - --threshold-upper-boundary _ \ - --threshold-lower-boundary 0.05 \ - "${step_thresholds[@]}" \ - --thresholds-reset \ - --file benchmark.json diff --git a/Benchmarks/CheckNatAddComm.lean b/Benchmarks/CheckNatAddComm.lean deleted file mode 100644 index f5748e05..00000000 --- a/Benchmarks/CheckNatAddComm.lean +++ /dev/null @@ -1,43 +0,0 @@ -import Ix.Meta -import Ix.IxVM -import Ix.Aiur.Protocol -import Ix.Aiur.Compiler -import Ix.Benchmark.Bench - -open BgroupM - -def commitmentParameters : Aiur.CommitmentParameters := { - logBlowup := 1 - capHeight := 0 -} - -def friParameters : Aiur.FriParameters := { - logFinalPolyLen := 0 - maxLogArity := 1 - numQueries := 100 - commitProofOfWorkBits := 20 - queryProofOfWorkBits := 0 -} - -def main : IO Unit := do - let .ok toplevel := IxVM.ixVM - | throw (IO.userError "Merging failed") - let .ok compiled := toplevel.compile - | throw (IO.userError "Compilation failed") - let some funIdx := compiled.getFuncIdx `verify_claim - | throw (IO.userError "verify_claim entrypoint missing") - let aiurSystem := Aiur.AiurSystem.build compiled.bytecode commitmentParameters - - let env ← get_env! - let ixonEnv ← IxVM.ClaimHarness.loadIxonEnv ``Nat.add_comm env - let target ← IxVM.ClaimHarness.lookupAddr ixonEnv ``Nat.add_comm - -- `assumptions = none` = full transitive typecheck. - let witness ← IO.ofExcept <| IxVM.ClaimHarness.buildClaimWitness ixonEnv - (Ix.Claim.check target none) - - let _ ← bgroup "Kernel typechecking" { oneShot := true } do - throughput (.Elements ixonEnv.consts.size.toUInt64 "consts") - bench "check Nat.add_comm" - (aiurSystem.prove friParameters funIdx witness.input) - witness.inputIOBuffer - return diff --git a/Benchmarks/Typecheck.lean b/Benchmarks/Typecheck.lean new file mode 100644 index 00000000..ce4c64ca --- /dev/null +++ b/Benchmarks/Typecheck.lean @@ -0,0 +1,261 @@ +import Cli +import Ix.IxVM +import Ix.Aiur.Protocol +import Ix.Aiur.Compiler +import Ix.Aiur.Statistics +import Ix.TracingTexray +import Ix.Benchmark.Bench +import Ix.Cli.NameResolve + +/-! +# Aiur typecheck benchmark + +Out-of-circuit **execution** and in-circuit **proving** benchmark for the IxVM +kernel's `verify_claim` entrypoint, run over constants from a serialized +`Ixon.Env` (`.ixe`). Reading the env from a `.ixe` keeps this on Ix's critical +path (same load `ix check --ixe` uses) and avoids importing Lean modules at +runtime. Useful standalone (per-constant timeline + RAM breakdown via +tracing-texray) and as a machine source (neutral results JSON). + +``` +lake exe bench-typecheck --ixe [names…] [flags] + + --ixe serialized `Ixon.Env`, e.g. from `ix compile Foo.lean` + (writes `foo.ixe`). Required. + [names…] zero or more fully-qualified constant names to benchmark, + e.g. `Nat.add_comm String.append`. + --manifest additionally read names from a file: one per line, blank + lines and `#` comments ignored. Unions with [names…]. + + Names (from either source) resolve against the env's named map via + `String.toName` plus a `toString` fallback (mirrors `ix check --ixe`), so + numeric / private components round-trip (`Foo.0.Bar`, `_private.M.0.foo`). + Pass at least one name or a `--manifest`. + + --json write per-constant results JSON to . Off by default: + normal CLI usage prints only the human-readable summary. + --texray force the tracing-texray timeline + RAM breakdown on. + --no-texray force it off. Default: on iff `--json` was NOT given, so a + plain local run gets the breakdown while a JSON run stays quiet. +``` + +For each constant the harness STARK-checks `Ix.Claim.check addr none` (the full +transitive typecheck) in two phases: + +1. **Execute** (every constant): run the bytecode out-of-circuit. Cheap and + deterministic, so we always record `constants` (closure size), `fft-cost` + (Σ width·height·log2(height) over circuits — the proving-cost proxy), and + `execute-time`. +2. **Prove** (cheap→expensive by measured fft-cost): the end-to-end STARK prove, + recording `prove-time`. With texray on, each prove emits a per-span timeline + (`aiur/execute`, `aiur/witness`, `stark/...`) with RAM Δ/peak to stderr. + +When `--json` is set the file is rewritten after every prove, so an external +`timeout` still leaves a complete file of the results collected so far (cheapest +first). A name absent from the env or whose execution errors is skipped with a +warning, so a single bad name never fails the run. The harness imposes no time +limit; bound a run with an external `timeout` if needed. + +The JSON is a neutral, flat shape (`{ "": { "constants": …, "fft-cost": …, +"execute-time": …, "prove-time": …, "throughput": … } }`, where `prove-time` and +`throughput` appear only for proven constants); any bencher-specific reshaping +is the caller's job (see `.github/workflows/aiur-bench.yml`). +-/ + +open Lean (Json Name) + +def commitmentParameters : Aiur.CommitmentParameters := { + logBlowup := 1 + capHeight := 0 +} + +def friParameters : Aiur.FriParameters := { + logFinalPolyLen := 0 + maxLogArity := 1 + numQueries := 100 + commitProofOfWorkBits := 20 + queryProofOfWorkBits := 0 +} + +/-- Manifest lines as raw strings: one name per line. Everything from a `#` to + end of line is a comment (whole-line or inline); blank lines are dropped. + `#` never appears in a Lean name, so splitting on it is safe. Resolution + against the env happens later (so the `toString` fallback can see the + displayed form the user wrote). -/ +def parseManifest (contents : String) : Array String := + (contents.splitOn "\n").filterMap (fun line => + let s := ((line.splitOn "#").head?.getD "").trimAscii + if s.isEmpty then none else some s.toString) |>.toArray + + +/-- Per-constant measurements. `proveSec` is `none` when the constant was + executed but not (yet) proven. -/ +structure Result where + name : String + constants : Nat + fftCost : Float + executeSec : Float + proveSec : Option Float := none + deriving Inhabited + +/-- Round a Float to `d` decimal places, to keep the emitted JSON readable. -/ +def roundTo (d : Nat) (f : Float) : Float := + let scale := (10.0 : Float) ^ d.toFloat + (f * scale).round / scale + +/-- Neutral, flat results object: `name → { constants, fft-cost, execute-time, + prove-time?, throughput? }`. No bencher-specific shaping. -/ +def Result.toJsonEntry (r : Result) : String × Json := + let base : List (String × Json) := + [ ("constants", Lean.toJson r.constants) + , ("fft-cost", Lean.toJson (roundTo 0 r.fftCost)) + , ("execute-time", Lean.toJson (roundTo 6 r.executeSec)) ] + -- prove-time and the derived proving throughput (constants/prove-time, the + -- proving analog of compile's constants/sec) are present only once proven. + let fields := match r.proveSec with + | some p => base ++ [ ("prove-time", Lean.toJson (roundTo 6 p)) + , ("throughput", Lean.toJson (roundTo 2 (r.constants.toFloat / p))) ] + | none => base + (r.name, Json.mkObj fields) + +/-- Time a thunk, returning its value and the elapsed seconds. The result is + forced by `blackBoxIO` so a pure computation isn't optimized away. -/ +def timed (f : Unit → α) : IO (α × Float) := do + let t0 ← IO.monoNanosNow + let a ← blackBoxIO f () + let t1 ← IO.monoNanosNow + return (a, (t1 - t0).toFloat / 1e9) + +def runTypecheckCmd (p : Cli.Parsed) : IO UInt32 := do + let some ixeArg := p.flag? "ixe" + | IO.eprintln "error: --ixe is required"; return 1 + let ixePath := ixeArg.as! String + -- Names come from the variadic positional args and/or a `--manifest` file. + let cliNames := p.variableArgsAs! String + let fileNames ← match p.flag? "manifest" with + | some f => pure (parseManifest (← IO.FS.readFile (f.as! String))) + | none => pure #[] + -- Union, preserving first-seen order, so the same const isn't proven twice. + let nameArgs := Id.run do + let mut seen : Std.HashSet String := {} + let mut acc : Array String := #[] + for n in cliNames ++ fileNames do + if !seen.contains n then seen := seen.insert n; acc := acc.push n + return acc + if nameArgs.isEmpty then + IO.eprintln "error: provide one or more constant names and/or --manifest " + return 1 + let jsonOut : Option String := (p.flag? "json").map (·.as! String) + -- subject-only: check just the target (`verify_const`, trusting its deps) + -- instead of re-checking the whole transitive closure (`verify_claim`). + let subjectOnly := p.hasFlag "subject-only" + -- Default: trace iff we're not in JSON/bencher mode. + let useTexray := + if p.hasFlag "texray" then true + else if p.hasFlag "no-texray" then false + else jsonOut.isNone + + -- Compile the IxVM kernel once; build the prover system once. + let .ok toplevel := IxVM.ixVM + | throw (IO.userError "Merging IxVM kernel failed") + let .ok compiled := toplevel.compile + | throw (IO.userError "Compilation of IxVM kernel failed") + let entrypoint := if subjectOnly then `verify_const else `verify_claim + let some funIdx := compiled.getFuncIdx entrypoint + | throw (IO.userError s!"{entrypoint} entrypoint missing") + let aiurSystem := Aiur.AiurSystem.build compiled.bytecode commitmentParameters + + -- Load the serialized env lazily (the `ix check --ixe` path, #445): byte-window + -- constants over the backing buffer, so only the checked closure is ever + -- materialized — no whole-env (100+GB on mathlib) load just to check a few. + let ixonEnv ← match Ixon.deEnvAnon (← IO.FS.readBinFile ixePath) with + | .error e => IO.eprintln s!"deserialize {ixePath} failed: {e}"; return 1 + | .ok env => pure env + IO.println s!"Loaded {ixePath}: {ixonEnv.namedCount} named, {ixonEnv.constCount} consts" + -- Build the witness for `addr`: subject-only (`verify_const`) trusts deps; + -- otherwise the full-closure check (`verify_claim`, `check addr none`). + let mkWitness (addr : Address) : IO IxVM.ClaimHarness.ClaimWitness := + if subjectOnly then pure (IxVM.ClaimHarness.buildVerifyConst ixonEnv addr) + else IO.ofExcept (IxVM.ClaimHarness.buildClaimWitness ixonEnv (Ix.Claim.check addr none)) + let mut targets : Array (String × Address) := #[] + for arg in nameArgs do + match Ix.Cli.NameResolve.resolveIxeAddr ixonEnv arg with + | some addr => targets := targets.push (arg, addr) + | none => IO.eprintln s!"warning: {arg} not found in {ixePath}; skipping" + if targets.isEmpty then + IO.eprintln "no requested constants were found in the env" + return 1 + + -- Phase 1: execute every constant (cheap, deterministic structural metrics). + -- Carry each target's address through so phase 2 can rebuild its witness. + IO.println "── Phase 1: execute (witness generation) ──" + let mut execed : Array (Result × Address) := #[] + for (label, addr) in targets do + try + let witness ← mkWitness addr + let (res, execSec) ← timed fun _ => + Aiur.Bytecode.Toplevel.execute compiled.bytecode funIdx + witness.input witness.inputIOBuffer + match res with + | .error e => IO.eprintln s!" execute {label} failed: {e}" + | .ok (_, _, queryCounts) => + let stats := Aiur.computeStats compiled queryCounts + let constants := (IxVM.ClaimHarness.closureFrom ixonEnv addr).size + IO.println s!" {label}: constants={constants} fft-cost={stats.totalFftCost} \ + execute={execSec}s" + execed := execed.push + ({ name := label, constants, fftCost := stats.totalFftCost, executeSec := execSec }, addr) + catch e => + IO.eprintln s!" execute {label} threw: {e}" + + -- Write the neutral results JSON, but only when `--json` was given. Rewritten + -- after each prove so a `timeout` kill still leaves a complete file. + let writeJson (results : Array Result) : IO Unit := + match jsonOut with + | some path => + IO.FS.writeFile path (Json.mkObj (results.map Result.toJsonEntry).toList).pretty + | none => pure () + + -- Phase 2: prove cheap→expensive. Refine each entry with its prove-time as it + -- lands. Install texray first so the prove spans (timeline + RAM Δ/peak) render. + if useTexray then TracingTexray.init {} + IO.println "── Phase 2: prove ──" + let mut ordered := execed.qsort (·.1.fftCost < ·.1.fftCost) + writeJson (ordered.map (·.1)) + let mut spent : Float := 0.0 + for i in [:ordered.size] do + let (r, addr) := ordered[i]! + try + let witness ← mkWitness addr + let (_, proveSec) ← timed fun _ => + aiurSystem.prove friParameters funIdx witness.input witness.inputIOBuffer + spent := spent + proveSec + IO.println s!" {r.name}: prove={proveSec}s (cumulative {spent}s)" + ordered := ordered.set! i ({ r with proveSec := some proveSec }, addr) + writeJson (ordered.map (·.1)) + catch e => + IO.eprintln s!" prove {r.name} threw: {e}" + + match jsonOut with + | some path => IO.println s!"wrote {ordered.size} benchmarks to {path} ({spent}s proving)" + | none => IO.println s!"proved {ordered.size} constants ({spent}s); pass --json to emit results" + return 0 + +def typecheckCmd : Cli.Cmd := `[Cli| + typecheck VIA runTypecheckCmd; + "Benchmark IxVM-kernel execution + proving of `Ix.Claim.check` over `.ixe` constants" + + FLAGS: + "ixe" : String; "Path to a serialized `Ixon.Env` (e.g. produced by `ix compile`). Required." + "manifest" : String; "Additionally read constant names from a file (one per line; `#` comments and blank lines ignored). Unions with the positional names." + "json" : String; "Write per-constant results JSON to this path. Off by default; normal CLI usage prints only the human-readable summary." + "subject-only"; "Check only each target itself (verify_const, trusting its deps) instead of re-checking its whole transitive closure (verify_claim)." + texray; "Force the tracing-texray timeline + RAM breakdown on (per-prove spans on stderr)." + "no-texray"; "Force the breakdown off. Default: on iff --json was not given." + + ARGS: + ...names : String; "Fully-qualified constant name(s) to benchmark (e.g. `Nat.add_comm String.append`). Optional if `--manifest` is given." +] + +def main (args : List String) : IO UInt32 := + typecheckCmd.validate args diff --git a/Cargo.lock b/Cargo.lock index 1d573036..78ba8070 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3886,7 +3886,7 @@ dependencies = [ [[package]] name = "tracing-texray" version = "0.2.0" -source = "git+https://github.com/argumentcomputer/tracing-texray?rev=8ce04e3422cd48e68ef47fab95dba7d06b8c368c#8ce04e3422cd48e68ef47fab95dba7d06b8c368c" +source = "git+https://github.com/argumentcomputer/tracing-texray?rev=31d194dd1bc50458d26f77c89bb68f67e5d1c149#31d194dd1bc50458d26f77c89bb68f67e5d1c149" dependencies = [ "loom", "parking_lot", diff --git a/Cargo.toml b/Cargo.toml index e83b8384..d1ee422a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ n0-error = { version = "0.1", optional = true } getrandom = { version = "0.3", optional = true } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } -tracing-texray = { git = "https://github.com/argumentcomputer/tracing-texray", rev = "8ce04e3422cd48e68ef47fab95dba7d06b8c368c" } +tracing-texray = { git = "https://github.com/argumentcomputer/tracing-texray", rev = "31d194dd1bc50458d26f77c89bb68f67e5d1c149" } bincode = { version = "2.0.1", optional = true } serde = { version = "1.0.219", features = ["derive"], optional = true } diff --git a/Ix/Cli/AddrOfCmd.lean b/Ix/Cli/AddrOfCmd.lean index b21803fc..da70104e 100644 --- a/Ix/Cli/AddrOfCmd.lean +++ b/Ix/Cli/AddrOfCmd.lean @@ -17,16 +17,13 @@ public import Ix.Environment public import Ix.IxVM.ClaimHarness public import Ix.Ixon public import Ix.Meta +public import Ix.Cli.NameResolve public section -namespace Ix.Cli.AddrOfCmd +open Ix.Cli.NameResolve -private def parseName (arg : String) : Lean.Name := - arg.splitOn "." |>.foldl (init := .anonymous) - fun acc s => match s.toNat? with - | some n => Lean.Name.mkNum acc n - | none => Lean.Name.mkStr acc s +namespace Ix.Cli.AddrOfCmd def runAddrOfCmd (p : Cli.Parsed) : IO UInt32 := do -- Suppress Rust-side `[compile_env]` scheduler noise; the only @@ -34,7 +31,8 @@ def runAddrOfCmd (p : Cli.Parsed) : IO UInt32 := do Std.Internal.UV.System.osSetenv "IX_QUIET" "1" let some nameArg := p.positionalArg? "name" | p.printError "error: must specify "; return 1 - let name := parseName (nameArg.as! String) + let argStr := nameArg.as! String + let name := parseName argStr let ixePath : Option String := (p.flag? "ixe").map (·.as! String) match ixePath with @@ -44,7 +42,7 @@ def runAddrOfCmd (p : Cli.Parsed) : IO UInt32 := do | .error e => IO.eprintln s!"error: failed to deserialize {path}: {e}"; return 1 | .ok env => pure env - match ixonEnv.getAddr? (Ix.Name.fromLeanName name) with + match resolveIxeAddr ixonEnv argStr with | none => IO.eprintln s!"error: {name} not found in {path}"; return 1 | some addr => diff --git a/Ix/Cli/CheckCmd.lean b/Ix/Cli/CheckCmd.lean index 9a2afb24..5f55d2b5 100644 --- a/Ix/Cli/CheckCmd.lean +++ b/Ix/Cli/CheckCmd.lean @@ -32,29 +32,15 @@ public import Ix.IxVM.ClaimHarness public import Ix.Ixon public import Ix.Meta public import Ix.Store +public import Ix.Cli.NameResolve public section open IxVM.ClaimHarness +open Ix.Cli.NameResolve namespace Ix.Cli.CheckCmd -def parseName (arg : String) : Lean.Name := - arg.splitOn "." |>.foldl (init := .anonymous) - fun acc s => match s.toNat? with - | some n => Lean.Name.mkNum acc n - | none => Lean.Name.mkStr acc s - -/-- Resolve a CLI name argument against the env. `parseName` can't rebuild - private names (`_private.M.0.foo`) — the marker/scope-index components - don't round-trip through naive dot-splitting. So if the parsed name - isn't present, fall back to matching the arg against each constant's - `toString` (the displayed form the user copied). -/ -def resolveName (env : Lean.Environment) (arg : String) : Option Lean.Name := - let parsed := parseName arg - if env.constants.contains parsed then some parsed - else (env.constants.toList.find? (fun (k, _) => toString k == arg)).map (·.1) - def addrOfHex! (label : String) (s : String) : IO Address := do match Address.fromString s with | some a => pure a @@ -92,22 +78,6 @@ def loadClaimAndTrees (claimHex : String) : trees := trees.insert r tree return (claim, trees) -/-- Reverse of `Ix.Name.fromLeanName`. Drops the per-node hash. -/ -partial def ixNameToLeanName : Ix.Name → Lean.Name - | .anonymous _ => .anonymous - | .str p s _ => .str (ixNameToLeanName p) s - | .num p n _ => .num (ixNameToLeanName p) n - -/-- Resolve a CLI name argument against a `.ixe` env's named map. Like - `resolveName` for the compiled-in env: `parseName` can't rebuild private - names, so when the direct lookup misses, fall back to matching the arg - against each named constant's displayed `toString`. -/ -def resolveIxeAddr (ixonEnv : Ixon.Env) (arg : String) : Option Address := - match ixonEnv.getAddr? (Ix.Name.fromLeanName (parseName arg)) with - | some a => some a - | none => - (ixonEnv.named.toList.find? (fun (k, _) => toString (ixNameToLeanName k) == arg)).map (·.2.addr) - /-- Build a `ClaimWitness` for the `verify_claim` entrypoint against `Ix.Claim.check addr none` (full transitive typecheck of the target's closure). -/ diff --git a/Ix/Cli/NameResolve.lean b/Ix/Cli/NameResolve.lean new file mode 100644 index 00000000..9c8831d3 --- /dev/null +++ b/Ix/Cli/NameResolve.lean @@ -0,0 +1,58 @@ +/- + Shared name-resolution helpers for the `ix` CLI commands. + + Turning a user-supplied dotted name (`Nat.add_comm`, `_private.M.0.foo`) + into either a `Lean.Name` or a content `Address` is needed by several + commands (`addr-of`, `check`, …) and by benchmark drivers. These helpers + live here — depending only on `Address`/`Environment`/`Ixon` — so callers + don't have to pull in the Aiur typecheck machinery just to resolve a name. +-/ +module +public import Ix.Address +public import Ix.Environment +public import Ix.Ixon + +public section + +namespace Ix.Cli.NameResolve + +/-- Parse a dotted CLI argument into a `Lean.Name`, treating all-digit + components as numeric (`Nat`) name parts and everything else as string + parts. Naive: components that don't round-trip through `.`-splitting + (private markers, macro scopes) won't reconstruct — see `resolveName` + / `resolveIxeAddr` for the `toString` fallback that covers those. -/ +def parseName (arg : String) : Lean.Name := + arg.splitOn "." |>.foldl (init := .anonymous) + fun acc s => match s.toNat? with + | some n => Lean.Name.mkNum acc n + | none => Lean.Name.mkStr acc s + +/-- Resolve a CLI name argument against the env. `parseName` can't rebuild + private names (`_private.M.0.foo`) — the marker/scope-index components + don't round-trip through naive dot-splitting. So if the parsed name + isn't present, fall back to matching the arg against each constant's + `toString` (the displayed form the user copied). -/ +def resolveName (env : Lean.Environment) (arg : String) : Option Lean.Name := + let parsed := parseName arg + if env.constants.contains parsed then some parsed + else (env.constants.toList.find? (fun (k, _) => toString k == arg)).map (·.1) + +/-- Reverse of `Ix.Name.fromLeanName`. Drops the per-node hash. -/ +partial def ixNameToLeanName : Ix.Name → Lean.Name + | .anonymous _ => .anonymous + | .str p s _ => .str (ixNameToLeanName p) s + | .num p n _ => .num (ixNameToLeanName p) n + +/-- Resolve a CLI name argument against a `.ixe` env's named map. Like + `resolveName` for the compiled-in env: `parseName` can't rebuild private + names, so when the direct lookup misses, fall back to matching the arg + against each named constant's displayed `toString`. -/ +def resolveIxeAddr (ixonEnv : Ixon.Env) (arg : String) : Option Address := + match ixonEnv.getAddr? (Ix.Name.fromLeanName (parseName arg)) with + | some a => some a + | none => + (ixonEnv.named.toList.find? (fun (k, _) => toString (ixNameToLeanName k) == arg)).map (·.2.addr) + +end Ix.Cli.NameResolve + +end diff --git a/Ix/TracingTexray.lean b/Ix/TracingTexray.lean index e63ebd3f..c0c4c064 100644 --- a/Ix/TracingTexray.lean +++ b/Ix/TracingTexray.lean @@ -5,9 +5,11 @@ (`aiur/execute`, `aiur/witness`, `aiur/prove`) and each STARK proving stage (`stark/...`). After `init` installs the subscriber: * a one-line `[texray] : ── RAM Δ +X peak Y` is streamed - to stderr as each tracked span closes (when `streaming = true`); - * the full texray graph (with RAM block, Linux-only) prints when - `aiur/prove` exits. + to stderr as each tracked span closes (when `streaming = true`), + followed by a companion `[texray] peak-rss-bytes= + ()` line carrying the raw peak in bytes for programmatic + consumers (CI bench, grep); + * the full texray graph prints when `aiur/prove` exits. -/ module diff --git a/lakefile.lean b/lakefile.lean index 5362daa8..b192de93 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -118,8 +118,8 @@ lean_exe «bench-ixvm» where lean_exe «bench-shardmap» where root := `Benchmarks.ShardMap -lean_exe «bench-check-nat-add-comm» where - root := `Benchmarks.CheckNatAddComm +lean_exe «bench-typecheck» where + root := `Benchmarks.Typecheck supportInterpreter := true end Benchmarks diff --git a/src/ffi/texray.rs b/src/ffi/texray.rs index 85486129..77d91b5b 100644 --- a/src/ffi/texray.rs +++ b/src/ffi/texray.rs @@ -2,8 +2,10 @@ //! //! With `streaming = true`, each tracked span (e.g. `aiur/execute`, //! `aiur/witness`, `aiur/prove`) emits a one-line `[texray] : -//! ── RAM Δ +X peak Y` to stderr as it closes, and the full texray -//! graph prints when the examined root span exits. +//! ── RAM Δ +X peak Y` to stderr as it closes, followed by a companion +//! `[texray] peak-rss-bytes= ()` line (raw bytes for +//! CI/grep). The full texray graph prints when the examined root span +//! exits. use lean_ffi::object::{LeanBorrowed, LeanIOResult, LeanOwned, LeanString}; use tracing_subscriber::{ @@ -22,8 +24,9 @@ use tracing_subscriber::{ /// everything, including upstream library spans. /// - `track_ram`: sample VmRSS/VmHWM per span. Linux-only; zeros elsewhere. /// - `streaming`: emit per-span `[texray] : ── RAM Δ +X peak Y` -/// lines on stderr as each span closes. The texray graph prints either -/// way when the examined root span exits. +/// plus a `[texray] peak-rss-bytes= ()` companion on +/// stderr as each span closes. The texray graph prints either way when +/// the examined root span exits. #[unsafe(no_mangle)] extern "C" fn rs_texray_init( name_prefixes: LeanString>,