diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 000000000..1831b0a74
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[alias] # command aliases
+ci = ["run", "--quiet", "--package=hyperlight-ci", "--"]
\ No newline at end of file
diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml
index 2f6294476..b212504e3 100644
--- a/.github/workflows/ValidatePullRequest.yml
+++ b/.github/workflows/ValidatePullRequest.yml
@@ -15,7 +15,7 @@ concurrency:
permissions:
contents: write
- pull-requests: read
+ pull-requests: write
jobs:
docs-pr:
@@ -140,6 +140,86 @@ jobs:
docs_only: ${{ needs.docs-pr.outputs.docs-only }}
secrets: inherit
+ # Run benchmarks and post results as PR comment
+ benchmarks:
+ needs:
+ - docs-pr
+ - build-guests
+ # Required because update-guest-locks is skipped on non-dependabot PRs,
+ # and a skipped dependency transitively skips all downstream jobs.
+ # See: https://github.com/actions/runner/issues/2205
+ if: ${{ !cancelled() && !failure() }}
+ strategy:
+ fail-fast: false
+ matrix:
+ hypervisor: ['hyperv-ws2025', mshv3, kvm]
+ cpu: [amd, intel]
+ uses: ./.github/workflows/dep_benchmarks.yml
+ secrets: inherit
+ with:
+ docs_only: ${{ needs.docs-pr.outputs.docs-only }}
+ hypervisor: ${{ matrix.hypervisor }}
+ cpu: ${{ matrix.cpu }}
+
+ # Collect all benchmark reports and post a single combined PR comment
+ benchmark-comment:
+ needs: benchmarks
+ if: ${{ !cancelled() && !failure() }}
+ runs-on: ubuntu-latest
+ permissions:
+ pull-requests: write
+ steps:
+ - name: Download all benchmark reports
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ pattern: benchmark-report_*
+ path: reports/
+
+ - name: Post combined benchmark results to PR
+ uses: actions/github-script@v9
+ with:
+ script: |
+ const fs = require('fs');
+ const path = require('path');
+
+ const reportsDir = 'reports';
+ if (!fs.existsSync(reportsDir)) {
+ console.log('No benchmark reports found, skipping comment.');
+ return;
+ }
+
+ // Collect all report files from subdirectories
+ const sections = [];
+ const dirs = fs.readdirSync(reportsDir).sort();
+ for (const dir of dirs) {
+ const mdPath = path.join(reportsDir, dir, 'benchmark.md');
+ if (!fs.existsSync(mdPath)) continue;
+
+ // Extract hypervisor/cpu from artifact name: benchmark-report_OS_hypervisor_cpu
+ const parts = dir.replace('benchmark-report_', '').split('_');
+ const os = parts[0];
+ const hypervisor = parts.slice(1, -1).join('_');
+ const cpu = parts[parts.length - 1];
+ const label = `${hypervisor} / ${cpu} (${os})`;
+
+ const content = fs.readFileSync(mdPath, 'utf8').trim();
+ sections.push(`\n${label}
\n\n${content}\n\n `);
+ }
+
+ if (sections.length === 0) {
+ console.log('No benchmark report content found, skipping comment.');
+ return;
+ }
+
+ const body = `## Benchmark Results\n\n${sections.join('\n\n')}`;
+
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: context.issue.number,
+ body: body,
+ });
+
spelling:
name: spell check with typos
runs-on: ubuntu-latest
@@ -167,6 +247,8 @@ jobs:
- build-test
- run-examples
- fuzzing
+ - benchmarks
+ - benchmark-comment
- spelling
- license-headers
if: always()
diff --git a/.github/workflows/dep_benchmarks.yml b/.github/workflows/dep_benchmarks.yml
index b0c47be76..c4420b117 100644
--- a/.github/workflows/dep_benchmarks.yml
+++ b/.github/workflows/dep_benchmarks.yml
@@ -56,7 +56,6 @@ on:
required: false
type: number
default: 5
-
env:
CARGO_TERM_COLOR: always
RUST_BACKTRACE: full
@@ -133,7 +132,17 @@ jobs:
continue-on-error: true
- name: Run benchmarks
- run: just bench-ci main
+ run: just bench-ci
+
+ - name: Create benchmarks report
+ run: cargo ci bench-report > target/criterion/benchmark.md
+
+ - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+ with:
+ name: benchmark-report_${{ runner.os }}_${{ inputs.hypervisor }}_${{ inputs.cpu }}
+ path: target/criterion/benchmark.md
+ if-no-files-found: error
+ retention-days: 1
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
diff --git a/Cargo.lock b/Cargo.lock
index ba73df16d..f7f485cba 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -58,18 +58,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
-name = "anstream"
-version = "0.6.21"
+name = "ansi-replace"
+version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+checksum = "7f8b155ab93213f41c886d3a46e335258428e52c7cf868e25cf099d50274496d"
dependencies = [
- "anstyle",
- "anstyle-parse 0.2.7",
- "anstyle-query",
- "anstyle-wincon",
- "colorchoice",
- "is_terminal_polyfill",
- "utf8parse",
+ "regex",
+ "stable-pattern",
]
[[package]]
@@ -79,7 +74,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
dependencies = [
"anstyle",
- "anstyle-parse 1.0.0",
+ "anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
@@ -93,15 +88,6 @@ version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
-[[package]]
-name = "anstyle-parse"
-version = "0.2.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
-dependencies = [
- "utf8parse",
-]
-
[[package]]
name = "anstyle-parse"
version = "1.0.0"
@@ -117,7 +103,7 @@ version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -128,7 +114,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -499,25 +485,38 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.58"
+version = "4.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806"
+checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
dependencies = [
"clap_builder",
+ "clap_derive",
]
[[package]]
name = "clap_builder"
-version = "4.5.58"
+version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
dependencies = [
- "anstream 0.6.21",
+ "anstream",
"anstyle",
"clap_lex",
"strsim",
]
+[[package]]
+name = "clap_derive"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "clap_lex"
version = "1.0.0"
@@ -530,6 +529,19 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+[[package]]
+name = "console"
+version = "0.15.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
+dependencies = [
+ "encode_unicode",
+ "libc",
+ "once_cell",
+ "unicode-width",
+ "windows-sys 0.59.0",
+]
+
[[package]]
name = "constant_time_eq"
version = "0.4.2"
@@ -609,6 +621,19 @@ dependencies = [
"cfg-if",
]
+[[package]]
+name = "cpu-pin"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb5bc1be026f7f066429ce0611e23a341db91b91ed701de4a1432d01d3ed1105"
+dependencies = [
+ "libc",
+ "mach2 0.6.0",
+ "once_cell",
+ "tokio",
+ "windows",
+]
+
[[package]]
name = "cpufeatures"
version = "0.2.17"
@@ -769,7 +794,7 @@ dependencies = [
"libc",
"option-ext",
"redox_users",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -815,6 +840,12 @@ dependencies = [
"zerocopy",
]
+[[package]]
+name = "encode_unicode"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
+
[[package]]
name = "endian-type"
version = "0.1.2"
@@ -837,7 +868,7 @@ version = "0.11.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
dependencies = [
- "anstream 1.0.0",
+ "anstream",
"anstyle",
"env_filter",
"jiff",
@@ -857,7 +888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -1190,7 +1221,7 @@ dependencies = [
"gobject-sys",
"libc",
"system-deps",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -1359,6 +1390,12 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
[[package]]
name = "http"
version = "1.4.0"
@@ -1456,6 +1493,23 @@ dependencies = [
"tracing",
]
+[[package]]
+name = "hyperlight-ci"
+version = "0.0.0"
+dependencies = [
+ "ansi-replace",
+ "anyhow",
+ "clap",
+ "cpu-pin",
+ "indicatif",
+ "num_cpus",
+ "regex",
+ "serde",
+ "serde_json",
+ "simple-pool",
+ "tokio",
+]
+
[[package]]
name = "hyperlight-common"
version = "0.15.0"
@@ -1626,7 +1680,7 @@ dependencies = [
"vmm-sys-util",
"windows",
"windows-result",
- "windows-sys",
+ "windows-sys 0.61.2",
"windows-version",
]
@@ -1820,6 +1874,19 @@ dependencies = [
"serde_core",
]
+[[package]]
+name = "indicatif"
+version = "0.17.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
+dependencies = [
+ "console",
+ "number_prefix",
+ "portable-atomic",
+ "unicode-width",
+ "web-time",
+]
+
[[package]]
name = "ipnet"
version = "2.11.0"
@@ -2105,6 +2172,12 @@ dependencies = [
"libc",
]
+[[package]]
+name = "mach2"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dae608c151f68243f2b000364e1f7b186d9c29845f7d2d85bd31b9ad77ad552b"
+
[[package]]
name = "macho-unwind-info"
version = "0.5.0"
@@ -2216,7 +2289,7 @@ checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
dependencies = [
"libc",
"wasi",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -2280,7 +2353,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -2292,6 +2365,16 @@ dependencies = [
"autocfg",
]
+[[package]]
+name = "num_cpus"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
[[package]]
name = "num_enum"
version = "0.7.5"
@@ -2313,6 +2396,12 @@ dependencies = [
"syn",
]
+[[package]]
+name = "number_prefix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
+
[[package]]
name = "object"
version = "0.39.0"
@@ -2324,6 +2413,12 @@ dependencies = [
"ruzstd",
]
+[[package]]
+name = "object-id"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c587bd1cd63959a8520442afc0f92a875d83deea175c7b48dd9f104a2c5070a9"
+
[[package]]
name = "once_cell"
version = "1.21.4"
@@ -2785,7 +2880,7 @@ dependencies = [
"bindgen 0.70.1",
"libc",
"libproc",
- "mach2",
+ "mach2 0.4.3",
"winapi",
]
@@ -3152,7 +3247,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -3363,6 +3458,16 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+[[package]]
+name = "simple-pool"
+version = "0.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "073382259dbeb56c3eaab04a1d330459f6490d1e518b2a8ee441c8bd00dbc092"
+dependencies = [
+ "object-id",
+ "parking_lot",
+]
+
[[package]]
name = "sketches-ddsketch"
version = "0.3.0"
@@ -3388,7 +3493,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
dependencies = [
"libc",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -3406,6 +3511,15 @@ dependencies = [
"lock_api",
]
+[[package]]
+name = "stable-pattern"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4564168c00635f88eaed410d5efa8131afa8d8699a612c80c455a0ba05c21045"
+dependencies = [
+ "memchr",
+]
+
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
@@ -3478,7 +3592,7 @@ dependencies = [
"getrandom 0.4.1",
"once_cell",
"rustix",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -3553,7 +3667,7 @@ dependencies = [
"signal-hook-registry",
"socket2",
"tokio-macros",
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -3913,6 +4027,12 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
[[package]]
name = "unicode-xid"
version = "0.2.6"
@@ -4195,7 +4315,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
]
[[package]]
@@ -4305,6 +4425,15 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets",
+]
+
[[package]]
name = "windows-sys"
version = "0.61.2"
@@ -4314,6 +4443,22 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
[[package]]
name = "windows-threading"
version = "0.2.1"
@@ -4332,6 +4477,54 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
[[package]]
name = "winnow"
version = "0.7.14"
diff --git a/Cargo.toml b/Cargo.toml
index e9b69f40d..6650dcbf9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ default-members = [
"src/hyperlight_testing",
]
members = [
+ "src/hyperlight_ci",
"src/hyperlight_common",
"src/hyperlight_guest",
"src/hyperlight_host",
diff --git a/Justfile b/Justfile
index 401897425..9b6846289 100644
--- a/Justfile
+++ b/Justfile
@@ -177,7 +177,7 @@ run-examples-like-ci config=default-target hypervisor="kvm":
benchmarks-like-ci config=default-target hypervisor="kvm":
@# Run benchmarks
- {{ if config == "release" { "just bench-ci main" } else { "" } }}
+ {{ if config == "release" { "just bench-ci" } else { "" } }}
fuzz-like-ci target config=default-target hypervisor="kvm":
@# Run Fuzzing
@@ -400,13 +400,12 @@ bench-download os hypervisor cpu tag="":
tar -zxvf target/benchmarks_{{ os }}_{{ hypervisor }}_{{ cpu }}.tar.gz -C target/criterion/ --strip-components=1
# Warning: compares to and then OVERWRITES the given baseline
-bench-ci baseline features="":
- @# Benchmarks are always run with release builds for meaningful results
- cargo bench --profile=release {{ if features =="" {''} else { "--features " + features } }} -- --verbose --save-baseline {{ baseline }}
+bench-ci features="":
+ cargo ci bench --no-progress {{ if features == "" {''} else { "--features " + features } }}
bench features="":
@# Benchmarks are always run with release builds for meaningful results
- cargo bench --profile=release {{ if features =="" {''} else { "--features " + features } }} -- --verbose
+ cargo ci bench {{ if features == "" {''} else { "--features " + features } }}
###############
### FUZZING ###
diff --git a/docs/benchmarking-hyperlight.md b/docs/benchmarking-hyperlight.md
index dd28c6ea8..2fb931011 100644
--- a/docs/benchmarking-hyperlight.md
+++ b/docs/benchmarking-hyperlight.md
@@ -72,6 +72,6 @@ Found 1 outliers among 100 measurements (1.00%)
## Running benchmarks locally
-Use `just bench` to run benchmarks with release builds (the only supported configuration). Comparing local benchmark results to github-saved benchmarks doesn't make much sense, since you'd be using different hardware, but you can use `just bench-download os hypervisor [tag] ` to download and extract the GitHub release benchmarks to the correct place folder. You can then run `just bench-ci main` to compare to (and overwrite) the previous release benchmarks. Note that `main` is the name of the baselines stored in GitHub.
+Use `just bench` to run benchmarks with release builds (the only supported configuration). Comparing local benchmark results to GitHub-saved benchmarks doesn't make much sense, since you'd be using different hardware, but you can use `just bench-download os hypervisor cpu [tag]` to download and extract the GitHub release benchmarks into the correct folder. You can then run `just bench-ci` to compare to (and overwrite) the previous release benchmarks. The name of the baselines stored in GitHub is `base`.
**Important**: The `just bench` command uses release builds by default to ensure meaningful performance measurements. For profiling purposes, you can compile benchmarks with debug symbols by running `cargo bench` directly.
diff --git a/src/hyperlight_ci/Cargo.toml b/src/hyperlight_ci/Cargo.toml
new file mode 100644
index 000000000..fa5555a9f
--- /dev/null
+++ b/src/hyperlight_ci/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "hyperlight-ci"
+edition = "2021"
+# `version` (and other publishing metadata) is intentionally not set: Cargo will not publish a package without a version, which avoids accidentally publishing this crate to crates.io
+description = """
+Hyperlight's CI and development tools.
+"""
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = "1"
+clap = { version = "4.6.1", features = ["derive"] }
+indicatif = "0.17"
+num_cpus = "1"
+tokio = { version = "1", features = ["rt", "process", "io-util", "sync", "macros"] }
+ansi-replace = "0.1"
+regex = "1"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+simple-pool = "0.0.18"
+cpu-pin = { version = "0.1.3", features = ["tokio"] }
\ No newline at end of file
diff --git a/src/hyperlight_ci/src/bench/args.rs b/src/hyperlight_ci/src/bench/args.rs
new file mode 100644
index 000000000..3aa92a46f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/args.rs
@@ -0,0 +1,52 @@
+use std::path::PathBuf;
+
+use clap::Args;
+
+use crate::bench::cpu::PerformanceCoresPool;
+
+/// Command-line arguments for the `bench` subcommand.
+// NOTE: the `///` comments on the fields below are also the `--help` text that clap
+// generates, so keep them short and user-facing.
+#[derive(Args)]
+pub struct BenchArgs {
+    /// Filter benchmarks by name (substring match, or exact with --exact)
+    pub filter: Option<String>,
+
+    /// Match the filter exactly instead of as a substring
+    #[arg(long)]
+    pub exact: bool,
+
+    /// Pre-built benchmark binary to use (skip build step; can be specified multiple times)
+    #[arg(long)]
+    pub binary: Vec<PathBuf>,
+
+    /// Number of benchmarks to run in parallel (0 = one per performance core, default: 0)
+    #[arg(long, short, default_value_t = 0)]
+    pub jobs: usize,
+
+    /// Reduce output verbosity (repeatable: -q hides stderr, -qq hides everything)
+    #[arg(short, long, action = clap::ArgAction::Count)]
+    pub quiet: u8,
+
+    /// Disable progress bar (auto-detected: shown only on TTY)
+    #[arg(long)]
+    pub no_progress: bool,
+
+    /// Additional features to enable when building the benchmarks
+    #[arg(short = 'F', long, default_value = "")]
+    pub features: String,
+}
+
+impl BenchArgs {
+    /// Resolve the number of benchmarks that may run concurrently.
+    ///
+    /// An explicit non-zero `--jobs` value is returned unchanged; `0` falls back to
+    /// [`PerformanceCoresPool::num_cores`].
+    pub fn max_jobs(&self) -> usize {
+        if self.jobs == 0 {
+            PerformanceCoresPool::num_cores()
+        } else {
+            self.jobs
+        }
+    }
+
+    /// Report whether progress bars should be drawn.
+    ///
+    /// Progress is shown only when it was not disabled with `--no-progress`, fewer than
+    /// two `-q` flags were given, and stderr is attached to a terminal.
+    pub fn use_progress(&self) -> bool {
+        use std::io::IsTerminal;
+
+        if self.no_progress || self.quiet >= 2 {
+            return false;
+        }
+        std::io::stderr().is_terminal()
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/cpu.rs b/src/hyperlight_ci/src/bench/cpu.rs
new file mode 100644
index 000000000..5ae6f7a9a
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/cpu.rs
@@ -0,0 +1,71 @@
+//! CPU core discovery and pool management for benchmark isolation.
+//!
+//! Discovers performance cores (P-cores) on the system and provides a pool
+//! that allows benchmarks to be pinned to specific cores, avoiding interference
+//! from concurrent workloads.
+
+use std::sync::{Arc, LazyLock};
+
+use anyhow::{Result, bail};
+use cpu_pin::CpuInfo;
+use simple_pool::{ResourcePool, ResourcePoolGuard};
+
+/// Lazily discovered list of performance cores on the system.
+///
+/// Initialized on first access from `cpu_pin::topology()` followed by `best_cores()`.
+/// Presumably `best_cores()` selects cores marked as `Performance` type that have the
+/// maximum number of logical processors (i.e., full-featured P-cores with hyperthreading,
+/// excluding any asymmetric E-cores) — TODO confirm against the `cpu_pin` documentation.
+///
+/// # Panics
+///
+/// The first access panics with "failed to detect CPU topology" if `cpu_pin::topology()`
+/// does not yield a value.
+// NOTE(review): the type arguments of `LazyLock` appear to be missing from this
+// declaration (it reads `LazyLock>`); confirm the element type and restore them.
+static PERFORMANCE_CORES: LazyLock> = LazyLock::new(|| {
+ cpu_pin::topology()
+ .expect("failed to detect CPU topology")
+ .best_cores()
+});
+
+/// A pool of performance cores that can be claimed by benchmark tasks.
+///
+/// Intended use: each benchmark acquires a core from the pool before running, so that
+/// no two benchmarks share the same physical core simultaneously.
+///
+/// Cloning is cheap: the derived `Clone` only clones the `Arc`, so every clone refers
+/// to the same underlying pool.
+#[derive(Clone)]
+pub struct PerformanceCoresPool {
+ // NOTE(review): the type arguments of `Arc` appear to be missing here (it reads
+ // `Arc>`); presumably this wraps a `ResourcePool` of cores — confirm and restore.
+ pool: Arc>,
+}
+
+impl PerformanceCoresPool {
+    /// Number of performance cores detected on this system.
+    pub fn num_cores() -> usize {
+        PERFORMANCE_CORES.len()
+    }
+
+    /// Builds a pool holding the first `size` detected performance cores.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `size` is larger than the number of detected performance cores.
+    pub fn new(size: usize) -> Result {
+        let available = PERFORMANCE_CORES.len();
+        if size > available {
+            bail!(
+                "Requested more performance cores than available: requested {size}, available {}",
+                available
+            );
+        }
+
+        let pool = Arc::new(ResourcePool::new());
+        PERFORMANCE_CORES.iter().take(size).for_each(|core| {
+            pool.append((*core).clone());
+        });
+
+        Ok(Self { pool })
+    }
+
+    /// Waits until the pool can hand out a core and returns a guard for it.
+    ///
+    /// Presumably the core goes back into the pool once the guard is dropped.
+    pub async fn get(&self) -> ResourcePoolGuard {
+        self.pool.get().await
+    }
+}
+
+impl Default for PerformanceCoresPool {
+ /// Creates a pool containing all available performance cores.
+ ///
+ /// The `unwrap` is not expected to fire: `new` only rejects sizes larger than the
+ /// number of detected cores, and the size passed here is exactly `num_cores()`.
+ /// Detecting the cores themselves can still panic on first use of `PERFORMANCE_CORES`.
+ fn default() -> Self {
+ Self::new(Self::num_cores()).unwrap()
+ }
+}
diff --git a/src/hyperlight_ci/src/bench/discovery.rs b/src/hyperlight_ci/src/bench/discovery.rs
new file mode 100644
index 000000000..147579d28
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/discovery.rs
@@ -0,0 +1,125 @@
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result, bail};
+use tokio::process::Command;
+use std::process::Stdio;
+
+/// Discovers available benchmarks by querying the benchmark binary.
+pub struct BenchmarkDiscovery {
+    /// Value forwarded to `cargo build --features`; empty means no `--features` flag.
+    features: String,
+    /// Optional benchmark name filter passed to the benchmark binary when listing.
+    filter: Option<String>,
+    /// When true, `--exact` is passed to the benchmark binary when listing.
+    exact: bool,
+}
+
+impl BenchmarkDiscovery {
+    /// Create a new discovery instance with the given parameters.
+    ///
+    /// * `features` - forwarded to `cargo build --features`; an empty string adds no flag.
+    /// * `filter` - optional benchmark name filter used when listing benchmarks.
+    /// * `exact` - when true, `--exact` is passed to the benchmark binary when listing.
+    pub fn new(features: &str, filter: Option<&str>, exact: bool) -> Self {
+        Self {
+            features: features.to_string(),
+            filter: filter.map(|s| s.to_string()),
+            exact,
+        }
+    }
+
+    /// Build all benchmark binaries and return their paths.
+    ///
+    /// Runs `cargo build --release --benches --message-format=json` (plus `--features`
+    /// when configured) and collects the file names of every `compiler-artifact`
+    /// message whose target kind includes `bench`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if cargo cannot be run, if the build exits unsuccessfully (the
+    /// captured stderr is included in the message), or if no benchmark binary is
+    /// reported in the output.
+    pub async fn build(&self) -> Result<Vec<PathBuf>> {
+        // Suffixes of artifacts that cargo reports next to a benchmark executable but
+        // that cannot be executed themselves:
+        //   .d    = dep-info files (all platforms)
+        //   .pdb  = debug symbols (Windows)
+        //   .dSYM = debug symbol bundles (macOS)
+        //   .dwp  = DWARF packages (Linux, split-debuginfo)
+        //   .lib  = import libraries (Windows)
+        //   .exp  = export files (Windows)
+        const NON_EXECUTABLE_SUFFIXES: [&str; 6] =
+            [".d", ".pdb", ".dSYM", ".dwp", ".lib", ".exp"];
+
+        let mut cmd = Command::new("cargo");
+        cmd.args([
+            "build",
+            "--release",
+            "--benches",
+            "--message-format=json",
+        ]);
+        if !self.features.is_empty() {
+            cmd.args(["--features", &self.features]);
+        }
+        cmd.stdout(Stdio::piped());
+        cmd.stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .context("Failed to run cargo build for benchmarks")?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            bail!("Failed to build benchmarks:\n{stderr}");
+        }
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let mut binaries = Vec::new();
+
+        // Parse cargo's JSON output to find all benchmark binary paths.
+        // Lines that are not valid JSON are skipped.
+        for line in stdout.lines() {
+            let Ok(msg) = serde_json::from_str::<serde_json::Value>(line) else {
+                continue;
+            };
+            if msg.get("reason").and_then(|r| r.as_str()) != Some("compiler-artifact") {
+                continue;
+            }
+            let is_bench = msg
+                .get("target")
+                .and_then(|t| t.get("kind"))
+                .and_then(|k| k.as_array())
+                .is_some_and(|kinds| kinds.iter().any(|k| k.as_str() == Some("bench")));
+            if !is_bench {
+                continue;
+            }
+            let Some(filenames) = msg.get("filenames").and_then(|f| f.as_array()) else {
+                continue;
+            };
+            binaries.extend(
+                filenames
+                    .iter()
+                    .filter_map(|f| f.as_str())
+                    .filter(|path| !NON_EXECUTABLE_SUFFIXES.iter().any(|ext| path.ends_with(ext)))
+                    .map(PathBuf::from),
+            );
+        }
+
+        if binaries.is_empty() {
+            bail!("No benchmark binaries found in cargo build output");
+        }
+
+        Ok(binaries)
+    }
+
+    /// List all benchmark names matching the configured filter.
+    ///
+    /// Runs `binary --bench --list` (adding `--exact` and the filter when configured)
+    /// and returns the name of every stdout line that ends in `: benchmark`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the binary cannot be run.
+    // NOTE(review): the exit status is not checked and stderr is discarded, so a binary
+    // that fails while listing yields an empty list rather than an error — confirm this
+    // best-effort behavior is intended.
+    pub async fn list(&self, binary: &Path) -> Result<Vec<String>> {
+        let mut cmd = Command::new(binary);
+        cmd.args(["--bench", "--list"]);
+        if self.exact {
+            cmd.arg("--exact");
+        }
+        if let Some(filter) = &self.filter {
+            cmd.arg(filter);
+        }
+        cmd.stdout(Stdio::piped());
+        cmd.stderr(Stdio::null());
+
+        let output = cmd
+            .output()
+            .await
+            .with_context(|| format!("Failed to run {} --bench --list", binary.display()))?;
+        let stdout = String::from_utf8_lossy(&output.stdout);
+
+        let benches: Vec<String> = stdout
+            .lines()
+            .filter_map(|line| {
+                let line = line.trim();
+                let line = line.strip_suffix(": benchmark")?;
+                Some(line.to_string())
+            })
+            .collect();
+
+        Ok(benches)
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/mod.rs b/src/hyperlight_ci/src/bench/mod.rs
new file mode 100644
index 000000000..a23914898
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/mod.rs
@@ -0,0 +1,67 @@
+//! The `bench` subcommand: discovers, runs, and reports on criterion benchmarks
+//! using the benchmark binary directly.
+
+mod args;
+mod discovery;
+mod output;
+mod process;
+mod progress;
+mod runner;
+mod cpu;
+
+pub use args::BenchArgs;
+
+use anyhow::{Context, Result};
+
+use self::discovery::BenchmarkDiscovery;
+use self::runner::BenchRunner;
+
+/// Synchronous entry point for the bench subcommand.
+///
+/// Creates a current-thread tokio runtime with all drivers enabled and blocks on the
+/// async implementation, returning its result.
+pub fn run(args: BenchArgs) -> Result<()> {
+    let runtime = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .context("Failed to build tokio runtime")?;
+
+    runtime.block_on(run_async(args))
+}
+
+/// Async implementation of the bench subcommand.
+///
+/// Obtains the benchmark binaries (building them unless `--binary` was given), lists the
+/// benchmarks of each binary, and hands the resulting `(binary, name)` pairs to the runner.
+/// Fails with "No benchmarks found" when the list is empty.
+async fn run_async(args: BenchArgs) -> Result<()> {
+    // Status messages on stderr are suppressed from `-qq` upwards.
+    let show_status = args.quiet < 2;
+
+    let discovery = BenchmarkDiscovery::new(&args.features, args.filter.as_deref(), args.exact);
+
+    let binaries = if !args.binary.is_empty() {
+        args.binary.clone()
+    } else {
+        if show_status {
+            eprintln!("Building benchmarks ...");
+        }
+        discovery.build().await?
+    };
+
+    let mut benches = Vec::new();
+    for binary in &binaries {
+        let names = discovery.list(binary).await?;
+        benches.extend(names.into_iter().map(|name| (binary.clone(), name)));
+    }
+
+    if benches.is_empty() {
+        anyhow::bail!("No benchmarks found");
+    }
+
+    let max_jobs = args.max_jobs();
+    let use_progress = args.use_progress();
+
+    if show_status {
+        eprintln!("Running {} benchmark(s) with parallelism {}", benches.len(), max_jobs);
+    }
+
+    let runner = BenchRunner::new(max_jobs, args.quiet, use_progress);
+    runner.run(&benches).await?;
+
+    Ok(())
+}
diff --git a/src/hyperlight_ci/src/bench/output.rs b/src/hyperlight_ci/src/bench/output.rs
new file mode 100644
index 000000000..636057c86
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/output.rs
@@ -0,0 +1,36 @@
+use std::fmt::Write;
+use std::ops::Range;
+
+use ansi_replace::AnsiExt as _;
+use ansi_replace::replacer::Writable;
+
+/// Tells whether an output line is build noise that should be suppressed.
+///
+/// A line counts as noise when it contains any of the known marker substrings.
+pub fn is_noisy_line(line: &str) -> bool {
+    const NOISE_MARKERS: [&str; 3] = [
+        "waiting for file lock on",
+        "Gnuplot not found",
+        "`bench` profile [optimized]",
+    ];
+
+    NOISE_MARKERS.iter().any(|marker| line.contains(marker))
+}
+
+/// Strip the bench name from an output line.
+///
+/// Strategy:
+/// - If the line starts with the bench name, replace it with spaces to preserve alignment
+/// - Any other appearance of the bench name, together with a single space directly
+///   before it (if present), is removed entirely
+/// - ANSI codes are kept by the replacement (presumably handled by `ansi_replace` — confirm)
+/// - If nothing but whitespace remains once ANSI codes are stripped, an empty string
+///   is returned instead
+pub fn strip_bench_prefix(line: &str, bench: &str) -> String {
+    let escaped = regex::escape(bench);
+    let pattern = regex::Regex::new(&format!(r" ?{escaped}"))
+        .expect("an escaped bench name with an optional leading space is a valid regex");
+
+    // `m` is the matched text and `i` presumably its range within the line; a match that
+    // starts at offset 0 and equals the bench name exactly (no leading space) is the prefix.
+    let result = line.ansi_replace(&pattern, |m: &str, i: Range<usize>, dst: &mut Writable| {
+        if i.start == 0 && m == bench {
+            // Pad with as many spaces as the name is long so the columns stay aligned.
+            write!(dst, "{:n$}", " ", n = m.len())?;
+        }
+        // Any other match writes nothing, which removes it from the output.
+        Ok(())
+    });
+
+    if result.ansi_strip().trim().is_empty() {
+        return String::new();
+    }
+
+    result
+}
diff --git a/src/hyperlight_ci/src/bench/process.rs b/src/hyperlight_ci/src/bench/process.rs
new file mode 100644
index 000000000..f2387d41f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/process.rs
@@ -0,0 +1,77 @@
+//! Spawns the benchmark binary for a single benchmark and streams its output.
+
+use std::ops::Deref;
+use std::path::Path;
+use std::process::Stdio;
+
+use anyhow::{Context, Result, bail};
+use cpu_pin::{CpuInfo, PinnedCommand as _};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+use tokio::sync::mpsc;
+
+/// Output of a completed benchmark process.
+pub struct ProcessOutput {
+ /// Lines read from the child's stdout and stderr, in the order they were received.
+ pub output_lines: Vec,
+}
+
+/// Spawns the benchmark binary for a single benchmark.
+///
+/// The binary is invoked as `<binary> --bench --color=always --noplot --exact <bench>` with
+/// stdout and stderr piped, and is pinned to the first logical CPU of `core`.
+///
+/// Streams output lines through `output_tx` as they arrive (for live progress updates),
+/// and returns the collected output when the process exits. Lines from stdout and stderr
+/// are interleaved in arrival order. A closed `output_tx` receiver is ignored: lines are
+/// still collected and returned.
+///
+/// # Errors
+///
+/// Fails if `core` exposes no logical CPU, the process cannot be spawned, reading either
+/// stream fails, waiting for the child fails, or the child exits with a non-success status.
+pub async fn run(
+    bench: &str,
+    binary: &Path,
+    core: impl Deref,
+    output_tx: &mpsc::UnboundedSender,
+) -> Result {
+    let mut cmd = Command::new(binary);
+    cmd.args(["--bench", "--color=always", "--noplot", "--exact"]);
+    cmd.arg(bench);
+    cmd.stdout(Stdio::piped());
+    cmd.stderr(Stdio::piped());
+
+    // Report a core without logical CPUs as an error instead of panicking inside a task.
+    let core_id = core
+        .logical_cpus
+        .first()
+        .context("Assigned CPU core has no logical CPUs to pin the benchmark to")?;
+
+    let mut child = cmd
+        .spawn_pinned(*core_id)
+        .with_context(|| format!("Failed to spawn benchmark binary: {}", binary.display()))?;
+
+    // Both handles exist because the streams were configured as piped above.
+    let stdout = child.stdout.take().expect("child stdout is piped");
+    let stderr = child.stderr.take().expect("child stderr is piped");
+    let mut reader_stdout = BufReader::new(stdout).lines();
+    let mut reader_stderr = BufReader::new(stderr).lines();
+    let mut output_lines = Vec::new();
+
+    // Combine the stream of both stdout and stderr lines.
+    // Do not exit until BOTH streams have been closed: stopping at the first EOF would drop
+    // whatever is still buffered on the other stream, and the child could then block forever
+    // writing to a pipe nobody drains while we sit in `wait()`.
+    let mut stdout_open = true;
+    let mut stderr_open = true;
+    while stdout_open || stderr_open {
+        tokio::select! {
+            // The `if` precondition disables a branch once its stream has reached EOF.
+            line = reader_stdout.next_line(), if stdout_open => {
+                match line.context("Failed to read stdout")? {
+                    Some(line) => {
+                        let _ = output_tx.send(line.clone());
+                        output_lines.push(line);
+                    }
+                    None => stdout_open = false,
+                }
+            }
+            line = reader_stderr.next_line(), if stderr_open => {
+                match line.context("Failed to read stderr")? {
+                    Some(line) => {
+                        let _ = output_tx.send(line.clone());
+                        output_lines.push(line);
+                    }
+                    None => stderr_open = false,
+                }
+            }
+        }
+    }
+
+    let status = child
+        .wait()
+        .await
+        .context("Failed to wait for benchmark binary")?;
+
+    if !status.success() {
+        bail!(
+            "benchmark binary exited with status {} for benchmark '{}'",
+            status,
+            bench
+        );
+    }
+
+    Ok(ProcessOutput { output_lines })
+}
diff --git a/src/hyperlight_ci/src/bench/progress.rs b/src/hyperlight_ci/src/bench/progress.rs
new file mode 100644
index 000000000..137e7181f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/progress.rs
@@ -0,0 +1,142 @@
+//! Progress bar and spinner management for benchmark output.
+
+use std::collections::{HashMap, HashSet};
+
+use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
+
+use super::output::{is_noisy_line, strip_bench_prefix};
+
+/// Manages progress bars for a set of benchmarks.
+pub struct ProgressTracker {
+ /// Container that the overall bar and all spinners are attached to.
+ multi: MultiProgress,
+ /// Overall completion bar; a hidden bar when progress display is disabled.
+ overall: ProgressBar,
+ /// Visible spinners, keyed by benchmark name.
+ spinners: HashMap,
+ /// Benchmarks that have been registered but not yet started (no spinner visible).
+ pending: HashSet,
+ /// Verbosity selector consulted when printing messages and per-benchmark output.
+ quiet_level: u8,
+ /// Whether progress bars are shown at all.
+ enabled: bool,
+}
+
+impl ProgressTracker {
+ /// Create a new progress tracker for `total` benchmarks.
+ ///
+ /// If `enabled` is false the overall bar is hidden and no spinner is ever registered;
+ /// messages passed to [`Self::println`] then go to stderr, subject to `quiet_level`.
+ pub fn new(total: usize, quiet_level: u8, enabled: bool) -> Self {
+ let multi = MultiProgress::new();
+ let overall = if enabled {
+ let bar = multi.add(ProgressBar::new(total as u64));
+ bar.set_style(
+ // The template is a constant, so a parse failure here would be a programming error.
+ ProgressStyle::with_template("{prefix} [{bar:40.cyan/blue}] {pos}/{len} ({eta})")
+ .unwrap()
+ .progress_chars("━━─"),
+ );
+ bar.set_prefix("Benchmarks");
+ bar
+ } else {
+ ProgressBar::hidden()
+ };
+
+ Self {
+ multi,
+ overall,
+ spinners: HashMap::new(),
+ pending: HashSet::new(),
+ quiet_level,
+ enabled,
+ }
+ }
+
+ /// Register a benchmark for tracking (spinner stays hidden until it starts running).
+ ///
+ /// Does nothing when progress display is disabled.
+ pub fn add_spinner(&mut self, bench: &str) {
+ if !self.enabled {
+ return;
+ }
+ self.pending.insert(bench.to_string());
+ }
+
+ /// Update the spinner for a benchmark with an output line.
+ ///
+ /// On the first update, the spinner is created and becomes visible.
+ /// Filters noisy lines and strips the benchmark prefix before displaying.
+ /// Benchmarks that were never registered (or have already finished) are ignored.
+ pub fn update_spinner(&mut self, bench: &str, line: &str) {
+ if is_noisy_line(line) {
+ return;
+ }
+ // If this is a pending benchmark, create and show its spinner now
+ if self.pending.remove(bench) {
+ // Inserted before the overall bar so that bar is listed after every spinner.
+ let bar = self.multi.insert_before(&self.overall, ProgressBar::new_spinner());
+ bar.set_style(
+ ProgressStyle::with_template(" {spinner:.green} {msg}")
+ .unwrap()
+ .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]),
+ );
+ bar.enable_steady_tick(std::time::Duration::from_millis(100));
+ self.spinners.insert(bench.to_string(), bar);
+ }
+ let Some(spinner) = self.spinners.get(bench) else { return };
+ let display = strip_bench_prefix(line, bench);
+ // Lines that are empty after stripping leave the previous message in place.
+ if !display.is_empty() {
+ // The benchmark name is shown in bold green (ANSI `1;32`).
+ spinner.set_message(format!("\x1b[1;32m{bench}\x1b[0m: {display}"));
+ }
+ }
+
+ /// Finish and remove the spinner for a benchmark.
+ ///
+ /// Does nothing if the benchmark has no visible spinner.
+ pub fn finish_spinner(&mut self, bench: &str) {
+ if let Some(bar) = self.spinners.remove(bench) {
+ bar.finish_and_clear();
+ self.multi.remove(&bar);
+ }
+ }
+
+ /// Set the overall progress bar to `position`.
+ ///
+ /// `position` is the absolute number of completed benchmarks, not an increment.
+ pub fn advance(&self, position: u64) {
+ self.overall.set_position(position);
+ }
+
+ /// Print a message respecting the progress system and quiet level.
+ ///
+ /// With progress enabled the message is routed through the progress container; otherwise
+ /// it is written to stderr, but only at quiet level 0.
+ // NOTE(review): when progress is disabled, quiet level 1 suppresses completion headers and
+ // error lines as well — confirm this is intended, given that quiet level 1 is described
+ // elsewhere as still showing completion headers.
+ pub fn println(&self, msg: &str) {
+ if self.enabled {
+ // Printing is best-effort; an I/O error here is deliberately ignored.
+ let _ = self.multi.println(msg);
+ } else if self.quiet_level < 1 {
+ eprintln!("{msg}");
+ }
+ }
+
+ /// Print the completion summary for a benchmark.
+ ///
+ /// Emits a `[done_count/total] <bench> ... <status>` header. At quiet_level 0, also prints
+ /// the filtered output lines followed by a blank line. If `error` is set, it is printed last.
+ pub fn print_completion(
+ &self,
+ done_count: usize,
+ total: usize,
+ bench: &str,
+ status: &str,
+ output_lines: &[String],
+ error: Option<&anyhow::Error>,
+ ) {
+ self.println(&format!(
+ "[{done_count}/{total}] \x1b[1;32m{bench}\x1b[0m ... {status}"
+ ));
+
+ if self.quiet_level == 0 {
+ for line in output_lines {
+ if !is_noisy_line(line) {
+ let line = strip_bench_prefix(line, bench);
+ // Skip blank lines and the "Benchmarking ..." lines; only the remaining
+ // output is shown in the summary.
+ if !line.is_empty() && !line.starts_with("Benchmarking") {
+ self.println(&line);
+ }
+ }
+ }
+ self.println("");
+ }
+
+ if let Some(e) = error {
+ self.println(&format!(" error: {e}"));
+ }
+ }
+
+ /// Finish the overall progress bar and clear it from the display.
+ pub fn finish(&self) {
+ self.overall.finish_and_clear();
+ }
+}
diff --git a/src/hyperlight_ci/src/bench/runner.rs b/src/hyperlight_ci/src/bench/runner.rs
new file mode 100644
index 000000000..97a945acc
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/runner.rs
@@ -0,0 +1,188 @@
+//! Orchestrates parallel benchmark execution, wiring together process spawning
+//! and progress reporting.
+
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Result, bail};
+use cpu_pin::CpuInfo;
+
+use super::cpu::PerformanceCoresPool;
+use super::process::{self, ProcessOutput};
+use super::progress::ProgressTracker;
+
+/// Events sent from benchmark tasks to the orchestration loop.
+///
+/// For a given benchmark, `Done` is sent only after its output forwarding has completed.
+enum BenchEvent {
+ /// An output line was produced by the given benchmark.
+ OutputLine { bench: String, line: String },
+ /// The benchmark has completed (successfully or not).
+ Done(BenchResult),
+}
+
+/// Result of a single benchmark run, combining identity with output.
+struct BenchResult {
+ /// Name of the benchmark this result belongs to.
+ bench: String,
+ /// Output lines collected from the benchmark process.
+ output_lines: Vec,
+ /// `Ok(())` if the benchmark process ran and exited successfully, otherwise the failure.
+ success: Result<()>,
+}
+
+impl BenchResult {
+    /// Short status label for the completion header: `"done"` on success, `"FAILED"` otherwise.
+    fn status(&self) -> &str {
+        match &self.success {
+            Ok(()) => "done",
+            Err(_) => "FAILED",
+        }
+    }
+}
+
+/// Orchestrates parallel benchmark execution with progress reporting.
+pub struct BenchRunner {
+ /// Number of benchmarks allowed to run concurrently; also the size of the core pool.
+ max_jobs: usize,
+ /// Verbosity selector forwarded to the progress tracker (0 is the most verbose).
+ quiet_level: u8,
+ /// Whether progress bars and spinners are displayed.
+ use_progress: bool,
+}
+
+impl BenchRunner {
+    /// Create a new runner with the given configuration.
+    pub fn new(max_jobs: usize, quiet_level: u8, use_progress: bool) -> Self {
+        Self {
+            max_jobs,
+            quiet_level,
+            use_progress,
+        }
+    }
+
+    /// Run all benchmarks in parallel.
+    ///
+    /// Each entry is a (binary_path, benchmark_name) pair.
+    ///
+    /// Quiet levels:
+    /// - 0: show progress, completion headers, and per-benchmark output
+    /// - 1: show progress and completion headers only (no output details)
+    /// - 2+: fully silent (no progress, no output)
+    ///
+    /// # Errors
+    ///
+    /// Fails if `max_jobs` exceeds the number of performance cores, if the core pool cannot
+    /// be created, if any benchmark fails, or if any benchmark task ends without reporting
+    /// a result.
+    pub async fn run(&self, benches: &[(PathBuf, String)]) -> Result<()> {
+        // Validate the configuration and acquire the core pool before any progress UI is
+        // created, so an early error is not reported on top of a half-drawn progress bar.
+        let available_cores = PerformanceCoresPool::num_cores();
+        if self.max_jobs > available_cores {
+            bail!(
+                "Requested number of jobs {} exceeds available performance cores {}, use --jobs=0 or --quick to use all available performance cores.",
+                self.max_jobs,
+                available_cores,
+            );
+        }
+        let pool = PerformanceCoresPool::new(self.max_jobs)?;
+
+        let total = benches.len();
+        let mut tracker = ProgressTracker::new(total, self.quiet_level, self.use_progress);
+
+        let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::();
+
+        // Spawn all benchmarks up front; each task first awaits a core from the pool, which
+        // is what limits how many run at the same time.
+        for (binary, bench) in benches {
+            tracker.add_spinner(bench);
+
+            let bench = bench.clone();
+            let binary = binary.clone();
+            let tx = tx.clone();
+            let pool = pool.clone();
+
+            tokio::spawn(async move {
+                let core = pool.get().await;
+                Self::run_one(&bench, &binary, core, &tx).await;
+            });
+        }
+
+        // Drop our sender so rx closes when all tasks finish
+        drop(tx);
+
+        // Process events as they arrive
+        let mut failed = Vec::new();
+        let mut done_count = 0;
+
+        while let Some(event) = rx.recv().await {
+            match event {
+                BenchEvent::OutputLine { bench, line } => {
+                    tracker.update_spinner(&bench, &line);
+                }
+                BenchEvent::Done(result) => {
+                    done_count += 1;
+                    tracker.finish_spinner(&result.bench);
+                    tracker.advance(done_count as u64);
+
+                    let error = result.success.as_ref().err();
+                    tracker.print_completion(
+                        done_count,
+                        total,
+                        &result.bench,
+                        result.status(),
+                        &result.output_lines,
+                        error,
+                    );
+
+                    if result.success.is_err() {
+                        failed.push(result.bench);
+                    }
+                }
+            }
+        }
+
+        tracker.finish();
+
+        if !failed.is_empty() {
+            anyhow::bail!(
+                "{} benchmark(s) failed: {}",
+                failed.len(),
+                failed.join(", ")
+            );
+        }
+
+        // A task that panics drops its sender without ever sending `Done`. Without this check
+        // such a benchmark would silently vanish and the whole run would be reported as a
+        // success.
+        if done_count != total {
+            bail!(
+                "{} of {} benchmark(s) ended without reporting a result",
+                total - done_count,
+                total
+            );
+        }
+
+        Ok(())
+    }
+
+    /// Run a single benchmark, streaming output events and sending the final result.
+    ///
+    /// Every line produced by the process is forwarded as a [`BenchEvent::OutputLine`]; a
+    /// single [`BenchEvent::Done`] follows once forwarding has finished. Send failures are
+    /// ignored because they only occur when the receiving loop has already gone away.
+    async fn run_one(
+        bench: &str,
+        binary: &Path,
+        core: impl Deref,
+        event_tx: &tokio::sync::mpsc::UnboundedSender,
+    ) {
+        // Create a channel for output lines from the process
+        let (output_tx, mut output_rx) = tokio::sync::mpsc::unbounded_channel::();
+        let bench_name = bench.to_string();
+        let event_tx_clone = event_tx.clone();
+
+        // Forward output lines as events, keeping a copy so the output of a failed benchmark
+        // can still be shown (a failed process run returns no lines of its own).
+        let forwarder = tokio::spawn(async move {
+            let mut forwarded = Vec::new();
+            while let Some(line) = output_rx.recv().await {
+                let _ = event_tx_clone.send(BenchEvent::OutputLine {
+                    bench: bench_name.clone(),
+                    line: line.clone(),
+                });
+                forwarded.push(line);
+            }
+            forwarded
+        });
+
+        // Signal that this benchmark is starting
+        let _ = event_tx.send(BenchEvent::OutputLine {
+            bench: bench.to_string(),
+            line: "Starting ...".to_string(),
+        });
+
+        let outcome = process::run(bench, binary, core, &output_tx).await;
+
+        // Ensure all output forwarding completes before sending Done
+        drop(output_tx);
+        let forwarded = forwarder.await.unwrap_or_default();
+
+        let result = match outcome {
+            Ok(ProcessOutput { output_lines }) => BenchResult {
+                bench: bench.to_string(),
+                output_lines,
+                success: Ok(()),
+            },
+            Err(e) => BenchResult {
+                bench: bench.to_string(),
+                // Whatever the process printed before failing is usually the best diagnostic.
+                output_lines: forwarded,
+                success: Err(e),
+            },
+        };
+
+        let _ = event_tx.send(BenchEvent::Done(result));
+    }
+}
diff --git a/src/hyperlight_ci/src/bench_report/mod.rs b/src/hyperlight_ci/src/bench_report/mod.rs
new file mode 100644
index 000000000..619e61319
--- /dev/null
+++ b/src/hyperlight_ci/src/bench_report/mod.rs
@@ -0,0 +1,144 @@
+//! The `bench-report` subcommand: generates a markdown table from existing
+//! criterion benchmark results in `target/criterion/`.
+
+mod table;
+
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::{Context, Result};
+use clap::Args;
+
+// NOTE: the `///` comments on the fields below are picked up by clap's derive macro and shown
+// as `--help` text, so rewording them changes user-visible output.
+/// Command-line arguments for the `bench-report` subcommand.
+#[derive(Args)]
+pub struct BenchReportArgs {
+ // No `long`/`short` attribute, so clap treats this as an optional positional argument.
+ /// Filter benchmarks by name (substring match, or exact with --exact)
+ pub filter: Option,
+
+ /// Match the filter exactly instead of as a substring
+ #[arg(long)]
+ pub exact: bool,
+
+ /// Benchmark binary to list benchmarks from (can be specified multiple times).
+ /// When provided, only benchmarks available in these binaries are included.
+ #[arg(long)]
+ pub binary: Vec,
+
+ /// Path to the criterion output directory
+ #[arg(long, default_value = "target/criterion")]
+ pub criterion_dir: PathBuf,
+
+ /// Output file path (default: stdout)
+ #[arg(short, long)]
+ pub output: Option,
+}
+
+/// Entry point for the bench-report subcommand.
+///
+/// Builds the optional benchmark allowlist from `args`, renders the markdown report from
+/// `args.criterion_dir`, and writes it to `args.output` when set, otherwise to stdout.
+///
+/// # Errors
+///
+/// Fails if the allowlist cannot be built, the report cannot be rendered, or the output
+/// file cannot be written (the error names the target path).
+pub fn run(args: BenchReportArgs) -> Result<()> {
+    let allowlist = build_allowlist(&args)?;
+    let markdown = table::render(&args.criterion_dir, allowlist.as_deref())?;
+
+    match &args.output {
+        // A bare I/O error such as "No such file or directory" is useless without the path.
+        Some(path) => std::fs::write(path, &markdown)
+            .with_context(|| format!("Failed to write benchmark report to {path:?}"))?,
+        None => print!("{markdown}"),
+    }
+
+    Ok(())
+}
+
+/// Builds an allowlist of benchmark full_ids by querying binaries and applying the filter.
+///
+/// - If `--binary` is specified, lists benchmarks from each binary.
+/// - If a text filter is specified, applies substring (or exact) matching.
+/// - If neither is specified, returns `None` (include everything).
+fn build_allowlist(args: &BenchReportArgs) -> Result