diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 000000000..1831b0a74
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[alias] # command aliases
+ci = ["run", "--quiet", "--package=hyperlight-ci", "--"]
\ No newline at end of file
diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml
index 2f6294476..b212504e3 100644
--- a/.github/workflows/ValidatePullRequest.yml
+++ b/.github/workflows/ValidatePullRequest.yml
@@ -15,7 +15,7 @@ concurrency:
 
 permissions:
   contents: write
-  pull-requests: read
+  pull-requests: write
 
 jobs:
   docs-pr:
@@ -140,6 +140,86 @@ jobs:
       docs_only: ${{ needs.docs-pr.outputs.docs-only }}
     secrets: inherit
 
+  # Run benchmarks on each hypervisor/CPU combination (results are posted by the benchmark-comment job)
+  benchmarks:
+    needs:
+      - docs-pr
+      - build-guests
+    # Required because update-guest-locks is skipped on non-dependabot PRs,
+    # and a skipped dependency transitively skips all downstream jobs.
+    # See: https://github.com/actions/runner/issues/2205
+    if: ${{ !cancelled() && !failure() }}
+    strategy:
+      fail-fast: false
+      matrix:
+        hypervisor: ['hyperv-ws2025', mshv3, kvm]
+        cpu: [amd, intel]
+    uses: ./.github/workflows/dep_benchmarks.yml
+    secrets: inherit
+    with:
+      docs_only: ${{ needs.docs-pr.outputs.docs-only }}
+      hypervisor: ${{ matrix.hypervisor }}
+      cpu: ${{ matrix.cpu }}
+
+  # Collect all benchmark reports and post a single combined PR comment
+  benchmark-comment:
+    needs: benchmarks
+    if: ${{ !cancelled() && !failure() }}
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+    steps:
+      - name: Download all benchmark reports
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
+        with:
+          pattern: benchmark-report_*
+          path: reports/
+
+      - name: Post combined benchmark results to PR
+        uses: actions/github-script@v9
+        with:
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+
+            const reportsDir = 'reports';
+            if (!fs.existsSync(reportsDir)) {
+              console.log('No benchmark reports found, skipping comment.');
+              return;
+            }
+
+            // Collect all report files from subdirectories
+            const sections = [];
+            const dirs = fs.readdirSync(reportsDir).sort();
+            for (const dir of dirs) {
+              const mdPath = path.join(reportsDir, dir, 'benchmark.md');
+              if (!fs.existsSync(mdPath)) continue;
+
+              // Extract hypervisor/cpu from artifact name: benchmark-report_OS_hypervisor_cpu
+              const parts = dir.replace('benchmark-report_', '').split('_');
+              const os = parts[0];
+              const hypervisor = parts.slice(1, -1).join('_');
+              const cpu = parts[parts.length - 1];
+              const label = `${hypervisor} / ${cpu} (${os})`;
+
+              const content = fs.readFileSync(mdPath, 'utf8').trim();
+              sections.push(`<details>\n<summary><b>${label}</b></summary>\n\n${content}\n\n</details>`);
+            }
+
+            if (sections.length === 0) {
+              console.log('No benchmark report content found, skipping comment.');
+              return;
+            }
+
+            const body = `## Benchmark Results\n\n${sections.join('\n\n')}`;
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body,
+            });
+
   spelling:
     name: spell check with typos
     runs-on: ubuntu-latest
@@ -167,6 +247,8 @@ jobs:
       - build-test
       - run-examples
       - fuzzing
+      - benchmarks
+      - benchmark-comment
       - spelling
       - license-headers
     if: always()
diff --git a/.github/workflows/dep_benchmarks.yml b/.github/workflows/dep_benchmarks.yml
index b0c47be76..c4420b117 100644
--- a/.github/workflows/dep_benchmarks.yml
+++ b/.github/workflows/dep_benchmarks.yml
@@ -56,7 +56,6 @@ on:
         required: false
         type: number
         default: 5
-
 env:
   CARGO_TERM_COLOR: always
   RUST_BACKTRACE: full
@@ -133,7 +132,17 @@ jobs:
         continue-on-error: true
 
       - name: Run benchmarks
-        run: just bench-ci main
+        run: just bench-ci
+
+      - name: Create benchmarks report
+        run: cargo ci bench-report > target/criterion/benchmark.md
+
+      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: benchmark-report_${{ runner.os }}_${{ inputs.hypervisor }}_${{ inputs.cpu }}
+          path: target/criterion/benchmark.md
+          if-no-files-found: error
+          retention-days: 1
 
       - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
         with:
diff --git a/Cargo.lock b/Cargo.lock
index ba73df16d..f7f485cba 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -58,18 +58,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
 
 [[package]]
-name = "anstream"
-version = "0.6.21"
+name = "ansi-replace"
+version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+checksum = "7f8b155ab93213f41c886d3a46e335258428e52c7cf868e25cf099d50274496d"
 dependencies = [
- "anstyle",
- "anstyle-parse 0.2.7",
- "anstyle-query",
- "anstyle-wincon",
- "colorchoice",
- "is_terminal_polyfill",
- "utf8parse",
+ "regex",
+ "stable-pattern",
 ]
 
 [[package]]
@@ -79,7 +74,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
 dependencies = [
  "anstyle",
- "anstyle-parse 1.0.0",
+ "anstyle-parse",
  "anstyle-query",
  "anstyle-wincon",
  "colorchoice",
@@ -93,15 +88,6 @@ version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
 
-[[package]]
-name = "anstyle-parse"
-version = "0.2.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
-dependencies = [
- "utf8parse",
-]
-
 [[package]]
 name = "anstyle-parse"
 version = "1.0.0"
@@ -117,7 +103,7 @@ version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -128,7 +114,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
 dependencies = [
  "anstyle",
  "once_cell_polyfill",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -499,25 +485,38 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.58"
+version = "4.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806"
+checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
 dependencies = [
  "clap_builder",
+ "clap_derive",
 ]
 
 [[package]]
 name = "clap_builder"
-version = "4.5.58"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
 dependencies = [
- "anstream 0.6.21",
+ "anstream",
  "anstyle",
  "clap_lex",
  "strsim",
 ]
 
+[[package]]
+name = "clap_derive"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "clap_lex"
 version = "1.0.0"
@@ -530,6 +529,19 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
 
+[[package]]
+name = "console"
+version = "0.15.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
+dependencies = [
+ "encode_unicode",
+ "libc",
+ "once_cell",
+ "unicode-width",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "constant_time_eq"
 version = "0.4.2"
@@ -609,6 +621,19 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "cpu-pin"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb5bc1be026f7f066429ce0611e23a341db91b91ed701de4a1432d01d3ed1105"
+dependencies = [
+ "libc",
+ "mach2 0.6.0",
+ "once_cell",
+ "tokio",
+ "windows",
+]
+
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -769,7 +794,7 @@ dependencies = [
  "libc",
  "option-ext",
  "redox_users",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -815,6 +840,12 @@ dependencies = [
  "zerocopy",
 ]
 
+[[package]]
+name = "encode_unicode"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
+
 [[package]]
 name = "endian-type"
 version = "0.1.2"
@@ -837,7 +868,7 @@ version = "0.11.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
 dependencies = [
- "anstream 1.0.0",
+ "anstream",
  "anstyle",
  "env_filter",
  "jiff",
@@ -857,7 +888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -1190,7 +1221,7 @@ dependencies = [
  "gobject-sys",
  "libc",
  "system-deps",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -1359,6 +1390,12 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
 [[package]]
 name = "http"
 version = "1.4.0"
@@ -1456,6 +1493,23 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "hyperlight-ci"
+version = "0.0.0"
+dependencies = [
+ "ansi-replace",
+ "anyhow",
+ "clap",
+ "cpu-pin",
+ "indicatif",
+ "num_cpus",
+ "regex",
+ "serde",
+ "serde_json",
+ "simple-pool",
+ "tokio",
+]
+
 [[package]]
 name = "hyperlight-common"
 version = "0.15.0"
@@ -1626,7 +1680,7 @@ dependencies = [
  "vmm-sys-util",
  "windows",
  "windows-result",
- "windows-sys",
+ "windows-sys 0.61.2",
  "windows-version",
 ]
 
@@ -1820,6 +1874,19 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "indicatif"
+version = "0.17.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
+dependencies = [
+ "console",
+ "number_prefix",
+ "portable-atomic",
+ "unicode-width",
+ "web-time",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -2105,6 +2172,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "mach2"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dae608c151f68243f2b000364e1f7b186d9c29845f7d2d85bd31b9ad77ad552b"
+
 [[package]]
 name = "macho-unwind-info"
 version = "0.5.0"
@@ -2216,7 +2289,7 @@ checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
 dependencies = [
  "libc",
  "wasi",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -2280,7 +2353,7 @@ version = "0.50.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -2292,6 +2365,16 @@ dependencies = [
  "autocfg",
 ]
 
+[[package]]
+name = "num_cpus"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
 [[package]]
 name = "num_enum"
 version = "0.7.5"
@@ -2313,6 +2396,12 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "number_prefix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
+
 [[package]]
 name = "object"
 version = "0.39.0"
@@ -2324,6 +2413,12 @@ dependencies = [
  "ruzstd",
 ]
 
+[[package]]
+name = "object-id"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c587bd1cd63959a8520442afc0f92a875d83deea175c7b48dd9f104a2c5070a9"
+
 [[package]]
 name = "once_cell"
 version = "1.21.4"
@@ -2785,7 +2880,7 @@ dependencies = [
  "bindgen 0.70.1",
  "libc",
  "libproc",
- "mach2",
+ "mach2 0.4.3",
  "winapi",
 ]
 
@@ -3152,7 +3247,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3363,6 +3458,16 @@ version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
 
+[[package]]
+name = "simple-pool"
+version = "0.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "073382259dbeb56c3eaab04a1d330459f6490d1e518b2a8ee441c8bd00dbc092"
+dependencies = [
+ "object-id",
+ "parking_lot",
+]
+
 [[package]]
 name = "sketches-ddsketch"
 version = "0.3.0"
@@ -3388,7 +3493,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
 dependencies = [
  "libc",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3406,6 +3511,15 @@ dependencies = [
  "lock_api",
 ]
 
+[[package]]
+name = "stable-pattern"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4564168c00635f88eaed410d5efa8131afa8d8699a612c80c455a0ba05c21045"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "stable_deref_trait"
 version = "1.2.1"
@@ -3478,7 +3592,7 @@ dependencies = [
  "getrandom 0.4.1",
  "once_cell",
  "rustix",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3553,7 +3667,7 @@ dependencies = [
  "signal-hook-registry",
  "socket2",
  "tokio-macros",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3913,6 +4027,12 @@ version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
 
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
 [[package]]
 name = "unicode-xid"
 version = "0.2.6"
@@ -4195,7 +4315,7 @@ version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -4305,6 +4425,15 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.61.2"
@@ -4314,6 +4443,22 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
 [[package]]
 name = "windows-threading"
 version = "0.2.1"
@@ -4332,6 +4477,54 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
 [[package]]
 name = "winnow"
 version = "0.7.14"
diff --git a/Cargo.toml b/Cargo.toml
index e9b69f40d..6650dcbf9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ default-members = [
     "src/hyperlight_testing",
 ]
 members = [
+    "src/hyperlight_ci",
     "src/hyperlight_common",
     "src/hyperlight_guest",
     "src/hyperlight_host",
diff --git a/Justfile b/Justfile
index 401897425..9b6846289 100644
--- a/Justfile
+++ b/Justfile
@@ -177,7 +177,7 @@ run-examples-like-ci config=default-target hypervisor="kvm":
 
 benchmarks-like-ci config=default-target hypervisor="kvm":
     @# Run benchmarks
-    {{ if config == "release" { "just bench-ci main" } else { "" } }}
+    {{ if config == "release" { "just bench-ci" } else { "" } }}
 
 fuzz-like-ci target config=default-target hypervisor="kvm":
     @# Run Fuzzing
@@ -400,13 +400,12 @@ bench-download os hypervisor cpu tag="":
     tar -zxvf target/benchmarks_{{ os }}_{{ hypervisor }}_{{ cpu }}.tar.gz -C target/criterion/ --strip-components=1
 
-# Warning: compares to and then OVERWRITES the given baseline
+# Warning: compares to and then OVERWRITES the saved `base` baseline
-bench-ci baseline features="":
-    @# Benchmarks are always run with release builds for meaningful results
-    cargo bench --profile=release {{ if features =="" {''} else { "--features " + features } }} -- --verbose --save-baseline {{ baseline }}
+bench-ci features="":
+    cargo ci bench --no-progress {{ if features == "" {''} else { "--features " + features } }}
 
 bench features="":
     @# Benchmarks are always run with release builds for meaningful results
-    cargo bench --profile=release {{ if features =="" {''} else { "--features " + features } }} -- --verbose
+    cargo ci bench {{ if features == "" {''} else { "--features " + features } }}
 
 ###############
 ### FUZZING ###
diff --git a/docs/benchmarking-hyperlight.md b/docs/benchmarking-hyperlight.md
index dd28c6ea8..2fb931011 100644
--- a/docs/benchmarking-hyperlight.md
+++ b/docs/benchmarking-hyperlight.md
@@ -72,6 +72,6 @@ Found 1 outliers among 100 measurements (1.00%)
 
 ## Running benchmarks locally
 
-Use `just bench` to run benchmarks with release builds (the only supported configuration). Comparing local benchmark results to github-saved benchmarks doesn't make much sense, since you'd be using different hardware, but you can use `just bench-download os hypervisor [tag] ` to download and extract the GitHub release benchmarks to the correct place folder. You can then run `just bench-ci main` to compare to (and overwrite) the previous release benchmarks. Note that `main` is the name of the baselines stored in GitHub.
+Use `just bench` to run benchmarks with release builds (the only supported configuration). Comparing local benchmark results to GitHub-saved benchmarks doesn't make much sense, since you'd be using different hardware, but you can use `just bench-download os hypervisor cpu [tag]` to download and extract the GitHub release benchmarks into the correct folder. You can then run `just bench-ci` to compare to (and overwrite) the previous release benchmarks. The name of the baselines stored in GitHub is `base`.
 
 **Important**: The `just bench` command uses release builds by default to ensure meaningful performance measurements. For profiling purposes, you can compile benchmarks with debug symbols by running `cargo bench` directly.
diff --git a/src/hyperlight_ci/Cargo.toml b/src/hyperlight_ci/Cargo.toml
new file mode 100644
index 000000000..fa5555a9f
--- /dev/null
+++ b/src/hyperlight_ci/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "hyperlight-ci"
+edition = "2021"
+# `version` and other publishing metadata are intentionally not set, to avoid accidentally publishing this crate to crates.io
+description = """
+Hyperlight's CI and development tools.
+"""
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = "1"
+clap = { version = "4.6.1", features = ["derive"] }
+indicatif = "0.17"
+num_cpus = "1"
+tokio = { version = "1", features = ["rt", "process", "io-util", "sync", "macros"] }
+ansi-replace = "0.1"
+regex = "1"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+simple-pool = "0.0.18"
+cpu-pin = { version = "0.1.3", features = ["tokio"] }
\ No newline at end of file
diff --git a/src/hyperlight_ci/src/bench/args.rs b/src/hyperlight_ci/src/bench/args.rs
new file mode 100644
index 000000000..3aa92a46f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/args.rs
@@ -0,0 +1,52 @@
+use std::path::PathBuf;
+
+use clap::Args;
+
+use crate::bench::cpu::PerformanceCoresPool;
+
+/// Command-line arguments for the `bench` subcommand.
+#[derive(Args)]
+pub struct BenchArgs {
+    /// Filter benchmarks by name (substring match, or exact with --exact)
+    pub filter: Option<String>,
+
+    /// Match the filter exactly instead of as a substring
+    #[arg(long)]
+    pub exact: bool,
+
+    /// Pre-built benchmark binary to use (skip build step; can be specified multiple times)
+    #[arg(long)]
+    pub binary: Vec<PathBuf>,
+
+    /// Number of benchmarks to run in parallel (0 = one per performance core, default: 0)
+    #[arg(long, short, default_value_t = 0)]
+    pub jobs: usize,
+
+    /// Reduce output verbosity (repeatable: -q hides stderr, -qq hides everything)
+    #[arg(short, long, action = clap::ArgAction::Count)]
+    pub quiet: u8,
+
+    /// Disable the progress bar (by default it is shown only when stderr is a TTY)
+    #[arg(long)]
+    pub no_progress: bool,
+
+    /// Additional cargo features to enable when building the benchmarks
+    #[arg(short = 'F', long, default_value = "")]
+    pub features: String,
+}
+
+impl BenchArgs {
+    /// Resolve the number of parallel benchmark jobs (`0` means one per performance core).
+    pub fn max_jobs(&self) -> usize {
+        if self.jobs == 0 {
+            PerformanceCoresPool::num_cores()
+        } else {
+            self.jobs
+        }
+    }
+
+    /// Progress bars are shown unless disabled, stderr is not a TTY, or `-qq` was given.
+    pub fn use_progress(&self) -> bool {
+        !self.no_progress && std::io::IsTerminal::is_terminal(&std::io::stderr()) && self.quiet < 2
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/cpu.rs b/src/hyperlight_ci/src/bench/cpu.rs
new file mode 100644
index 000000000..5ae6f7a9a
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/cpu.rs
@@ -0,0 +1,71 @@
+//! CPU core discovery and pool management for benchmark isolation.
+//!
+//! Discovers performance cores (P-cores) on the system and provides a pool
+//! that allows benchmarks to be pinned to specific cores, avoiding interference
+//! from concurrent workloads.
+
+use std::sync::{Arc, LazyLock};
+
+use anyhow::{Result, bail};
+use cpu_pin::CpuInfo;
+use simple_pool::{ResourcePool, ResourcePoolGuard};
+
+/// Lazily discovered list of performance cores on the system.
+///
+/// Populated from `best_cores()`, which is expected to keep only `Performance`-type
+/// cores with the maximum number of logical processors (full P-cores, no E-cores).
+/// The first access panics if the CPU topology cannot be detected.
+static PERFORMANCE_CORES: LazyLock<Vec<&'static CpuInfo>> = LazyLock::new(|| {
+    cpu_pin::topology()
+        .expect("failed to detect CPU topology")
+        .best_cores()
+});
+
+/// A pool of performance cores that can be claimed by benchmark tasks.
+///
+/// Each benchmark acquires a core from the pool before running, ensuring
+/// no two benchmarks share the same physical core simultaneously.
+#[derive(Clone)]
+pub struct PerformanceCoresPool {
+    pool: Arc<ResourcePool<CpuInfo>>,
+}
+
+impl PerformanceCoresPool {
+    /// Returns the total number of performance cores available on the system.
+    pub fn num_cores() -> usize {
+        PERFORMANCE_CORES.len()
+    }
+
+    /// Creates a new pool holding the first `size` performance cores, or returns an
+    /// error if `size` exceeds the number of available performance cores.
+    pub fn new(size: usize) -> Result<Self> {
+        if size > PERFORMANCE_CORES.len() {
+            bail!(
+                "Requested more performance cores than available: requested {size}, available {}",
+                PERFORMANCE_CORES.len()
+            );
+        }
+
+        let pool = Arc::new(ResourcePool::new());
+        for core in PERFORMANCE_CORES.iter().take(size) {
+            pool.append((*core).clone());
+        }
+
+        Ok(Self { pool })
+    }
+
+    /// Acquires a performance core from the pool, waiting if none are available.
+    ///
+    /// The core is returned to the pool when the guard is dropped.
+    pub async fn get(&self) -> ResourcePoolGuard<CpuInfo> {
+        self.pool.get().await
+    }
+}
+
+impl Default for PerformanceCoresPool {
+    /// Creates a pool containing all available performance cores.
+    fn default() -> Self {
+        Self::new(Self::num_cores())
+            .expect("a pool sized to the detected core count never exceeds the available cores")
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/discovery.rs b/src/hyperlight_ci/src/bench/discovery.rs
new file mode 100644
index 000000000..147579d28
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/discovery.rs
@@ -0,0 +1,154 @@
+use std::path::{Path, PathBuf};
+use std::process::Stdio;
+
+use anyhow::{Context, Result, bail};
+use tokio::process::Command;
+
+/// Discovers available benchmarks by querying the benchmark binary.
+pub struct BenchmarkDiscovery {
+    /// Value for `cargo build --features`; empty means no extra features.
+    features: String,
+    /// Optional name filter handed to the benchmark binary when listing.
+    filter: Option<String>,
+    /// Whether `--exact` is passed to the benchmark binary when listing.
+    exact: bool,
+}
+
+impl BenchmarkDiscovery {
+    /// Create a new discovery instance with the given parameters.
+    pub fn new(features: &str, filter: Option<&str>, exact: bool) -> Self {
+        Self {
+            features: features.to_string(),
+            filter: filter.map(|s| s.to_string()),
+            exact,
+        }
+    }
+
+    /// Build all benchmark binaries and return their paths.
+    ///
+    /// Runs `cargo build --release --benches` (plus `--features` when set) and
+    /// collects the runnable files from cargo's JSON `compiler-artifact` messages.
+    ///
+    /// # Errors
+    ///
+    /// Fails if cargo cannot be run, if the build fails (cargo's stderr is part of
+    /// the error), or if no benchmark binary shows up in the output.
+    pub async fn build(&self) -> Result<Vec<PathBuf>> {
+        // Non-executable artifacts cargo lists alongside a benchmark binary:
+        //   .d    = dep-info files (all platforms)
+        //   .pdb  = debug symbols (Windows)
+        //   .dSYM = debug symbol bundles (macOS)
+        //   .dwp  = DWARF packages (Linux, split-debuginfo)
+        //   .lib  = import libraries (Windows)
+        //   .exp  = export files (Windows)
+        const SKIPPED_SUFFIXES: [&str; 6] = [".d", ".pdb", ".dSYM", ".dwp", ".lib", ".exp"];
+
+        let mut cmd = Command::new("cargo");
+        cmd.args([
+            "build",
+            "--release",
+            "--benches",
+            // Artifact messages stay as JSON on stdout; compiler diagnostics are
+            // rendered to stderr so a failed build reports the actual errors.
+            "--message-format=json-render-diagnostics",
+        ]);
+        if !self.features.is_empty() {
+            cmd.args(["--features", &self.features]);
+        }
+        cmd.stdout(Stdio::piped());
+        cmd.stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .context("Failed to run cargo build for benchmarks")?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            bail!("Failed to build benchmarks:\n{stderr}");
+        }
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let mut binaries = Vec::new();
+
+        // Parse cargo's JSON output to find all benchmark binary paths
+        for line in stdout.lines() {
+            let Ok(msg) = serde_json::from_str::<serde_json::Value>(line) else {
+                continue;
+            };
+            if msg.get("reason").and_then(|r| r.as_str()) != Some("compiler-artifact") {
+                continue;
+            }
+            let is_bench = msg
+                .get("target")
+                .and_then(|t| t.get("kind"))
+                .and_then(|k| k.as_array())
+                .is_some_and(|kinds| kinds.iter().any(|k| k.as_str() == Some("bench")));
+            if !is_bench {
+                continue;
+            }
+            let Some(filenames) = msg.get("filenames").and_then(|f| f.as_array()) else {
+                continue;
+            };
+            binaries.extend(
+                filenames
+                    .iter()
+                    .filter_map(|f| f.as_str())
+                    .filter(|path| !SKIPPED_SUFFIXES.iter().any(|ext| path.ends_with(ext)))
+                    .map(PathBuf::from),
+            );
+        }
+
+        if binaries.is_empty() {
+            bail!("No benchmark binaries found in cargo build output");
+        }
+
+        Ok(binaries)
+    }
+
+    /// List all benchmark names matching the configured filter.
+    ///
+    /// Runs `<binary> --bench --list` (adding `--exact` and the filter when
+    /// configured) and returns each name printed as `<name>: benchmark`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the binary cannot be run or exits unsuccessfully, so that a
+    /// crashing benchmark binary is not mistaken for an empty benchmark list.
+    pub async fn list(&self, binary: &Path) -> Result<Vec<String>> {
+        let mut cmd = Command::new(binary);
+        cmd.args(["--bench", "--list"]);
+        if self.exact {
+            cmd.arg("--exact");
+        }
+        if let Some(filter) = &self.filter {
+            cmd.arg(filter);
+        }
+        cmd.stdout(Stdio::piped());
+        // Captured rather than discarded so it can explain a failed listing.
+        cmd.stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .with_context(|| format!("Failed to run {} --bench --list", binary.display()))?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            bail!(
+                "{} --bench --list failed ({}):\n{stderr}",
+                binary.display(),
+                output.status
+            );
+        }
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let benches = stdout
+            .lines()
+            .filter_map(|line| line.trim().strip_suffix(": benchmark"))
+            .map(str::to_string)
+            .collect();
+
+        Ok(benches)
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/mod.rs b/src/hyperlight_ci/src/bench/mod.rs
new file mode 100644
index 000000000..a23914898
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/mod.rs
@@ -0,0 +1,67 @@
+//! The `bench` subcommand: discovers, runs, and reports on criterion benchmarks
+//! using the benchmark binary directly.
+
+mod args;
+mod discovery;
+mod output;
+mod process;
+mod progress;
+mod runner;
+mod cpu;
+
+pub use args::BenchArgs;
+
+use anyhow::{Context, Result};
+
+use self::discovery::BenchmarkDiscovery;
+use self::runner::BenchRunner;
+
+/// Synchronous entry point for the bench subcommand: drives the async
+/// implementation to completion on a current-thread tokio runtime.
+pub fn run(args: BenchArgs) -> Result<()> {
+    let runtime = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .context("Failed to build tokio runtime")?;
+    runtime.block_on(run_async(args))
+}
+
+/// Builds (or reuses) the benchmark binaries, lists their benchmarks, and runs them.
+async fn run_async(args: BenchArgs) -> Result<()> {
+    let verbose = args.quiet < 2;
+    let discovery = BenchmarkDiscovery::new(&args.features, args.filter.as_deref(), args.exact);
+
+    // Explicitly passed binaries skip the build step entirely.
+    let binaries = if !args.binary.is_empty() {
+        args.binary.clone()
+    } else {
+        if verbose {
+            eprintln!("Building benchmarks ...");
+        }
+        discovery.build().await?
+    };
+
+    // Pair every benchmark name with the binary that listed it.
+    let mut benches = Vec::new();
+    for binary in &binaries {
+        let names = discovery.list(binary).await?;
+        benches.extend(names.into_iter().map(|name| (binary.clone(), name)));
+    }
+    if benches.is_empty() {
+        anyhow::bail!("No benchmarks found");
+    }
+
+    let max_jobs = args.max_jobs();
+    let use_progress = args.use_progress();
+    if verbose {
+        eprintln!(
+            "Running {} benchmark(s) with parallelism {}",
+            benches.len(),
+            max_jobs
+        );
+    }
+
+    BenchRunner::new(max_jobs, args.quiet, use_progress)
+        .run(&benches)
+        .await
+}
diff --git a/src/hyperlight_ci/src/bench/output.rs b/src/hyperlight_ci/src/bench/output.rs
new file mode 100644
index 000000000..636057c86
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/output.rs
@@ -0,0 +1,36 @@
+use std::fmt::Write;
+use std::ops::Range;
+
+use ansi_replace::AnsiExt as _;
+use ansi_replace::replacer::Writable;
+
+/// Reports whether `line` contains one of the known noise markers and should be suppressed.
+pub fn is_noisy_line(line: &str) -> bool {
+    const NOISE: [&str; 3] =
+        ["waiting for file lock on", "Gnuplot not found", "`bench` profile [optimized]"];
+    NOISE.iter().any(|needle| line.contains(needle))
+}
+
+/// Strip the bench name from an output line.
+///
+/// Every match of the bench name, optionally preceded by one space, is rewritten:
+/// - a match whose range starts at 0 and equals the bare name becomes spaces of equal length
+/// - any other match is removed, together with that one preceding space
+/// - if only whitespace remains after ANSI stripping, an empty string is returned
+pub fn strip_bench_prefix(line: &str, bench: &str) -> String {
+    let escaped = regex::escape(bench);
+    let pattern = regex::Regex::new(&format!(r" ?{escaped}")).unwrap();
+
+    let result = line.ansi_replace(&pattern, |m: &str, i: Range<usize>, dst: &mut Writable| {
+        if i.start == 0 && m == bench {
+            write!(dst, "{:n$}", " ", n = m.len())?;
+        }
+        Ok(())
+    });
+
+    if result.ansi_strip().trim().is_empty() {
+        return String::new();
+    }
+
+    result
+}
diff --git a/src/hyperlight_ci/src/bench/process.rs b/src/hyperlight_ci/src/bench/process.rs
new file mode 100644
index 000000000..f2387d41f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/process.rs
@@ -0,0 +1,77 @@
+//! Spawns the benchmark binary for a single benchmark and streams its output.
+
+use std::ops::Deref;
+use std::path::Path;
+use std::process::Stdio;
+
+use anyhow::{Context, Result, bail};
+use cpu_pin::{CpuInfo, PinnedCommand as _};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+use tokio::sync::mpsc;
+
+/// Combined stdout/stderr lines collected from a completed benchmark process.
+pub struct ProcessOutput {
+    pub output_lines: Vec<String>,
+}
+
+/// Spawns the benchmark binary for a single benchmark, pinned to a logical CPU of `core`.
+///
+/// Streams output lines through `output_tx` as they arrive (for live progress updates),
+/// and returns the collected output once both pipes are closed and the process succeeded.
+pub async fn run(
+    bench: &str,
+    binary: &Path,
+    core: impl Deref<Target = CpuInfo>,
+    output_tx: &mpsc::UnboundedSender<String>,
+) -> Result<ProcessOutput> {
+    let mut cmd = Command::new(binary);
+    cmd.args(["--bench", "--color=always", "--noplot", "--exact"]);
+    cmd.arg(bench);
+    cmd.stdout(Stdio::piped());
+    cmd.stderr(Stdio::piped());
+
+    let core_id = core.logical_cpus.first().context("Assigned core has no logical CPUs")?;
+
+    let mut child = cmd
+        .spawn_pinned(*core_id)
+        .with_context(|| format!("Failed to spawn benchmark binary: {}", binary.display()))?;
+
+    let stdout = child.stdout.take().unwrap();
+    let stderr = child.stderr.take().unwrap();
+    let mut reader_stdout = BufReader::new(stdout).lines();
+    let mut reader_stderr = BufReader::new(stderr).lines();
+    let mut output_lines = Vec::new();
+
+    // Merge the stdout and stderr line streams. One pipe reaching EOF must not end the
+    // loop: keep draining the other until both are closed, so no trailing output is lost.
+    let (mut stdout_open, mut stderr_open) = (true, true);
+    while stdout_open || stderr_open {
+        let line = tokio::select! {
+            line = reader_stdout.next_line(), if stdout_open => {
+                let line = line.context("Failed to read stdout")?;
+                stdout_open = line.is_some();
+                line
+            }
+            line = reader_stderr.next_line(), if stderr_open => {
+                let line = line.context("Failed to read stderr")?;
+                stderr_open = line.is_some();
+                line
+            }
+        };
+        let Some(line) = line else { continue };
+        let _ = output_tx.send(line.clone());
+        output_lines.push(line);
+    }
+
+    let status = child
+        .wait()
+        .await
+        .context("Failed to wait for benchmark binary")?;
+
+    if !status.success() {
+        bail!("benchmark binary exited with status {status} for benchmark '{bench}'");
+    }
+
+    Ok(ProcessOutput { output_lines })
+}
diff --git a/src/hyperlight_ci/src/bench/progress.rs b/src/hyperlight_ci/src/bench/progress.rs
new file mode 100644
index 000000000..137e7181f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/progress.rs
@@ -0,0 +1,142 @@
+//! Progress bar and spinner management for benchmark output.
+
+use std::collections::{HashMap, HashSet};
+
+use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
+
+use super::output::{is_noisy_line, strip_bench_prefix};
+
+/// Tracks an overall progress bar plus one spinner per benchmark that has produced output.
+pub struct ProgressTracker {
+    multi: MultiProgress,
+    overall: ProgressBar,
+    spinners: HashMap<String, ProgressBar>,
+    /// Benchmarks registered via `add_spinner` whose spinner has not been created yet.
+    pending: HashSet<String>,
+    quiet_level: u8,
+    enabled: bool,
+}
+
+impl ProgressTracker {
+    /// Build a tracker for `total` benchmarks.
+    ///
+    /// When `enabled` is false the overall bar is hidden and `add_spinner` registers nothing.
+    pub fn new(total: usize, quiet_level: u8, enabled: bool) -> Self {
+        let multi = MultiProgress::new();
+        let overall = if !enabled {
+            ProgressBar::hidden()
+        } else {
+            const TEMPLATE: &str = "{prefix} [{bar:40.cyan/blue}] {pos}/{len} ({eta})";
+            let style = ProgressStyle::with_template(TEMPLATE)
+                .unwrap()
+                .progress_chars("━━─");
+            let bar = multi.add(ProgressBar::new(total as u64));
+            bar.set_style(style);
+            bar.set_prefix("Benchmarks");
+            bar
+        };
+        Self {
+            multi,
+            overall,
+            spinners: HashMap::new(),
+            pending: HashSet::new(),
+            quiet_level,
+            enabled,
+        }
+    }
+
+    /// Mark `bench` as pending; its spinner is created by a later `update_spinner` call.
+    pub fn add_spinner(&mut self, bench: &str) {
+        // With progress disabled nothing is registered, so no spinner is ever created.
+        if self.enabled {
+            self.pending.insert(bench.to_string());
+        }
+    }
+
+    /// Show `line` as the current status of the spinner for `bench`.
+    ///
+    /// Noisy lines are ignored. The first non-noisy line for a pending benchmark creates its
+    /// spinner before the overall bar; the bench name is stripped from the text shown.
+    pub fn update_spinner(&mut self, bench: &str, line: &str) {
+        if is_noisy_line(line) {
+            return;
+        }
+        // Promote a pending benchmark to a visible, steadily ticking spinner.
+        if self.pending.remove(bench) {
+            let style = ProgressStyle::with_template("  {spinner:.green} {msg}")
+                .unwrap()
+                .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]);
+            let spinner = self.multi.insert_before(&self.overall, ProgressBar::new_spinner());
+            spinner.set_style(style);
+            spinner.enable_steady_tick(std::time::Duration::from_millis(100));
+            self.spinners.insert(bench.to_string(), spinner);
+        }
+        if let Some(spinner) = self.spinners.get(bench) {
+            let display = strip_bench_prefix(line, bench);
+            if !display.is_empty() {
+                spinner.set_message(format!("\x1b[1;32m{bench}\x1b[0m: {display}"));
+            }
+        }
+    }
+
+    /// Clear and remove the spinner for `bench`; a no-op if none was ever created.
+    pub fn finish_spinner(&mut self, bench: &str) {
+        let Some(spinner) = self.spinners.remove(bench) else { return };
+        // Clear the line first, then detach the bar from the multi-progress display.
+        spinner.finish_and_clear();
+        self.multi.remove(&spinner);
+    }
+
+    /// Set the overall progress bar's position to `position` (absolute, not an increment).
+    pub fn advance(&self, position: u64) {
+        self.overall.set_position(position);
+    }
+
+    /// Print `msg` via the progress display when enabled; else to stderr at quiet level 0.
+    pub fn println(&self, msg: &str) {
+        match self.enabled {
+            true => drop(self.multi.println(msg)),
+            false if self.quiet_level == 0 => eprintln!("{msg}"),
+            false => {}
+        }
+    }
+
+    /// Print the completion summary for a benchmark.
+    ///
+    /// Prints a `[done/total] name ... status` header, then (at quiet_level 0 only) the
+    /// filtered output lines followed by an empty line, and finally the error, if any.
+    pub fn print_completion(
+        &self,
+        done_count: usize,
+        total: usize,
+        bench: &str,
+        status: &str,
+        output_lines: &[String],
+        error: Option<&anyhow::Error>,
+    ) {
+        let header = format!("[{done_count}/{total}] \x1b[1;32m{bench}\x1b[0m ... {status}");
+        self.println(&header);
+
+        if self.quiet_level == 0 {
+            // Drop noisy lines, lines left empty, and lines starting with "Benchmarking".
+            output_lines
+                .iter()
+                .filter(|raw| !is_noisy_line(raw))
+                .map(|raw| strip_bench_prefix(raw, bench))
+                .filter(|text| !text.is_empty() && !text.starts_with("Benchmarking"))
+                .for_each(|text| self.println(&text));
+            self.println("");
+        }
+
+        // The error goes through the same `println` path as the rest of the summary.
+        if let Some(e) = error {
+            self.println(&format!("  error: {e}"));
+        }
+    }
+
+    /// Finish the overall progress bar and clear it (`finish_and_clear`).
+    pub fn finish(&self) {
+        self.overall.finish_and_clear();
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/runner.rs b/src/hyperlight_ci/src/bench/runner.rs
new file mode 100644
index 000000000..97a945acc
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/runner.rs
@@ -0,0 +1,188 @@
+//! Orchestrates parallel benchmark execution, wiring together process spawning
+//! and progress reporting.
+
+use std::ops::Deref;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Result, bail};
+use cpu_pin::CpuInfo;
+
+use super::cpu::PerformanceCoresPool;
+use super::process::{self, ProcessOutput};
+use super::progress::ProgressTracker;
+
+/// Events sent from benchmark tasks to the orchestration loop in `BenchRunner::run`.
+enum BenchEvent {
+    /// One output line for the named benchmark (including the synthetic "Starting ..." line).
+    OutputLine { bench: String, line: String },
+    /// The benchmark finished, successfully or not; carries its final result.
+    Done(BenchResult),
+}
+
+/// Outcome of a single benchmark run: its name, captured output, and success or error.
+struct BenchResult {
+    bench: String,
+    output_lines: Vec<String>,
+    success: Result<()>,
+}
+
+impl BenchResult {
+    /// Short status label for the completion line: "done" on success, "FAILED" on error.
+    fn status(&self) -> &str {
+        match self.success {
+            Ok(()) => "done",
+            Err(_) => "FAILED",
+        }
+    }
+}
+
+/// Settings for a parallel benchmark run: job limit, quiet level, and progress toggle.
+pub struct BenchRunner {
+    max_jobs: usize,
+    quiet_level: u8,
+    use_progress: bool,
+}
+
+impl BenchRunner {
+    /// Create a runner: `max_jobs` sizes the core pool, the rest configures reporting.
+    pub fn new(max_jobs: usize, quiet_level: u8, use_progress: bool) -> Self {
+        Self {
+            max_jobs,
+            quiet_level,
+            use_progress,
+        }
+    }
+
+    /// Run all benchmarks in parallel.
+    ///
+    /// Each entry is a (binary_path, benchmark_name) pair.
+    ///
+    /// Quiet levels:
+    /// - 0: show progress, completion headers, and per-benchmark output
+    /// - 1: show progress and completion headers only (no output details)
+    /// - 2+: fully silent (no progress, no output)
+    ///
+    /// Fails if `max_jobs` exceeds the available performance cores, if any benchmark fails,
+    /// or if a benchmark task ends without ever reporting a result.
+    pub async fn run(&self, benches: &[(PathBuf, String)]) -> Result<()> {
+        let total = benches.len();
+        let available = PerformanceCoresPool::num_cores();
+        if self.max_jobs > available {
+            bail!(
+                "Requested number of jobs {} exceeds available performance cores {}, use --jobs=0 or --quick to use all available performance cores.",
+                self.max_jobs,
+                available,
+            );
+        }
+
+        let mut tracker = ProgressTracker::new(total, self.quiet_level, self.use_progress);
+        let pool = PerformanceCoresPool::new(self.max_jobs)?;
+        let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<BenchEvent>();
+
+        // Spawn all benchmarks up front; each task first awaits a core from the pool.
+        for (binary, bench) in benches {
+            tracker.add_spinner(bench);
+
+            let bench = bench.clone();
+            let binary = binary.clone();
+            let tx = tx.clone();
+            let pool = pool.clone();
+            tokio::spawn(async move {
+                let core = pool.get().await;
+                Self::run_one(&bench, &binary, core, &tx).await;
+            });
+        }
+
+        // Drop our sender so rx closes when all tasks finish
+        drop(tx);
+
+        // Process events as they arrive
+        let mut failed = Vec::new();
+        let mut done_count = 0;
+
+        while let Some(event) = rx.recv().await {
+            match event {
+                BenchEvent::OutputLine { bench, line } => {
+                    tracker.update_spinner(&bench, &line);
+                }
+                BenchEvent::Done(result) => {
+                    done_count += 1;
+                    tracker.finish_spinner(&result.bench);
+                    tracker.advance(done_count as u64);
+                    let error = result.success.as_ref().err();
+                    tracker.print_completion(
+                        done_count,
+                        total,
+                        &result.bench,
+                        result.status(),
+                        &result.output_lines,
+                        error,
+                    );
+                    if result.success.is_err() {
+                        failed.push(result.bench);
+                    }
+                }
+            }
+        }
+
+        tracker.finish();
+
+        if !failed.is_empty() {
+            bail!("{} benchmark(s) failed: {}", failed.len(), failed.join(", "));
+        }
+        // A task that panicked never sends `Done`; do not report that as success.
+        if done_count != total {
+            bail!("{} benchmark(s) ended without reporting a result", total - done_count);
+        }
+
+        Ok(())
+    }
+
+    /// Run a single benchmark, streaming output events and sending the final result.
+    ///
+    /// Emits a synthetic "Starting ..." line, forwards every process output line as an
+    /// `OutputLine` event, and sends `Done` only after the forwarder has finished.
+    /// On failure the result carries the error and an empty list of output lines.
+    async fn run_one(
+        bench: &str,
+        binary: &Path,
+        core: impl Deref<Target = CpuInfo>,
+        event_tx: &tokio::sync::mpsc::UnboundedSender<BenchEvent>,
+    ) {
+        // Lines from the process arrive on this channel and are re-tagged with the bench name.
+        let (line_tx, mut line_rx) = tokio::sync::mpsc::unbounded_channel::<String>();
+        let forward_tx = event_tx.clone();
+        let owner = bench.to_string();
+        let forwarder = tokio::spawn(async move {
+            while let Some(line) = line_rx.recv().await {
+                let bench = owner.clone();
+                let _ = forward_tx.send(BenchEvent::OutputLine { bench, line });
+            }
+        });
+
+        // Signal that this benchmark is starting
+        let _ = event_tx.send(BenchEvent::OutputLine {
+            bench: bench.to_string(),
+            line: "Starting ...".to_string(),
+        });
+
+        let outcome = process::run(bench, binary, core, &line_tx).await;
+        let (output_lines, success) = match outcome {
+            Ok(ProcessOutput { output_lines }) => (output_lines, Ok(())),
+            Err(e) => (Vec::new(), Err(e)),
+        };
+        let bench = bench.to_string();
+
+        // Closing the line channel lets the forwarder finish, so every OutputLine event
+        // is sent before the Done event below.
+        drop(line_tx);
+        let _ = forwarder.await;
+
+        let result = BenchResult {
+            bench,
+            output_lines,
+            success,
+        };
+        let _ = event_tx.send(BenchEvent::Done(result));
+    }
+}
diff --git a/src/hyperlight_ci/src/bench_report/mod.rs b/src/hyperlight_ci/src/bench_report/mod.rs
new file mode 100644
index 000000000..619e61319
--- /dev/null
+++ b/src/hyperlight_ci/src/bench_report/mod.rs
@@ -0,0 +1,144 @@
+//! The `bench-report` subcommand: generates a markdown table from existing
+//! criterion benchmark results in `target/criterion/`.
+
+mod table;
+
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::{Context, Result};
+use clap::Args;
+
+/// Command-line arguments for the `bench-report` subcommand.
+#[derive(Args)]
+pub struct BenchReportArgs {
+    /// Filter benchmarks by name (substring match, or exact with --exact)
+    pub filter: Option<String>,
+
+    /// Match the filter exactly instead of as a substring
+    #[arg(long)]
+    pub exact: bool,
+
+    /// Benchmark binary to list benchmarks from (can be specified multiple times).
+    /// When provided, only benchmarks available in these binaries are included.
+    #[arg(long)]
+    pub binary: Vec<PathBuf>,
+
+    /// Path to the criterion output directory
+    #[arg(long, default_value = "target/criterion")]
+    pub criterion_dir: PathBuf,
+
+    /// Output file path (default: stdout)
+    #[arg(short, long)]
+    pub output: Option<PathBuf>,
+}
+
+/// Entry point for the bench-report subcommand.
+///
+/// Renders the markdown report and writes it to `--output`, or to stdout when unset.
+pub fn run(args: BenchReportArgs) -> Result<()> {
+    let allowlist = build_allowlist(&args)?;
+    let markdown = table::render(&args.criterion_dir, allowlist.as_deref())?;
+
+    match &args.output {
+        Some(path) => std::fs::write(path, &markdown)
+            .with_context(|| format!("Failed to write {}", path.display()))?,
+        None => print!("{markdown}"),
+    }
+
+    Ok(())
+}
+
+/// Builds an allowlist of benchmark full_ids by querying binaries and applying the filter.
+///
+/// - If `--binary` is specified, lists benchmarks from each binary.
+/// - If a text filter is specified, applies substring (or exact) matching.
+/// - If neither is specified, returns `None` (include everything).
+///
+/// Fails if a binary cannot be run or its `--bench --list` exits unsuccessfully.
+fn build_allowlist(args: &BenchReportArgs) -> Result<Option<Vec<String>>> {
+    let mut names: Option<Vec<String>> = None;
+
+    if !args.binary.is_empty() {
+        let mut list = Vec::new();
+        for binary in &args.binary {
+            let output = Command::new(binary)
+                .args(["--bench", "--list"])
+                .output()
+                .with_context(|| format!("Failed to run {} --bench --list", binary.display()))?;
+            // A failed listing must not silently shrink the allowlist.
+            if !output.status.success() {
+                anyhow::bail!("{} --bench --list exited with {}", binary.display(), output.status);
+            }
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            list.extend(
+                stdout
+                    .lines()
+                    .filter_map(|line| line.trim().strip_suffix(": benchmark"))
+                    .map(str::to_string),
+            );
+        }
+        names = Some(list);
+    }
+
+    if let Some(filter) = &args.filter {
+        // No binaries specified: the candidates are all ids found in the criterion dir.
+        let candidates = match names {
+            Some(list) => list,
+            None => discover_all_ids(&args.criterion_dir)?,
+        };
+        let keep = |id: &String| match args.exact {
+            true => id == filter,
+            false => id.contains(filter.as_str()),
+        };
+        names = Some(candidates.into_iter().filter(keep).collect());
+    }
+
+    Ok(names)
+}
+
+/// Discovers all benchmark full_ids from the criterion directory.
+///
+/// Takes `&Path` so callers may pass either a `&Path` or (via deref) a `&PathBuf`.
+fn discover_all_ids(criterion_dir: &std::path::Path) -> Result<Vec<String>> {
+    let mut ids = Vec::new();
+    walk_for_ids(criterion_dir, &mut ids)?;
+    Ok(ids)
+}
+
+/// Recursively collects the `full_id` from every `new/benchmark.json` found under `dir`.
+/// Directories that cannot be listed are skipped; `reports` and dot-directories are ignored.
+fn walk_for_ids(dir: &std::path::Path, ids: &mut Vec<String>) -> Result<()> {
+    #[derive(serde::Deserialize)]
+    struct Meta {
+        full_id: String,
+    }
+
+    let meta_path = dir.join("new").join("benchmark.json");
+    if meta_path.exists() {
+        let data = std::fs::read_to_string(&meta_path)
+            .with_context(|| format!("Failed to read {}", meta_path.display()))?;
+        let meta: Meta = serde_json::from_str(&data)
+            .with_context(|| format!("Failed to parse {}", meta_path.display()))?;
+        ids.push(meta.full_id);
+        return Ok(());
+    }
+
+    // Best effort: a directory that cannot be listed contributes no ids.
+    let Ok(read_dir) = std::fs::read_dir(dir) else {
+        return Ok(());
+    };
+
+    for entry in read_dir {
+        let entry = entry?;
+        if !entry.file_type()?.is_dir() {
+            continue;
+        }
+        let name = entry.file_name();
+        let name_str = name.to_string_lossy();
+        if name_str != "reports" && !name_str.starts_with('.') {
+            walk_for_ids(&entry.path(), ids)?;
+        }
+    }
+    Ok(())
+}
diff --git a/src/hyperlight_ci/src/bench_report/table.rs b/src/hyperlight_ci/src/bench_report/table.rs
new file mode 100644
index 000000000..550b1b0a6
--- /dev/null
+++ b/src/hyperlight_ci/src/bench_report/table.rs
@@ -0,0 +1,329 @@
+//! Reads criterion benchmark results from `target/criterion/` JSON files and
+//! renders a markdown table similar to criterion-table.
+
+use std::collections::BTreeMap;
+use std::fmt::Write;
+use std::path::Path;
+
+use anyhow::{Context, Result};
+use serde::Deserialize;
+
+/// The fields this tool reads from a criterion `benchmark.json` file.
+#[derive(Deserialize)]
+struct BenchmarkMeta {
+    group_id: String,
+    function_id: String,
+    value_str: Option<String>,
+    throughput: Option<Throughput>,
+    full_id: String,
+}
+
+/// Throughput specification from `benchmark.json`; marked `dead_code` (parsed, not reported).
+#[derive(Deserialize)]
+#[serde(rename_all = "PascalCase")]
+#[allow(dead_code)]
+enum Throughput {
+    Bytes(u64),
+    Elements(u64),
+}
+
+/// Estimates from a criterion `estimates.json` file: `mean` is required, `slope` optional.
+#[derive(Deserialize)]
+struct Estimates {
+    slope: Option<Estimate>,
+    mean: Estimate,
+}
+
+/// A single statistical estimate; only its point value is read here.
+#[derive(Deserialize)]
+struct Estimate {
+    point_estimate: f64,
+}
+
+/// Change estimates from a criterion `change/estimates.json` file; only `mean` is read.
+#[derive(Deserialize)]
+struct ChangeEstimates {
+    mean: ChangeEstimate,
+}
+
+/// A single change estimate; only its point value is read here.
+#[derive(Deserialize)]
+struct ChangeEstimate {
+    point_estimate: f64,
+}
+
+/// Change of a benchmark relative to its baseline, as consumed by `format_change`.
+struct ChangeInfo {
+    /// Relative change of new vs old time as a fraction (0.05 = +5%, -0.02 = -2%).
+    point_estimate: f64,
+}
+
+/// A single benchmark entry: identity from `benchmark.json` plus its timing estimate.
+struct BenchEntry {
+    full_id: String,
+    group_id: String,
+    function_id: String,
+    value_str: Option<String>,
+    estimate_ns: f64,
+    #[allow(dead_code)]
+    throughput: Option<Throughput>,
+    /// Change vs the stored baseline, if `change/estimates.json` could be loaded.
+    change: Option<ChangeInfo>,
+}
+
+impl BenchEntry {
+    /// Column label (the function name) under which this benchmark is shown.
+    ///
+    /// With a `value_str` the whole `function_id` is the column; otherwise the column is
+    /// `function_id` up to (not including) its last "/", or all of it when there is none.
+    fn column(&self) -> &str {
+        let id = self.function_id.as_str();
+        // Only strip the last path segment when no explicit `value_str` exists.
+        match (&self.value_str, id.rsplit_once('/')) {
+            (None, Some((head, _))) => head,
+            _ => id,
+        }
+    }
+
+    /// Row label (the parameter/value) for this benchmark, if it has one.
+    ///
+    /// `value_str` wins when set; otherwise it is whatever follows the last "/" in
+    /// `function_id`, and `None` when `function_id` has no "/".
+    fn row(&self) -> Option<&str> {
+        match self.value_str.as_deref() {
+            Some(value) => Some(value),
+            None => self.function_id.rsplit_once('/').map(|(_, tail)| tail),
+        }
+    }
+}
+
+/// Renders the results under `criterion_dir` as markdown, restricted to the `full_id`s in
+/// `allowlist` when one is given; fails if no benchmark is left to report.
+pub fn render(criterion_dir: &Path, allowlist: Option<&[String]>) -> Result<String> {
+    let mut entries = discover_benchmarks(criterion_dir)?;
+    if let Some(names) = allowlist {
+        entries.retain(|entry| names.contains(&entry.full_id));
+    }
+    anyhow::ensure!(
+        !entries.is_empty(),
+        "No benchmark results found in {}",
+        criterion_dir.display()
+    );
+    Ok(format_table(&entries))
+}
+
+/// Collects every benchmark entry found beneath `criterion_dir`.
+fn discover_benchmarks(criterion_dir: &Path) -> Result<Vec<BenchEntry>> {
+    let mut found = Vec::new();
+    walk_for_benchmarks(criterion_dir, &mut found)?;
+    Ok(found)
+}
+
+/// Recursively walks `dir`, collecting an entry for each `new/benchmark.json` found.
+fn walk_for_benchmarks(dir: &Path, entries: &mut Vec<BenchEntry>) -> Result<()> {
+    let new_dir = dir.join("new");
+    if new_dir.join("benchmark.json").exists() {
+        // A benchmark directory is a leaf: record it (if it has an entry) and stop descending.
+        entries.extend(read_benchmark_entry(&new_dir)?);
+        return Ok(());
+    }
+
+    let listing = std::fs::read_dir(dir)
+        .with_context(|| format!("Failed to read directory {}", dir.display()))?;
+
+    for item in listing {
+        let item = item?;
+        if !item.file_type()?.is_dir() {
+            continue;
+        }
+        let name = item.file_name();
+        let name = name.to_string_lossy();
+        // Skip non-benchmark directories
+        let skipped = name == "reports" || name.starts_with('.');
+        if !skipped {
+            walk_for_benchmarks(&item.path(), entries)?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Reads a single benchmark entry from a `new/` directory.
+///
+/// Returns `Ok(None)` when `estimates.json` is missing. Unreadable or malformed
+/// `benchmark.json` / `estimates.json` is an error, whereas the optional change data in
+/// the sibling `change/estimates.json` is silently dropped if it cannot be loaded.
+fn read_benchmark_entry(new_dir: &Path) -> Result<Option<BenchEntry>> {
+    /// Reads and deserializes one JSON file, attaching the path to any error.
+    fn load<T>(path: &Path) -> Result<T>
+    where
+        T: serde::de::DeserializeOwned,
+    {
+        let text = std::fs::read_to_string(path)
+            .with_context(|| format!("Failed to read {}", path.display()))?;
+        serde_json::from_str(&text)
+            .with_context(|| format!("Failed to parse {}", path.display()))
+    }
+
+    let estimates_path = new_dir.join("estimates.json");
+    if !estimates_path.exists() {
+        return Ok(None);
+    }
+
+    // From here on both files are mandatory: metadata first, then the estimates.
+    let meta: BenchmarkMeta = load(&new_dir.join("benchmark.json"))?;
+    let estimates: Estimates = load(&estimates_path)?;
+
+    // Prefer slope (linear regression) over mean, matching criterion's "typical" behavior
+    let typical = estimates.slope.as_ref().unwrap_or(&estimates.mean);
+    let estimate_ns = typical.point_estimate;
+
+    // Read change/estimates.json (sibling to new/) if it exists.
+    // Best effort: a read or parse failure simply yields no change information.
+    let change = new_dir
+        .parent()
+        .map(|bench_dir| bench_dir.join("change").join("estimates.json"))
+        .filter(|path| path.exists())
+        .and_then(|path| std::fs::read_to_string(path).ok())
+        .and_then(|text| serde_json::from_str::<ChangeEstimates>(&text).ok())
+        .map(|parsed| ChangeInfo {
+            point_estimate: parsed.mean.point_estimate,
+        });
+
+    Ok(Some(BenchEntry {
+        full_id: meta.full_id,
+        group_id: meta.group_id,
+        function_id: meta.function_id,
+        value_str: meta.value_str,
+        throughput: meta.throughput,
+        estimate_ns,
+        change,
+    }))
+}
+
+/// Formats all benchmark entries into a markdown string.
+///
+/// Emits the two fixed headings, then one `###` section with a table per benchmark group.
+fn format_table(entries: &[BenchEntry]) -> String {
+    // Bucket entries by group_id. The BTreeMap yields groups sorted by id, while the
+    // entries inside each group keep their discovery order.
+    let mut by_group: BTreeMap<&str, Vec<&BenchEntry>> = BTreeMap::new();
+    for entry in entries {
+        by_group.entry(entry.group_id.as_str()).or_default().push(entry);
+    }
+
+    let mut out = String::from("# Benchmarks\n\n## Benchmark Results\n\n");
+    for (group_id, members) in &by_group {
+        writeln!(out, "### {group_id}\n").unwrap();
+        write_group_table(&mut out, members);
+        out.push('\n');
+    }
+
+    out
+}
+
+/// Writes a markdown table for a single benchmark group.
+///
+/// Columns are the distinct `column()` labels and rows the distinct `row()` labels, both in
+/// first-seen order. Each cell shows the time and the change vs baseline; combinations
+/// without a benchmark are left blank.
+/// The table is appended to `out`.
+fn write_group_table(out: &mut String, entries: &[&BenchEntry]) {
+    // Unique functions (columns) and values (rows), in order of first appearance.
+    let mut functions: Vec<&str> = Vec::new();
+    let mut values: Vec<Option<&str>> = Vec::new();
+    // Lookup: (column, row) -> entry. A later duplicate replaces an earlier one.
+    let mut lookup: BTreeMap<(&str, Option<&str>), &BenchEntry> = BTreeMap::new();
+
+    for &entry in entries {
+        let (col, row) = (entry.column(), entry.row());
+        if !functions.contains(&col) {
+            functions.push(col);
+        }
+        if !values.contains(&row) {
+            values.push(row);
+        }
+        lookup.insert((col, row), entry);
+    }
+
+    // Header row: an empty row-label cell, then one cell per function.
+    out.push('|');
+    out.push_str("            ");
+    for func in &functions {
+        write!(out, " | `{func}`").unwrap();
+    }
+    out.push_str(" |\n");
+
+    // Alignment row
+    out.push_str("|:-----------|");
+    for _ in &functions {
+        out.push_str(":------------------------ |");
+    }
+    out.push('\n');
+
+    // Data rows: one per value, with the cells in the same order as the header.
+    for val in &values {
+        // An absent row label renders as an empty (padded) cell.
+        let row_label = val.map(|v| format!("**`{v}`**")).unwrap_or_default();
+        write!(out, "| {row_label:10} ").unwrap();
+
+        for func in &functions {
+            match lookup.get(&(*func, *val)) {
+                Some(entry) => {
+                    let time_str = format_time(entry.estimate_ns);
+                    let change_str = format_change(&entry.change);
+                    write!(out, " | `{time_str}` ({change_str}) ").unwrap();
+                }
+                // No benchmark for this (function, value) pair.
+                None => out.push_str(" |                          "),
+            }
+        }
+        out.push_str(" |\n");
+    }
+}
+
+/// Formats change vs baseline with tiered emojis (matching criterion-table style).
+///
+/// The tier is chosen from `compare = old_time / new_time` (the inverse of new/old):
+/// - `compare >= 1.8` (44%+ faster): 🚀
+/// - `compare > 0.9` (within ~10% slower): ✅
+/// - `compare <= 0.9` (10%+ slower): ❌
+fn format_change(change: &Option<ChangeInfo>) -> String {
+    let Some(ChangeInfo { point_estimate }) = change else {
+        return String::from("---");
+    };
+
+    // ratio = new_time / old_time
+    let ratio = 1.0 + point_estimate;
+    // compare = old_time / new_time (criterion-table's convention)
+    let compare = 1.0 / ratio;
+
+    let speedup_str = if ratio > 1.0 {
+        format!("{ratio:.2}x slower")
+    } else if ratio < 1.0 {
+        format!("{compare:.2}x faster")
+    } else {
+        format!("{ratio:.2}x")
+    };
+
+    // (emoji, emphasis marker) per tier: bold for the two good tiers, italic for the bad one.
+    let (emoji, mark) = match compare {
+        c if c >= 1.8 => ("🚀", "**"),
+        c if c > 0.9 => ("✅", "**"),
+        _ => ("❌", "*"),
+    };
+    format!("{emoji} {mark}{speedup_str}{mark}")
+}
+
+/// Formats a time in nanoseconds as a human-readable string.
+///
+/// The value is scaled to ns, µs, ms or s (switching at each factor of 1000) with 2 decimals.
+fn format_time(ns: f64) -> String {
+    let (scaled, unit) = match ns {
+        t if t < 1_000.0 => (t, "ns"),
+        t if t < 1_000_000.0 => (t / 1_000.0, "µs"),
+        t if t < 1_000_000_000.0 => (t / 1_000_000.0, "ms"),
+        t => (t / 1_000_000_000.0, "s"),
+    };
+    format!("{scaled:.2} {unit}")
+}
diff --git a/src/hyperlight_ci/src/main.rs b/src/hyperlight_ci/src/main.rs
new file mode 100644
index 000000000..70850dffa
--- /dev/null
+++ b/src/hyperlight_ci/src/main.rs
@@ -0,0 +1,27 @@
+mod bench;
+mod bench_report;
+
+use clap::{Parser, Subcommand};
+
+#[derive(Parser)]
+#[command(name = "hyperlight-ci", about = "Hyperlight's CI and development tools")]
+struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Subcommand)]
+enum Commands {
+    /// Run benchmarks using the benchmark binary directly
+    Bench(bench::BenchArgs),
+    /// Generate a markdown table from existing criterion benchmark results
+    BenchReport(bench_report::BenchReportArgs),
+}
+
+fn main() -> anyhow::Result<()> {
+    // Dispatch the parsed subcommand to its module's entry point.
+    match Cli::parse().command {
+        Commands::Bench(args) => bench::run(args),
+        Commands::BenchReport(args) => bench_report::run(args),
+    }
+}