diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 000000000..1831b0a74 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[alias] # command aliases +ci = ["run", "--quiet", "--package=hyperlight-ci", "--"] \ No newline at end of file diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml index 2f6294476..b212504e3 100644 --- a/.github/workflows/ValidatePullRequest.yml +++ b/.github/workflows/ValidatePullRequest.yml @@ -15,7 +15,7 @@ concurrency: permissions: contents: write - pull-requests: read + pull-requests: write jobs: docs-pr: @@ -140,6 +140,86 @@ jobs: docs_only: ${{ needs.docs-pr.outputs.docs-only }} secrets: inherit + # Run benchmarks and post results as PR comment + benchmarks: + needs: + - docs-pr + - build-guests + # Required because update-guest-locks is skipped on non-dependabot PRs, + # and a skipped dependency transitively skips all downstream jobs. + # See: https://github.com/actions/runner/issues/2205 + if: ${{ !cancelled() && !failure() }} + strategy: + fail-fast: false + matrix: + hypervisor: ['hyperv-ws2025', mshv3, kvm] + cpu: [amd, intel] + uses: ./.github/workflows/dep_benchmarks.yml + secrets: inherit + with: + docs_only: ${{ needs.docs-pr.outputs.docs-only }} + hypervisor: ${{ matrix.hypervisor }} + cpu: ${{ matrix.cpu }} + + # Collect all benchmark reports and post a single combined PR comment + benchmark-comment: + needs: benchmarks + if: ${{ !cancelled() && !failure() }} + runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: + - name: Download all benchmark reports + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + pattern: benchmark-report_* + path: reports/ + + - name: Post combined benchmark results to PR + uses: actions/github-script@v9 + with: + script: | + const fs = require('fs'); + const path = require('path'); + + const reportsDir = 'reports'; + if (!fs.existsSync(reportsDir)) { + 
console.log('No benchmark reports found, skipping comment.'); + return; + } + + // Collect all report files from subdirectories + const sections = []; + const dirs = fs.readdirSync(reportsDir).sort(); + for (const dir of dirs) { + const mdPath = path.join(reportsDir, dir, 'benchmark.md'); + if (!fs.existsSync(mdPath)) continue; + + // Extract hypervisor/cpu from artifact name: benchmark-report_OS_hypervisor_cpu + const parts = dir.replace('benchmark-report_', '').split('_'); + const os = parts[0]; + const hypervisor = parts.slice(1, -1).join('_'); + const cpu = parts[parts.length - 1]; + const label = `${hypervisor} / ${cpu} (${os})`; + + const content = fs.readFileSync(mdPath, 'utf8').trim(); + sections.push(`
<details>\n<summary>${label}</summary>\n\n${content}\n\n</details>
`); + } + + if (sections.length === 0) { + console.log('No benchmark report content found, skipping comment.'); + return; + } + + const body = `## Benchmark Results\n\n${sections.join('\n\n')}`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: body, + }); + spelling: name: spell check with typos runs-on: ubuntu-latest @@ -167,6 +247,8 @@ jobs: - build-test - run-examples - fuzzing + - benchmarks + - benchmark-comment - spelling - license-headers if: always() diff --git a/.github/workflows/dep_benchmarks.yml b/.github/workflows/dep_benchmarks.yml index b0c47be76..c4420b117 100644 --- a/.github/workflows/dep_benchmarks.yml +++ b/.github/workflows/dep_benchmarks.yml @@ -56,7 +56,6 @@ on: required: false type: number default: 5 - env: CARGO_TERM_COLOR: always RUST_BACKTRACE: full @@ -133,7 +132,17 @@ jobs: continue-on-error: true - name: Run benchmarks - run: just bench-ci main + run: just bench-ci + + - name: Create benchmarks report + run: cargo ci bench-report > target/criterion/benchmark.md + + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: benchmark-report_${{ runner.os }}_${{ inputs.hypervisor }}_${{ inputs.cpu }} + path: target/criterion/benchmark.md + if-no-files-found: error + retention-days: 1 - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: diff --git a/Cargo.lock b/Cargo.lock index ba73df16d..f7f485cba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,18 +58,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] -name = "anstream" -version = "0.6.21" +name = "ansi-replace" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = 
"7f8b155ab93213f41c886d3a46e335258428e52c7cf868e25cf099d50274496d" dependencies = [ - "anstyle", - "anstyle-parse 0.2.7", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", + "regex", + "stable-pattern", ] [[package]] @@ -79,7 +74,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", - "anstyle-parse 1.0.0", + "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", @@ -93,15 +88,6 @@ version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" -[[package]] -name = "anstyle-parse" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" -dependencies = [ - "utf8parse", -] - [[package]] name = "anstyle-parse" version = "1.0.0" @@ -117,7 +103,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -128,7 +114,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -499,25 +485,38 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.58" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] name = "clap_builder" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "anstream 0.6.21", + "anstream", "anstyle", "clap_lex", "strsim", ] +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "clap_lex" version = "1.0.0" @@ -530,6 +529,19 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -609,6 +621,19 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cpu-pin" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb5bc1be026f7f066429ce0611e23a341db91b91ed701de4a1432d01d3ed1105" +dependencies = [ + "libc", + "mach2 0.6.0", + "once_cell", + "tokio", + "windows", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -769,7 +794,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -815,6 +840,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "endian-type" version = "0.1.2" @@ -837,7 +868,7 @@ version = "0.11.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ - "anstream 1.0.0", + "anstream", "anstyle", "env_filter", "jiff", @@ -857,7 +888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -1190,7 +1221,7 @@ dependencies = [ "gobject-sys", "libc", "system-deps", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -1359,6 +1390,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "http" version = "1.4.0" @@ -1456,6 +1493,23 @@ dependencies = [ "tracing", ] +[[package]] +name = "hyperlight-ci" +version = "0.0.0" +dependencies = [ + "ansi-replace", + "anyhow", + "clap", + "cpu-pin", + "indicatif", + "num_cpus", + "regex", + "serde", + "serde_json", + "simple-pool", + "tokio", +] + [[package]] name = "hyperlight-common" version = "0.15.0" @@ -1626,7 +1680,7 @@ dependencies = [ "vmm-sys-util", "windows", "windows-result", - "windows-sys", + "windows-sys 0.61.2", "windows-version", ] @@ -1820,6 +1874,19 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -2105,6 +2172,12 @@ dependencies = [ "libc", ] +[[package]] +name = "mach2" 
+version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dae608c151f68243f2b000364e1f7b186d9c29845f7d2d85bd31b9ad77ad552b" + [[package]] name = "macho-unwind-info" version = "0.5.0" @@ -2216,7 +2289,7 @@ checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -2280,7 +2353,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -2292,6 +2365,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "num_enum" version = "0.7.5" @@ -2313,6 +2396,12 @@ dependencies = [ "syn", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" version = "0.39.0" @@ -2324,6 +2413,12 @@ dependencies = [ "ruzstd", ] +[[package]] +name = "object-id" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c587bd1cd63959a8520442afc0f92a875d83deea175c7b48dd9f104a2c5070a9" + [[package]] name = "once_cell" version = "1.21.4" @@ -2785,7 +2880,7 @@ dependencies = [ "bindgen 0.70.1", "libc", "libproc", - "mach2", + "mach2 0.4.3", "winapi", ] @@ -3152,7 +3247,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -3363,6 +3458,16 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "simple-pool" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "073382259dbeb56c3eaab04a1d330459f6490d1e518b2a8ee441c8bd00dbc092" +dependencies = [ + "object-id", + "parking_lot", +] + [[package]] name = "sketches-ddsketch" version = "0.3.0" @@ -3388,7 +3493,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -3406,6 +3511,15 @@ dependencies = [ "lock_api", ] +[[package]] +name = "stable-pattern" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4564168c00635f88eaed410d5efa8131afa8d8699a612c80c455a0ba05c21045" +dependencies = [ + "memchr", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -3478,7 +3592,7 @@ dependencies = [ "getrandom 0.4.1", "once_cell", "rustix", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -3553,7 +3667,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -3913,6 +4027,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -4195,7 +4315,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -4305,6 +4425,15 @@ dependencies = [ 
"windows-link", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -4314,6 +4443,22 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + [[package]] name = "windows-threading" version = "0.2.1" @@ -4332,6 +4477,54 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = 
"windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "winnow" version = "0.7.14" diff --git a/Cargo.toml b/Cargo.toml index e9b69f40d..6650dcbf9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ default-members = [ "src/hyperlight_testing", ] members = [ + "src/hyperlight_ci", "src/hyperlight_common", "src/hyperlight_guest", "src/hyperlight_host", diff --git a/Justfile b/Justfile index 401897425..9b6846289 100644 --- a/Justfile +++ b/Justfile @@ -177,7 +177,7 @@ run-examples-like-ci config=default-target hypervisor="kvm": benchmarks-like-ci config=default-target hypervisor="kvm": @# Run benchmarks - {{ if config == "release" { "just bench-ci main" } else { "" } }} + {{ if config == "release" { "just bench-ci" } else { "" } }} fuzz-like-ci target config=default-target hypervisor="kvm": @# Run Fuzzing @@ -400,13 +400,12 @@ bench-download os hypervisor cpu tag="": tar -zxvf target/benchmarks_{{ os }}_{{ hypervisor }}_{{ cpu }}.tar.gz -C target/criterion/ --strip-components=1 # Warning: compares to and then OVERWRITES the given baseline -bench-ci baseline features="": - @# Benchmarks are always run with release builds for meaningful results - cargo bench --profile=release {{ if features =="" {''} else { "--features " + features } }} -- --verbose --save-baseline {{ baseline }} +bench-ci features="": + cargo ci bench --no-progress {{ if features == "" {''} else { "--features " + features } }} bench 
features="": @# Benchmarks are always run with release builds for meaningful results - cargo bench --profile=release {{ if features =="" {''} else { "--features " + features } }} -- --verbose + cargo ci bench {{ if features == "" {''} else { "--features " + features } }} ############### ### FUZZING ### diff --git a/docs/benchmarking-hyperlight.md b/docs/benchmarking-hyperlight.md index dd28c6ea8..2fb931011 100644 --- a/docs/benchmarking-hyperlight.md +++ b/docs/benchmarking-hyperlight.md @@ -72,6 +72,6 @@ Found 1 outliers among 100 measurements (1.00%) ## Running benchmarks locally -Use `just bench` to run benchmarks with release builds (the only supported configuration). Comparing local benchmark results to github-saved benchmarks doesn't make much sense, since you'd be using different hardware, but you can use `just bench-download os hypervisor [tag] ` to download and extract the GitHub release benchmarks to the correct place folder. You can then run `just bench-ci main` to compare to (and overwrite) the previous release benchmarks. Note that `main` is the name of the baselines stored in GitHub. +Use `just bench` to run benchmarks with release builds (the only supported configuration). Comparing local benchmark results to GitHub-saved benchmarks doesn't make much sense, since you'd be using different hardware, but you can use `just bench-download os hypervisor cpu [tag]` to download and extract the GitHub release benchmarks to the correct folder. You can then run `just bench-ci` to compare to (and overwrite) the previous release benchmarks. The name of the baselines stored in GitHub is `base`. **Important**: The `just bench` command uses release builds by default to ensure meaningful performance measurements. For profiling purposes, you can compile benchmarks with debug symbols by running `cargo bench` directly.
diff --git a/src/hyperlight_ci/Cargo.toml b/src/hyperlight_ci/Cargo.toml
new file mode 100644
index 000000000..fa5555a9f
--- /dev/null
+++ b/src/hyperlight_ci/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "hyperlight-ci"
+edition = "2021"
+# `version` (and the other publishing fields) are intentionally not set: cargo refuses to publish a package without a version, so this internal crate cannot accidentally reach crates.io
+description = """
+Hyperlight's CI and development tools.
+"""
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = "1"
+clap = { version = "4.6.1", features = ["derive"] }
+indicatif = "0.17"
+num_cpus = "1"
+tokio = { version = "1", features = ["rt", "process", "io-util", "sync", "macros"] }
+ansi-replace = "0.1"
+regex = "1"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+simple-pool = "0.0.18"
+cpu-pin = { version = "0.1.3", features = ["tokio"] }
\ No newline at end of file
diff --git a/src/hyperlight_ci/src/bench/args.rs b/src/hyperlight_ci/src/bench/args.rs
new file mode 100644
index 000000000..3aa92a46f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/args.rs
@@ -0,0 +1,52 @@
+use std::path::PathBuf;
+
+use clap::Args;
+
+use crate::bench::cpu::PerformanceCoresPool;
+
+/// Command-line arguments for the `bench` subcommand.
+#[derive(Args)] +pub struct BenchArgs { + /// Filter benchmarks by name (substring match, or exact with --exact) + pub filter: Option, + + /// Match the filter exactly instead of as a substring + #[arg(long)] + pub exact: bool, + + /// Pre-built benchmark binary to use (skip build step; can be specified multiple times) + #[arg(long)] + pub binary: Vec, + + /// Number of benchmarks to run in parallel (0 = all CPUs, default: 0) + #[arg(long, short, default_value_t = 0)] + pub jobs: usize, + + /// Reduce output verbosity (repeatable: -q hides stderr, -qq hides everything) + #[arg(short, long, action = clap::ArgAction::Count)] + pub quiet: u8, + + /// Disable progress bar (auto-detected: shown only on TTY) + #[arg(long)] + pub no_progress: bool, + + /// Additional features to pass to cargo criterion + #[arg(short = 'F', long, default_value = "")] + pub features: String, +} + +impl BenchArgs { + /// Determine the maximum number of parallel benchmark jobs. + pub fn max_jobs(&self) -> usize { + match self.jobs { + 0 => PerformanceCoresPool::num_cores(), + j => j, + } + } + + /// Whether progress bars should be displayed. + pub fn use_progress(&self) -> bool { + use std::io::IsTerminal; + !self.no_progress && std::io::stderr().is_terminal() && self.quiet < 2 + } +} diff --git a/src/hyperlight_ci/src/bench/cpu.rs b/src/hyperlight_ci/src/bench/cpu.rs new file mode 100644 index 000000000..5ae6f7a9a --- /dev/null +++ b/src/hyperlight_ci/src/bench/cpu.rs @@ -0,0 +1,71 @@ +//! CPU core discovery and pool management for benchmark isolation. +//! +//! Discovers performance cores (P-cores) on the system and provides a pool +//! that allows benchmarks to be pinned to specific cores, avoiding interference +//! from concurrent workloads. + +use std::sync::{Arc, LazyLock}; + +use anyhow::{Result, bail}; +use cpu_pin::CpuInfo; +use simple_pool::{ResourcePool, ResourcePoolGuard}; + +/// Lazily discovered list of performance cores on the system. 
+/// +/// Filters for cores that are marked as `Performance` type and have the maximum +/// number of logical processors (i.e., full-featured P-cores with hyperthreading, +/// excluding any asymmetric E-cores). +static PERFORMANCE_CORES: LazyLock> = LazyLock::new(|| { + cpu_pin::topology() + .expect("failed to detect CPU topology") + .best_cores() +}); + +/// A pool of performance cores that can be claimed by benchmark tasks. +/// +/// Each benchmark acquires a core from the pool before running, ensuring +/// no two benchmarks share the same physical core simultaneously. +#[derive(Clone)] +pub struct PerformanceCoresPool { + pool: Arc>, +} + +impl PerformanceCoresPool { + /// Returns the total number of performance cores available on the system. + pub fn num_cores() -> usize { + PERFORMANCE_CORES.len() + } + + /// Creates a new pool with up to `size` performance cores. + /// + /// Returns an error if `size` exceeds the number of available performance cores. + pub fn new(size: usize) -> Result { + if size > PERFORMANCE_CORES.len() { + bail!( + "Requested more performance cores than available: requested {size}, available {}", + PERFORMANCE_CORES.len() + ); + } + + let pool = Arc::new(ResourcePool::new()); + for core in PERFORMANCE_CORES.iter().take(size) { + pool.append((*core).clone()); + } + + Ok(Self { pool }) + } + + /// Acquires a performance core from the pool, waiting if none are available. + /// + /// The core is returned to the pool when the guard is dropped. + pub async fn get(&self) -> ResourcePoolGuard { + self.pool.get().await + } +} + +impl Default for PerformanceCoresPool { + /// Creates a pool containing all available performance cores. 
+    fn default() -> Self {
+        // `new` only fails when asked for more cores than exist, which cannot
+        // happen when requesting exactly `num_cores()`.
+        Self::new(Self::num_cores()).expect("pool size equals the number of available cores")
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/discovery.rs b/src/hyperlight_ci/src/bench/discovery.rs
new file mode 100644
index 000000000..147579d28
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/discovery.rs
@@ -0,0 +1,125 @@
+use std::path::{Path, PathBuf};
+use std::process::Stdio;
+
+use anyhow::{Context, Result, bail};
+use tokio::process::Command;
+
+/// File-name suffixes of build artifacts that are not runnable benchmark binaries:
+///   .d    = dep-info files (all platforms)
+///   .pdb  = debug symbols (Windows)
+///   .dSYM = debug symbol bundles (macOS)
+///   .dwp  = DWARF packages (Linux, split-debuginfo)
+///   .lib  = import libraries (Windows)
+///   .exp  = export files (Windows)
+const NON_EXECUTABLE_SUFFIXES: [&str; 6] = [".d", ".pdb", ".dSYM", ".dwp", ".lib", ".exp"];
+
+/// Discovers available benchmarks by querying the benchmark binary.
+pub struct BenchmarkDiscovery {
+    features: String,
+    filter: Option<String>,
+    exact: bool,
+}
+
+impl BenchmarkDiscovery {
+    /// Create a new discovery instance with the given parameters.
+    ///
+    /// `features` is forwarded to `cargo build --features` (skipped when empty);
+    /// `filter` and `exact` are forwarded to the benchmark binary when listing.
+    pub fn new(features: &str, filter: Option<&str>, exact: bool) -> Self {
+        Self {
+            features: features.to_string(),
+            filter: filter.map(|s| s.to_string()),
+            exact,
+        }
+    }
+
+    /// Build all benchmark binaries and return their paths.
+    ///
+    /// Runs `cargo build --release --benches --message-format=json` and collects the
+    /// file names of every `compiler-artifact` message whose target kind is `bench`.
+    ///
+    /// Returns an error if cargo cannot be run, the build fails, or no benchmark
+    /// binary is reported.
+    pub async fn build(&self) -> Result<Vec<PathBuf>> {
+        let mut cmd = Command::new("cargo");
+        cmd.args([
+            "build",
+            "--release",
+            "--benches",
+            "--message-format=json",
+        ]);
+        if !self.features.is_empty() {
+            cmd.args(["--features", &self.features]);
+        }
+        cmd.stdout(Stdio::piped());
+        cmd.stderr(Stdio::piped());
+
+        let output = cmd
+            .output()
+            .await
+            .context("Failed to run cargo build for benchmarks")?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            bail!("Failed to build benchmarks:\n{stderr}");
+        }
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let mut binaries = Vec::new();
+
+        // Parse cargo's JSON output to find all benchmark binary paths
+        for line in stdout.lines() {
+            // Lines that are not JSON are ignored.
+            let Ok(msg) = serde_json::from_str::<serde_json::Value>(line) else {
+                continue;
+            };
+            if msg.get("reason").and_then(|r| r.as_str()) != Some("compiler-artifact") {
+                continue;
+            }
+            let is_bench = msg
+                .get("target")
+                .and_then(|t| t.get("kind"))
+                .and_then(|k| k.as_array())
+                .is_some_and(|kinds| kinds.iter().any(|k| k.as_str() == Some("bench")));
+            if !is_bench {
+                continue;
+            }
+            let Some(filenames) = msg.get("filenames").and_then(|f| f.as_array()) else {
+                continue;
+            };
+            binaries.extend(
+                filenames
+                    .iter()
+                    .filter_map(|f| f.as_str())
+                    .filter(|path| !NON_EXECUTABLE_SUFFIXES.iter().any(|ext| path.ends_with(ext)))
+                    .map(PathBuf::from),
+            );
+        }
+
+        if binaries.is_empty() {
+            bail!("No benchmark binaries found in cargo build output");
+        }
+
+        Ok(binaries)
+    }
+
+    /// List all benchmark names matching the configured filter.
+    ///
+    /// Runs `<binary> --bench --list [--exact] [filter]` and returns every output line
+    /// ending in `: benchmark`, with that suffix removed.
+    ///
+    /// Returns an error if the binary cannot be run or exits unsuccessfully, so a
+    /// broken binary is reported instead of silently contributing no benchmarks.
+    pub async fn list(&self, binary: &Path) -> Result<Vec<String>> {
+        let mut cmd = Command::new(binary);
+        cmd.args(["--bench", "--list"]);
+        if self.exact {
+            cmd.arg("--exact");
+        }
+        if let Some(filter) = &self.filter {
+            cmd.arg(filter);
+        }
+        cmd.stdout(Stdio::piped());
+        cmd.stderr(Stdio::null());
+
+        let output = cmd
+            .output()
+            .await
+            .with_context(|| format!("Failed to run {} --bench --list", binary.display()))?;
+
+        if !output.status.success() {
+            bail!(
+                "{} --bench --list exited with status {}",
+                binary.display(),
+                output.status
+            );
+        }
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+
+        let benches: Vec<String> = stdout
+            .lines()
+            .filter_map(|line| line.trim().strip_suffix(": benchmark"))
+            .map(str::to_string)
+            .collect();
+
+        Ok(benches)
+    }
+}
diff --git a/src/hyperlight_ci/src/bench/mod.rs b/src/hyperlight_ci/src/bench/mod.rs
new file mode 100644
index 000000000..a23914898
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/mod.rs
@@ -0,0 +1,67 @@
+//! The `bench` subcommand: discovers, runs, and reports on criterion benchmarks
+//! using the benchmark binary directly.
+
+mod args;
+mod cpu;
+mod discovery;
+mod output;
+mod process;
+mod progress;
+mod runner;
+
+pub use args::BenchArgs;
+
+use anyhow::{Context, Result};
+
+use self::discovery::BenchmarkDiscovery;
+use self::runner::BenchRunner;
+
+/// Synchronous entry point for the bench subcommand.
+///
+/// Creates a current-thread tokio runtime and drives [`run_async`] to completion on it.
+pub fn run(args: BenchArgs) -> Result<()> {
+    let runtime = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .context("Failed to build tokio runtime")?;
+    runtime.block_on(run_async(args))
+}
+
+/// Builds (or reuses) the benchmark binaries, lists their benchmarks and runs them.
+async fn run_async(args: BenchArgs) -> Result<()> {
+    // Status messages on stderr are suppressed from `-qq` upwards.
+    let announce = args.quiet < 2;
+    let discovery = BenchmarkDiscovery::new(&args.features, args.filter.as_deref(), args.exact);
+
+    // Binaries given on the command line take precedence over building.
+    let binaries = if !args.binary.is_empty() {
+        args.binary.clone()
+    } else {
+        if announce {
+            eprintln!("Building benchmarks ...");
+        }
+        discovery.build().await?
+    };
+
+    // Pair every benchmark name with the binary that provides it.
+    let mut benches = Vec::new();
+    for binary in &binaries {
+        let names = discovery.list(binary).await?;
+        benches.extend(names.into_iter().map(|name| (binary.clone(), name)));
+    }
+
+    if benches.is_empty() {
+        anyhow::bail!("No benchmarks found");
+    }
+
+    let max_jobs = args.max_jobs();
+    let use_progress = args.use_progress();
+
+    if announce {
+        eprintln!(
+            "Running {} benchmark(s) with parallelism {}",
+            benches.len(),
+            max_jobs
+        );
+    }
+
+    BenchRunner::new(max_jobs, args.quiet, use_progress)
+        .run(&benches)
+        .await?;
+
+    Ok(())
+}
diff --git a/src/hyperlight_ci/src/bench/output.rs b/src/hyperlight_ci/src/bench/output.rs
new file mode 100644
index 000000000..636057c86
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/output.rs
@@ -0,0 +1,36 @@
+use std::fmt::Write;
+use std::ops::Range;
+
+use ansi_replace::AnsiExt as _;
+use ansi_replace::replacer::Writable;
+
+/// Returns true if an output line is build noise that should be suppressed.
+pub fn is_noisy_line(line: &str) -> bool {
+    line.contains("waiting for file lock on")
+        || line.contains("Gnuplot not found")
+        || line.contains("`bench` profile [optimized]")
+}
+
+/// Strip the bench name from an output line.
+///
+/// Strategy:
+/// - If the line starts with the bench name, replace it with spaces to preserve alignment
+/// - Any other appearance of the bench name, along with one directly preceding space, is
+///   removed entirely
+/// - ANSI codes are preserved in all cases
+///
+/// Returns an empty string when nothing but whitespace (ignoring ANSI codes) remains.
+pub fn strip_bench_prefix(line: &str, bench: &str) -> String {
+    let escaped = regex::escape(bench);
+    let pattern = regex::Regex::new(&format!(r" ?{escaped}"))
+        .expect("an escaped literal is always a valid regex");
+
+    let result = line.ansi_replace(&pattern, |m: &str, i: Range<usize>, dst: &mut Writable| {
+        // Only a match at the very start of the line is padded; every other match
+        // writes nothing and is thereby dropped.
+        if i.start == 0 && m == bench {
+            write!(dst, "{:n$}", " ", n = m.len())?;
+        }
+        Ok(())
+    });
+
+    if result.ansi_strip().trim().is_empty() {
+        return String::new();
+    }
+
+    result
+}
diff --git a/src/hyperlight_ci/src/bench/process.rs b/src/hyperlight_ci/src/bench/process.rs
new file mode 100644
index 000000000..f2387d41f
--- /dev/null
+++ b/src/hyperlight_ci/src/bench/process.rs
@@ -0,0 +1,77 @@
+//! Spawns the benchmark binary for a single benchmark and streams its output.
+
+use std::ops::Deref;
+use std::path::Path;
+use std::process::Stdio;
+
+use anyhow::{Context, Result, bail};
+use cpu_pin::{CpuInfo, PinnedCommand as _};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+use tokio::sync::mpsc;
+
+/// Output of a completed benchmark process.
+pub struct ProcessOutput {
+    /// Lines read from the process's stdout and stderr, in arrival order.
+    pub output_lines: Vec<String>,
+}
+
+/// Spawns the benchmark binary for a single benchmark.
+///
+/// Streams output lines through `output_tx` as they arrive (for live progress updates),
+/// and returns the collected output when the process exits.
+pub async fn run( + bench: &str, + binary: &Path, + core: impl Deref, + output_tx: &mpsc::UnboundedSender, +) -> Result { + let mut cmd = Command::new(binary); + cmd.args(["--bench", "--color=always", "--noplot", "--exact"]); + cmd.arg(bench); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + let core_id = core.logical_cpus.first().unwrap(); + + let mut child = cmd + .spawn_pinned(*core_id) + .with_context(|| format!("Failed to spawn benchmark binary: {}", binary.display()))?; + + let stdout = child.stdout.take().unwrap(); + let stderr = child.stderr.take().unwrap(); + let mut reader_stdout = BufReader::new(stdout).lines(); + let mut reader_stderr = BufReader::new(stderr).lines(); + let mut output_lines = Vec::new(); + + // combine the stream of both stdout and stderr lines + // do not exit until both streams have been closed + loop { + tokio::select! { + line = reader_stdout.next_line() => { + let Some(line) = line.context("Failed to read stdout")? else { break }; + let _ = output_tx.send(line.clone()); + output_lines.push(line); + } + line = reader_stderr.next_line() => { + let Some(line) = line.context("Failed to read stderr")? else { break }; + let _ = output_tx.send(line.clone()); + output_lines.push(line); + } + } + } + + let status = child + .wait() + .await + .context("Failed to wait for benchmark binary")?; + + if !status.success() { + bail!( + "benchmark binary exited with status {} for benchmark '{}'", + status, + bench + ); + } + + Ok(ProcessOutput { output_lines }) +} diff --git a/src/hyperlight_ci/src/bench/progress.rs b/src/hyperlight_ci/src/bench/progress.rs new file mode 100644 index 000000000..137e7181f --- /dev/null +++ b/src/hyperlight_ci/src/bench/progress.rs @@ -0,0 +1,142 @@ +//! Progress bar and spinner management for benchmark output. 
+ +use std::collections::{HashMap, HashSet}; + +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; + +use super::output::{is_noisy_line, strip_bench_prefix}; + +/// Manages progress bars for a set of benchmarks. +pub struct ProgressTracker { + multi: MultiProgress, + overall: ProgressBar, + spinners: HashMap, + /// Benchmarks that have been registered but not yet started (no spinner visible). + pending: HashSet, + quiet_level: u8, + enabled: bool, +} + +impl ProgressTracker { + /// Create a new progress tracker. + /// + /// If `enabled` is false, all operations become no-ops (hidden bars, no output). + pub fn new(total: usize, quiet_level: u8, enabled: bool) -> Self { + let multi = MultiProgress::new(); + let overall = if enabled { + let bar = multi.add(ProgressBar::new(total as u64)); + bar.set_style( + ProgressStyle::with_template("{prefix} [{bar:40.cyan/blue}] {pos}/{len} ({eta})") + .unwrap() + .progress_chars("━━─"), + ); + bar.set_prefix("Benchmarks"); + bar + } else { + ProgressBar::hidden() + }; + + Self { + multi, + overall, + spinners: HashMap::new(), + pending: HashSet::new(), + quiet_level, + enabled, + } + } + + /// Register a benchmark for tracking (spinner stays hidden until it starts running). + pub fn add_spinner(&mut self, bench: &str) { + if !self.enabled { + return; + } + self.pending.insert(bench.to_string()); + } + + /// Update the spinner for a benchmark with an output line. + /// + /// On the first update, the spinner is created and becomes visible. + /// Filters noisy lines and strips the benchmark prefix before displaying. 
+ pub fn update_spinner(&mut self, bench: &str, line: &str) { + if is_noisy_line(line) { + return; + } + // If this is a pending benchmark, create and show its spinner now + if self.pending.remove(bench) { + let bar = self.multi.insert_before(&self.overall, ProgressBar::new_spinner()); + bar.set_style( + ProgressStyle::with_template(" {spinner:.green} {msg}") + .unwrap() + .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]), + ); + bar.enable_steady_tick(std::time::Duration::from_millis(100)); + self.spinners.insert(bench.to_string(), bar); + } + let Some(spinner) = self.spinners.get(bench) else { return }; + let display = strip_bench_prefix(line, bench); + if !display.is_empty() { + spinner.set_message(format!("\x1b[1;32m{bench}\x1b[0m: {display}")); + } + } + + /// Finish and remove the spinner for a benchmark. + pub fn finish_spinner(&mut self, bench: &str) { + if let Some(bar) = self.spinners.remove(bench) { + bar.finish_and_clear(); + self.multi.remove(&bar); + } + } + + /// Advance the overall progress bar by one. + pub fn advance(&self, position: u64) { + self.overall.set_position(position); + } + + /// Print a message respecting the progress system and quiet level. + pub fn println(&self, msg: &str) { + if self.enabled { + let _ = self.multi.println(msg); + } else if self.quiet_level < 1 { + eprintln!("{msg}"); + } + } + + /// Print the completion summary for a benchmark. + /// + /// At quiet_level 0, also prints filtered output lines. + pub fn print_completion( + &self, + done_count: usize, + total: usize, + bench: &str, + status: &str, + output_lines: &[String], + error: Option<&anyhow::Error>, + ) { + self.println(&format!( + "[{done_count}/{total}] \x1b[1;32m{bench}\x1b[0m ... 
{status}" + )); + + if self.quiet_level == 0 { + for line in output_lines { + if !is_noisy_line(line) { + let line = strip_bench_prefix(line, bench); + if !line.is_empty() && !line.starts_with("Benchmarking") { + self.println(&line); + } + } + } + self.println(""); + } + + if let Some(e) = error { + self.println(&format!(" error: {e}")); + } + } + + /// Finish the overall progress bar. + pub fn finish(&self) { + self.overall.finish_and_clear(); + } +} diff --git a/src/hyperlight_ci/src/bench/runner.rs b/src/hyperlight_ci/src/bench/runner.rs new file mode 100644 index 000000000..97a945acc --- /dev/null +++ b/src/hyperlight_ci/src/bench/runner.rs @@ -0,0 +1,188 @@ +//! Orchestrates parallel benchmark execution, wiring together process spawning +//! and progress reporting. + +use std::ops::Deref; +use std::path::{Path, PathBuf}; + +use anyhow::{Result, bail}; +use cpu_pin::CpuInfo; + +use super::cpu::PerformanceCoresPool; +use super::process::{self, ProcessOutput}; +use super::progress::ProgressTracker; + +/// Events sent from benchmark tasks to the orchestration loop. +enum BenchEvent { + /// An output line was produced by the given benchmark. + OutputLine { bench: String, line: String }, + /// The benchmark has completed. + Done(BenchResult), +} + +/// Result of a single benchmark run, combining identity with output. +struct BenchResult { + bench: String, + output_lines: Vec, + success: Result<()>, +} + +impl BenchResult { + fn status(&self) -> &str { + if self.success.is_ok() { + "done" + } else { + "FAILED" + } + } +} + +/// Orchestrates parallel benchmark execution with progress reporting. +pub struct BenchRunner { + max_jobs: usize, + quiet_level: u8, + use_progress: bool, +} + +impl BenchRunner { + /// Create a new runner with the given configuration. + pub fn new(max_jobs: usize, quiet_level: u8, use_progress: bool) -> Self { + Self { + max_jobs, + quiet_level, + use_progress, + } + } + + /// Run all benchmarks in parallel. 
+ /// + /// Each entry is a (binary_path, benchmark_name) pair. + /// + /// Quiet levels: + /// - 0: show progress, completion headers, and per-benchmark output + /// - 1: show progress and completion headers only (no output details) + /// - 2+: fully silent (no progress, no output) + pub async fn run(&self, benches: &[(PathBuf, String)]) -> Result<()> { + let total = benches.len(); + let mut tracker = ProgressTracker::new(total, self.quiet_level, self.use_progress); + + if self.max_jobs > PerformanceCoresPool::num_cores() { + bail!( + "Requested number of jobs {} exceeds available performance cores {}, use --jobs=0 or --quick to use all available performance cores.", + self.max_jobs, + PerformanceCoresPool::num_cores(), + ); + } + + let pool = PerformanceCoresPool::new(self.max_jobs)?; + let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::(); + + // Spawn all benchmarks (they'll wait on the semaphore internally) + for (binary, bench) in benches { + tracker.add_spinner(bench); + + let bench = bench.clone(); + let binary = binary.clone(); + let tx = tx.clone(); + let pool = pool.clone(); + + tokio::spawn(async move { + let core = pool.get().await; + Self::run_one(&bench, &binary, core, &tx).await; + }); + } + + // Drop our sender so rx closes when all tasks finish + drop(tx); + + // Process events as they arrive + let mut failed = Vec::new(); + let mut done_count = 0; + + while let Some(event) = rx.recv().await { + match event { + BenchEvent::OutputLine { bench, line } => { + tracker.update_spinner(&bench, &line); + } + BenchEvent::Done(result) => { + done_count += 1; + tracker.finish_spinner(&result.bench); + tracker.advance(done_count as u64); + + let error = result.success.as_ref().err(); + tracker.print_completion( + done_count, + total, + &result.bench, + result.status(), + &result.output_lines, + error, + ); + + if result.success.is_err() { + failed.push(result.bench); + } + } + } + } + + tracker.finish(); + + if !failed.is_empty() { + anyhow::bail!( + 
"{} benchmark(s) failed: {}", + failed.len(), + failed.join(", ") + ); + } + + Ok(()) + } + + /// Run a single benchmark, streaming output events and sending the final result. + async fn run_one( + bench: &str, + binary: &Path, + core: impl Deref, + event_tx: &tokio::sync::mpsc::UnboundedSender, + ) { + // Create a channel for output lines from the process + let (output_tx, mut output_rx) = tokio::sync::mpsc::unbounded_channel::(); + let bench_name = bench.to_string(); + let event_tx_clone = event_tx.clone(); + + // Forward output lines as events + let forwarder = tokio::spawn(async move { + while let Some(line) = output_rx.recv().await { + let _ = event_tx_clone.send(BenchEvent::OutputLine { + bench: bench_name.clone(), + line, + }); + } + }); + + // Signal that this benchmark is starting + let _ = event_tx.send(BenchEvent::OutputLine { + bench: bench.to_string(), + line: "Starting ...".to_string(), + }); + + let result = match process::run(bench, binary, core, &output_tx).await { + Ok(ProcessOutput { output_lines }) => BenchResult { + bench: bench.to_string(), + output_lines, + success: Ok(()), + }, + Err(e) => BenchResult { + bench: bench.to_string(), + output_lines: vec![], + success: Err(e), + }, + }; + + // Ensure all output forwarding completes before sending Done + drop(output_tx); + let _ = forwarder.await; + + let _ = event_tx.send(BenchEvent::Done(result)); + } +} diff --git a/src/hyperlight_ci/src/bench_report/mod.rs b/src/hyperlight_ci/src/bench_report/mod.rs new file mode 100644 index 000000000..619e61319 --- /dev/null +++ b/src/hyperlight_ci/src/bench_report/mod.rs @@ -0,0 +1,144 @@ +//! The `bench-report` subcommand: generates a markdown table from existing +//! criterion benchmark results in `target/criterion/`. + +mod table; + +use std::path::PathBuf; +use std::process::Command; + +use anyhow::{Context, Result}; +use clap::Args; + +/// Command-line arguments for the `bench-report` subcommand. 
+#[derive(Args)] +pub struct BenchReportArgs { + /// Filter benchmarks by name (substring match, or exact with --exact) + pub filter: Option, + + /// Match the filter exactly instead of as a substring + #[arg(long)] + pub exact: bool, + + /// Benchmark binary to list benchmarks from (can be specified multiple times). + /// When provided, only benchmarks available in these binaries are included. + #[arg(long)] + pub binary: Vec, + + /// Path to the criterion output directory + #[arg(long, default_value = "target/criterion")] + pub criterion_dir: PathBuf, + + /// Output file path (default: stdout) + #[arg(short, long)] + pub output: Option, +} + +/// Entry point for the bench-report subcommand. +pub fn run(args: BenchReportArgs) -> Result<()> { + let allowlist = build_allowlist(&args)?; + let allowlist_ref = allowlist.as_deref(); + + let markdown = table::render(&args.criterion_dir, allowlist_ref)?; + + if let Some(path) = &args.output { + std::fs::write(path, &markdown)?; + } else { + print!("{markdown}"); + } + + Ok(()) +} + +/// Builds an allowlist of benchmark full_ids by querying binaries and applying the filter. +/// +/// - If `--binary` is specified, lists benchmarks from each binary. +/// - If a text filter is specified, applies substring (or exact) matching. +/// - If neither is specified, returns `None` (include everything). 
+fn build_allowlist(args: &BenchReportArgs) -> Result>> { + let mut names: Option> = None; + + if !args.binary.is_empty() { + let mut list = Vec::new(); + for binary in &args.binary { + let output = Command::new(binary) + .args(["--bench", "--list"]) + .output() + .with_context(|| format!("Failed to run {} --bench --list", binary.display()))?; + let stdout = String::from_utf8_lossy(&output.stdout); + for line in stdout.lines() { + let line = line.trim(); + if let Some(name) = line.strip_suffix(": benchmark") { + list.push(name.to_string()); + } + } + } + names = Some(list); + } + + if let Some(ref filter) = args.filter { + let base = names.take(); + let iter: Box> = match base { + Some(v) => Box::new(v.into_iter()), + None => { + // No binaries specified; discover all benchmarks from criterion dir + let all = discover_all_ids(&args.criterion_dir)?; + Box::new(all.into_iter()) + } + }; + + let filtered: Vec = if args.exact { + iter.filter(|id| id == filter).collect() + } else { + iter.filter(|id| id.contains(filter.as_str())).collect() + }; + names = Some(filtered); + } + + Ok(names) +} + +/// Discovers all benchmark full_ids from the criterion directory. +fn discover_all_ids(criterion_dir: &PathBuf) -> Result> { + // Render with no filter to get all entries, then extract IDs + // We can reuse the walk logic by reading benchmark.json files + let mut ids = Vec::new(); + walk_for_ids(criterion_dir, &mut ids)?; + Ok(ids) +} + +/// Recursively walks directories looking for `new/benchmark.json` to extract full_ids. 
+fn walk_for_ids(dir: &std::path::Path, ids: &mut Vec) -> Result<()> { + let new_dir = dir.join("new"); + let meta_path = new_dir.join("benchmark.json"); + if meta_path.exists() { + let data = std::fs::read_to_string(&meta_path) + .with_context(|| format!("Failed to read {}", meta_path.display()))?; + #[derive(serde::Deserialize)] + struct Meta { + full_id: String, + } + let meta: Meta = serde_json::from_str(&data) + .with_context(|| format!("Failed to parse {}", meta_path.display()))?; + ids.push(meta.full_id); + return Ok(()); + } + + let read_dir = match std::fs::read_dir(dir) { + Ok(rd) => rd, + Err(_) => return Ok(()), + }; + + for entry in read_dir { + let entry = entry?; + if entry.file_type()?.is_dir() { + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + if name_str == "reports" || name_str.starts_with('.') { + continue; + } + walk_for_ids(&entry.path(), ids)?; + } + } + + Ok(()) +} diff --git a/src/hyperlight_ci/src/bench_report/table.rs b/src/hyperlight_ci/src/bench_report/table.rs new file mode 100644 index 000000000..550b1b0a6 --- /dev/null +++ b/src/hyperlight_ci/src/bench_report/table.rs @@ -0,0 +1,329 @@ +//! Reads criterion benchmark results from `target/criterion/` JSON files and +//! renders a markdown table similar to criterion-table. + +use std::collections::BTreeMap; +use std::fmt::Write; +use std::path::Path; + +use anyhow::{Context, Result}; +use serde::Deserialize; + +/// Metadata from a criterion `benchmark.json` file. +#[derive(Deserialize)] +struct BenchmarkMeta { + group_id: String, + function_id: String, + value_str: Option, + throughput: Option, + full_id: String, +} + +/// Throughput specification from `benchmark.json`. +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +#[allow(dead_code)] +enum Throughput { + Bytes(u64), + Elements(u64), +} + +/// Statistical estimates from a criterion `estimates.json` file. 
+#[derive(Deserialize)] +struct Estimates { + slope: Option, + mean: Estimate, +} + +/// A single statistical estimate with confidence interval. +#[derive(Deserialize)] +struct Estimate { + point_estimate: f64, +} + +/// Change estimates from a criterion `change/estimates.json` file. +#[derive(Deserialize)] +struct ChangeEstimates { + mean: ChangeEstimate, +} + +/// A single change estimate with point value. +#[derive(Deserialize)] +struct ChangeEstimate { + point_estimate: f64, +} + +/// Parsed change information for a benchmark. +struct ChangeInfo { + /// Relative change as a fraction (e.g., 0.05 = +5%, -0.02 = -2%). + point_estimate: f64, +} + +/// A single benchmark entry with its metadata and timing. +struct BenchEntry { + full_id: String, + group_id: String, + function_id: String, + value_str: Option, + estimate_ns: f64, + #[allow(dead_code)] + throughput: Option, + /// Change vs the stored baseline, if available. + change: Option, +} + +impl BenchEntry { + /// Returns the column label for this benchmark (the function name). + /// + /// If `value_str` is set, the full `function_id` is the column. + /// Otherwise, if `function_id` contains "/", the part before the last "/" is the column. + fn column(&self) -> &str { + if self.value_str.is_some() { + return &self.function_id; + } + match self.function_id.rfind('/') { + Some(idx) => &self.function_id[..idx], + None => &self.function_id, + } + } + + /// Returns the row label for this benchmark (the parameter/value). + /// + /// Uses `value_str` if set, otherwise the part after the last "/" in `function_id`. + fn row(&self) -> Option<&str> { + if let Some(ref v) = self.value_str { + return Some(v.as_str()); + } + self.function_id.rfind('/').map(|idx| &self.function_id[idx + 1..]) + } +} + +/// Reads all benchmark results from the given criterion output directory +/// and renders a markdown table. +/// +/// If `allowlist` is provided, only benchmarks whose `full_id` is in the list are included. 
+pub fn render(criterion_dir: &Path, allowlist: Option<&[String]>) -> Result { + let mut entries = discover_benchmarks(criterion_dir)?; + if let Some(names) = allowlist { + entries.retain(|e| names.iter().any(|n| n == &e.full_id)); + } + if entries.is_empty() { + anyhow::bail!("No benchmark results found in {}", criterion_dir.display()); + } + Ok(format_table(&entries)) +} + +/// Discovers all benchmark entries by walking the criterion directory. +fn discover_benchmarks(criterion_dir: &Path) -> Result> { + let mut entries = Vec::new(); + walk_for_benchmarks(criterion_dir, &mut entries)?; + Ok(entries) +} + +/// Recursively walks directories looking for `new/benchmark.json` files. +fn walk_for_benchmarks(dir: &Path, entries: &mut Vec) -> Result<()> { + let new_dir = dir.join("new"); + if new_dir.join("benchmark.json").exists() { + if let Some(entry) = read_benchmark_entry(&new_dir)? { + entries.push(entry); + } + return Ok(()); + } + + let read_dir = std::fs::read_dir(dir) + .with_context(|| format!("Failed to read directory {}", dir.display()))?; + + for entry in read_dir { + let entry = entry?; + if entry.file_type()?.is_dir() { + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + // Skip non-benchmark directories + if name_str == "reports" || name_str.starts_with('.') { + continue; + } + walk_for_benchmarks(&entry.path(), entries)?; + } + } + + Ok(()) +} + +/// Reads a single benchmark entry from a `new/` directory. 
+fn read_benchmark_entry(new_dir: &Path) -> Result> { + let meta_path = new_dir.join("benchmark.json"); + let estimates_path = new_dir.join("estimates.json"); + + if !estimates_path.exists() { + return Ok(None); + } + + let meta: BenchmarkMeta = serde_json::from_str( + &std::fs::read_to_string(&meta_path) + .with_context(|| format!("Failed to read {}", meta_path.display()))?, + ) + .with_context(|| format!("Failed to parse {}", meta_path.display()))?; + + let estimates: Estimates = serde_json::from_str( + &std::fs::read_to_string(&estimates_path) + .with_context(|| format!("Failed to read {}", estimates_path.display()))?, + ) + .with_context(|| format!("Failed to parse {}", estimates_path.display()))?; + + // Prefer slope (linear regression) over mean, matching criterion's "typical" behavior + let estimate_ns = estimates + .slope + .as_ref() + .unwrap_or(&estimates.mean) + .point_estimate; + + // Read change/estimates.json (sibling to new/) if it exists + let change_path = new_dir + .parent() + .map(|p| p.join("change").join("estimates.json")); + let change = change_path + .filter(|p| p.exists()) + .and_then(|p| { + let data = std::fs::read_to_string(&p).ok()?; + let ce: ChangeEstimates = serde_json::from_str(&data).ok()?; + Some(ChangeInfo { + point_estimate: ce.mean.point_estimate, + }) + }); + + Ok(Some(BenchEntry { + full_id: meta.full_id, + group_id: meta.group_id, + function_id: meta.function_id, + value_str: meta.value_str, + throughput: meta.throughput, + estimate_ns, + change, + })) +} + +/// Formats all benchmark entries into a markdown string. 
+fn format_table(entries: &[BenchEntry]) -> String { + // Group entries by group_id, preserving discovery order + let mut groups: BTreeMap<&str, Vec<&BenchEntry>> = BTreeMap::new(); + for entry in entries { + groups.entry(&entry.group_id).or_default().push(entry); + } + + let mut out = String::new(); + writeln!(out, "# Benchmarks\n").unwrap(); + writeln!(out, "## Benchmark Results\n").unwrap(); + + for (group_id, group_entries) in &groups { + writeln!(out, "### {group_id}\n").unwrap(); + write_group_table(&mut out, group_entries); + writeln!(out).unwrap(); + } + + out +} + +/// Writes a markdown table for a single benchmark group. +fn write_group_table(out: &mut String, entries: &[&BenchEntry]) { + // Collect unique functions (columns) and values (rows), preserving order + let mut functions: Vec<&str> = Vec::new(); + let mut values: Vec> = Vec::new(); + + for entry in entries { + let col = entry.column(); + if !functions.contains(&col) { + functions.push(col); + } + let row = entry.row(); + if !values.contains(&row) { + values.push(row); + } + } + + // Build a lookup: (column, row) -> &BenchEntry + let mut lookup: BTreeMap<(&str, Option<&str>), &BenchEntry> = BTreeMap::new(); + for entry in entries { + lookup.insert((entry.column(), entry.row()), entry); + } + + // Header row + write!(out, "|").unwrap(); + // Row label column (empty header) + write!(out, " ").unwrap(); + for func in &functions { + write!(out, " | `{func}`").unwrap(); + } + writeln!(out, " |").unwrap(); + + // Alignment row + write!(out, "|:-----------|").unwrap(); + for _ in &functions { + write!(out, ":------------------------ |").unwrap(); + } + writeln!(out).unwrap(); + + // Data rows + for val in &values { + let row_label = match val { + Some(v) => format!("**`{v}`**"), + None => String::new(), + }; + write!(out, "| {row_label:10} ").unwrap(); + + for func in &functions { + if let Some(&entry) = lookup.get(&(*func, *val)) { + let time_str = format_time(entry.estimate_ns); + let change_str = 
format_change(&entry.change); + write!(out, " | `{time_str}` ({change_str}) ").unwrap(); + } else { + write!(out, " | ").unwrap(); + } + } + writeln!(out, " |").unwrap(); + } +} + +/// Formats change vs baseline with tiered emojis (matching criterion-table style). +/// +/// Uses `compare = 1 / ratio` (where ratio = new/old) to determine tier: +/// - `compare >= 1.8` (44%+ faster): 🚀 +/// - `compare > 0.9` (within ~10% slower): ✅ +/// - `compare <= 0.9` (10%+ slower): ❌ +fn format_change(change: &Option) -> String { + let Some(change) = change else { + return "---".to_string(); + }; + + // ratio = new_time / old_time + let ratio = 1.0 + change.point_estimate; + // compare = old_time / new_time (criterion-table's convention) + let compare = 1.0 / ratio; + + let speedup_str = if ratio < 1.0 { + format!("{:.2}x faster", 1.0 / ratio) + } else if ratio > 1.0 { + format!("{:.2}x slower", ratio) + } else { + format!("{ratio:.2}x") + }; + + if compare >= 1.8 { + format!("🚀 **{speedup_str}**") + } else if compare > 0.9 { + format!("✅ **{speedup_str}**") + } else { + format!("❌ *{speedup_str}*") + } +} + +/// Formats a time in nanoseconds to a human-readable string with appropriate units. 
+fn format_time(ns: f64) -> String { + if ns < 1_000.0 { + format!("{:.2} ns", ns) + } else if ns < 1_000_000.0 { + format!("{:.2} µs", ns / 1_000.0) + } else if ns < 1_000_000_000.0 { + format!("{:.2} ms", ns / 1_000_000.0) + } else { + format!("{:.2} s", ns / 1_000_000_000.0) + } +} diff --git a/src/hyperlight_ci/src/main.rs b/src/hyperlight_ci/src/main.rs new file mode 100644 index 000000000..70850dffa --- /dev/null +++ b/src/hyperlight_ci/src/main.rs @@ -0,0 +1,27 @@ +mod bench; +mod bench_report; + +use clap::{Parser, Subcommand}; + +#[derive(Parser)] +#[command(name = "hyperlight-ci", about = "Hyperlight's CI and development tools")] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Subcommand)] +enum Commands { + /// Run benchmarks using the benchmark binary directly + Bench(bench::BenchArgs), + /// Generate a markdown table from existing criterion benchmark results + BenchReport(bench_report::BenchReportArgs), +} + +fn main() -> anyhow::Result<()> { + let cli = Cli::parse(); + match cli.command { + Commands::Bench(args) => bench::run(args), + Commands::BenchReport(args) => bench_report::run(args), + } +}