diff --git a/.github/workflows/RegenSnapshotGoldens.yml b/.github/workflows/RegenSnapshotGoldens.yml new file mode 100644 index 000000000..82ea44ce8 --- /dev/null +++ b/.github/workflows/RegenSnapshotGoldens.yml @@ -0,0 +1,248 @@ +# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json + +# Publish snapshot goldens to +# ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens. +# +# Runs automatically when a merge to main changes GOLDENS_VERSION (the +# version string lives in +# src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs). The check-published +# job reads that version and checks GHCR for its `{version}-complete` +# marker. If the marker is absent, the matrix walks every (hv, cpu, +# config) combination, dumps the canonical snapshot, and uploads it as a +# workflow artifact. A single publish job then downloads every artifact, +# pushes each as a tag named `{version}-{hv}-{cpu}-{profile}`, and +# pushes the marker last. Publishing the whole set from one job means a +# partial run leaves no marker and is republished on the next run. +# +# A version whose marker exists is left untouched, so a merge that does +# not bump the version, or a re-run of the same version, is a no-op. +# Manual dispatch with `force: true` overwrites an existing version and +# exists for recovery only. +# +# See docs/snapshot-versioning.md + +name: Regenerate Snapshot Goldens + +on: + push: + branches: [main] + paths: + - src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs + workflow_dispatch: + inputs: + version: + description: Goldens version string. Must match GOLDENS_VERSION in source (e.g. "v1.0"). + required: true + type: string + force: + description: Overwrite tags even if the version is already published (recovery only). 
+ type: boolean + default: false + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: full + GHCR_IMAGE: ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens + +permissions: + contents: read + packages: write + +concurrency: + group: regen-snapshot-goldens-${{ github.ref }} + cancel-in-progress: false + +defaults: + run: + shell: bash + +jobs: + check-published: + runs-on: ubuntu-latest + permissions: + contents: read + packages: read + outputs: + version: ${{ steps.decide.outputs.version }} + needs_publish: ${{ steps.decide.outputs.needs_publish }} + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + + - name: Install oras + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.2 + + - name: Decide version and whether to publish + id: decide + env: + EVENT_NAME: ${{ github.event_name }} + INPUT_VERSION: ${{ inputs.version }} + FORCE: ${{ inputs.force }} + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + SRC=$(grep -oE 'GOLDENS_VERSION: &str = "[^"]+"' src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs | head -n1 | sed -E 's/.*"([^"]+)".*/\1/') + if ! [[ "${SRC}" =~ ^v[0-9]+\.[0-9]+$ ]]; then + echo "::error::GOLDENS_VERSION in source must match ^v[0-9]+\.[0-9]+$ (e.g. v1.0), found '${SRC}'" + exit 1 + fi + + # On manual dispatch the input must name the version that the + # dispatched ref actually carries. This catches a stale input. 
+ if [ "${EVENT_NAME}" = "workflow_dispatch" ] && [ "${INPUT_VERSION}" != "${SRC}" ]; then + echo "::error::version input '${INPUT_VERSION}' does not match GOLDENS_VERSION in source '${SRC}'" + exit 1 + fi + + echo "version=${SRC}" >> "$GITHUB_OUTPUT" + + if [ "${EVENT_NAME}" = "workflow_dispatch" ] && [ "${FORCE}" = "true" ]; then + echo "force requested: will publish ${SRC} even if it already exists" + echo "needs_publish=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # A version is frozen once its completion marker exists on GHCR. The + # publish job pushes the marker after every tag, so a partial push leaves + # no marker and the next run republishes the whole set. Publishing only + # when the marker is absent keeps the workflow idempotent and never + # clobbers a complete baseline. The tag list is captured before grep reads + # it: under pipefail, piping oras into grep -q can die of SIGPIPE on a match. + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + if grep -qxF "${SRC}-complete" <<<"$(oras repo tags "${GHCR_IMAGE}" 2>/dev/null)"; then + echo "${SRC} already published (marker ${SRC}-complete present). Nothing to do." + echo "needs_publish=false" >> "$GITHUB_OUTPUT" + else + echo "${SRC} not fully published yet. Will publish." 
+ echo "needs_publish=true" >> "$GITHUB_OUTPUT" + fi + + build-guests: + needs: check-published + if: needs.check-published.outputs.needs_publish == 'true' + strategy: + matrix: + config: [debug, release] + uses: ./.github/workflows/dep_build_guests.yml + with: + config: ${{ matrix.config }} + secrets: inherit + + generate-snapshots: + needs: [check-published, build-guests] + if: needs.check-published.outputs.needs_publish == 'true' + strategy: + fail-fast: false + matrix: + hypervisor: [kvm, mshv3, hyperv-ws2025] + cpu: [amd, intel] + config: [debug, release] + runs-on: ${{ fromJson( + format('["self-hosted", "{0}", "X64", "1ES.Pool=hld-{1}-{2}", "JobId=regen-goldens-{3}-{4}-{5}-{6}"]', + matrix.hypervisor == 'hyperv-ws2025' && 'Windows' || 'Linux', + matrix.hypervisor == 'hyperv-ws2025' && 'win2025' || matrix.hypervisor == 'mshv3' && 'azlinux3-mshv' || matrix.hypervisor, + matrix.cpu, + matrix.config, + github.run_id, + github.run_number, + github.run_attempt)) }} + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + + - uses: hyperlight-dev/ci-setup-workflow@f6bd9cc86d0737976d2128c8b8ced8edc017cbb4 # v1.9.0 + with: + rust-toolchain: "1.94" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Fix cargo home permissions + if: runner.os == 'Linux' + run: sudo chown -R $(id -u):$(id -g) /opt/cargo || true + + - name: Download Rust guests + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: rust-guests-${{ matrix.config }} + path: src/tests/rust_guests/bin/${{ matrix.config }}/ + + - name: Confirm source matches resolved version + env: + RESOLVED_VERSION: ${{ needs.check-published.outputs.version }} + run: | + set -euo pipefail + SRC=$(grep -oE 'GOLDENS_VERSION: &str = "[^"]+"' src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs | head -n1 | sed -E 's/.*"([^"]+)".*/\1/') + if [ "${SRC}" != "${RESOLVED_VERSION}" ]; then + echo "::error::source GOLDENS_VERSION '${SRC}' 
does not match resolved '${RESOLVED_VERSION}'" + exit 1 + fi + + - name: Generate snapshots + run: just snapshot-goldens-generate ${{ matrix.config }} "$RUNNER_TEMP/snapshot-goldens" + + - name: Resolve produced tag + id: tag + env: + GOLDENS_VERSION: ${{ needs.check-published.outputs.version }} + run: | + set -euo pipefail + shopt -s nullglob + layouts=("$RUNNER_TEMP/snapshot-goldens/${GOLDENS_VERSION}-"*/) + if [ "${#layouts[@]}" -ne 1 ]; then + echo "::error::expected exactly one golden layout under $RUNNER_TEMP/snapshot-goldens, found ${#layouts[@]}: ${layouts[*]:-none}" + exit 1 + fi + layout="${layouts[0]%/}" + echo "tag=$(basename "${layout}")" >> "$GITHUB_OUTPUT" + echo "dir=${layout}" >> "$GITHUB_OUTPUT" + + - name: Upload golden layout + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: golden-${{ steps.tag.outputs.tag }} + path: ${{ steps.tag.outputs.dir }}/ + if-no-files-found: error + retention-days: 1 + + # Push every matrix job's snapshot from this single job, so the published set is + # whole or absent. `generate-snapshots` runs `fail-fast: false` and uploads each + # snapshot as an artifact, so this job's `needs` succeeds only when + # all matrix jobs did. It downloads every artifact, pushes each tag, then + # pushes the `{version}-complete` marker that `check-published` gates on. A + # push that dies partway leaves no marker, so the next run republishes. 
+ publish: + needs: [check-published, generate-snapshots] + if: needs.check-published.outputs.needs_publish == 'true' + runs-on: ubuntu-latest + steps: + - name: Install oras + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.2 + + - name: Download all golden layouts + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + pattern: golden-* + path: layouts + + - name: Push goldens and completion marker + env: + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GOLDENS_VERSION: ${{ needs.check-published.outputs.version }} + run: | + set -euo pipefail + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + for layout in layouts/golden-*/; do + tag=$(basename "${layout%/}") + tag=${tag#golden-} + echo "::group::push ${tag}" + oras cp --from-oci-layout "${layout%/}:${tag}" "${GHCR_IMAGE}:${tag}" + echo "::endgroup::" + done + printf '%s' "${GOLDENS_VERSION}" > complete.txt + oras push "${GHCR_IMAGE}:${GOLDENS_VERSION}-complete" \ + --artifact-type application/vnd.hyperlight.goldens.complete.v1 \ + complete.txt:text/plain diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml index 659ec9acc..2a0e662c3 100644 --- a/.github/workflows/ValidatePullRequest.yml +++ b/.github/workflows/ValidatePullRequest.yml @@ -79,11 +79,31 @@ jobs: with: docs_only: ${{ needs.docs-pr.outputs.docs-only }} + # Pick the goldens mode. The `regen-goldens` label means regenerate. No label means pull. 
+ goldens-mode: + runs-on: ubuntu-latest + outputs: + regen: ${{ steps.check.outputs.regen || 'false' }} + steps: + - id: check + if: github.event_name == 'pull_request' + env: + GH_TOKEN: ${{ github.token }} + run: | + labels="$(gh pr view ${{ github.event.pull_request.number }} \ + --repo ${{ github.repository }} --json labels -q '.labels[].name')" + if grep -qx regen-goldens <<<"$labels"; then + echo "regen=true" >> "$GITHUB_OUTPUT" + else + echo "regen=false" >> "$GITHUB_OUTPUT" + fi + # Build and test - needs guest artifacts build-test: needs: - docs-pr - build-guests + - goldens-mode # Required because update-guest-locks is skipped on non-dependabot PRs, # and a skipped dependency transitively skips all downstream jobs. # See: https://github.com/actions/runner/issues/2205 @@ -101,6 +121,31 @@ jobs: hypervisor: ${{ matrix.hypervisor }} cpu: ${{ matrix.cpu }} config: ${{ matrix.config }} + regen_goldens: ${{ needs.goldens-mode.outputs.regen }} + + # Cross-CPU snapshot check for regenerated baselines. Only the regen + # path needs it: the pull path already runs self+cross per cell. + # Each build-test cell uploads its generated layout, then this job + # loads the peer CPU's layout and verifies the opposite vendor. 
+ snapshot-cross-verify: + needs: + - docs-pr + - build-test + - goldens-mode + if: ${{ !cancelled() && !failure() && needs.goldens-mode.outputs.regen == 'true' }} + strategy: + fail-fast: false + matrix: + hypervisor: ['hyperv-ws2025', mshv3, kvm] + cpu: [amd, intel] + config: [debug, release] + uses: ./.github/workflows/dep_snapshot_cross_verify.yml + secrets: inherit + with: + docs_only: ${{ needs.docs-pr.outputs.docs-only }} + hypervisor: ${{ matrix.hypervisor }} + cpu: ${{ matrix.cpu }} + config: ${{ matrix.config }} # Run examples - needs guest artifacts, runs in parallel with build-test run-examples: @@ -164,7 +209,9 @@ jobs: - update-guest-locks - build-guests - code-checks + - goldens-mode - build-test + - snapshot-cross-verify - run-examples - fuzzing - spelling diff --git a/.github/workflows/dep_build_test.yml b/.github/workflows/dep_build_test.yml index 91ce867aa..780fc68af 100644 --- a/.github/workflows/dep_build_test.yml +++ b/.github/workflows/dep_build_test.yml @@ -22,6 +22,11 @@ on: description: CPU architecture for the build (passed from caller matrix) required: true type: string + regen_goldens: + description: Regenerate snapshot goldens from the branch and skip pulling published ones + required: false + type: string + default: "false" env: CARGO_TERM_COLOR: always @@ -29,6 +34,7 @@ env: permissions: contents: read + packages: read defaults: run: @@ -138,3 +144,64 @@ jobs: env: RUST_LOG: debug run: just test-rust-tracing ${{ inputs.config }} + + - name: Install oras + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.2 + + # Pull the published goldens for this cell and load them with the + # branch. A missing tag fails the job and flags a format break. 
+ - name: Snapshot goldens (pull and verify) + if: ${{ inputs.regen_goldens != 'true' }} + env: + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + just snapshot-goldens-pull ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens ${{ inputs.config }} + just snapshot-goldens-verify ${{ inputs.config }} self + just snapshot-goldens-verify ${{ inputs.config }} cross + + # Label path: generate the goldens from the branch and load them + # back on this CPU (self). Used when no published tag set exists + # yet. The peer-CPU direction (cross) runs in snapshot-cross-verify + # after every cell has uploaded its layout. + - name: Snapshot goldens (regenerate and verify self) + if: ${{ inputs.regen_goldens == 'true' }} + env: + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + out="${RUNNER_TEMP}/snapshot-goldens" + rm -rf "${out}" + just snapshot-goldens-generate ${{ inputs.config }} "${out}" + # generate writes exactly one {tag}/ layout into the clean dir. + layout=$(echo "${out}"/*/) + tag="$(basename "${layout%/}")" + # Place it where snapshot-goldens-verify reads from. + rm -rf "target/snapshot-goldens/${tag}" + mkdir -p target/snapshot-goldens + cp -r "${layout%/}" "target/snapshot-goldens/${tag}" + # Old majors in COMPAT_VERSIONS already live in the registry. + # Stage them so the verify loop can load them next to the + # regenerated current golden. 
+ version_file=src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs + compat=$(grep 'COMPAT_VERSIONS' "${version_file}" | grep -oE 'v[0-9]+\.[0-9]+' || true) + if [[ -n "${compat}" ]]; then + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + just snapshot-goldens-pull ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens ${{ inputs.config }} compat + fi + just snapshot-goldens-verify ${{ inputs.config }} self + + # Upload the freshly generated layout so snapshot-cross-verify can + # load it on the peer CPU runner. + - name: Upload regenerated golden layout + if: ${{ inputs.regen_goldens == 'true' }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: regen-golden-${{ inputs.hypervisor }}-${{ inputs.cpu }}-${{ inputs.config }} + path: ${{ runner.temp }}/snapshot-goldens/ + if-no-files-found: error + retention-days: 1 diff --git a/.github/workflows/dep_snapshot_cross_verify.yml b/.github/workflows/dep_snapshot_cross_verify.yml new file mode 100644 index 000000000..a2da84677 --- /dev/null +++ b/.github/workflows/dep_snapshot_cross_verify.yml @@ -0,0 +1,111 @@ +# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json + +name: Snapshot Cross-CPU Verify + +# Loads the peer CPU's freshly regenerated golden on this CPU runner +# (AMD loads the Intel layout, Intel loads the AMD layout). The pull +# path already runs self+cross per cell; this covers the regen path, +# where each new layout exists only as a build-test artifact. 
+on: + workflow_call: + inputs: + docs_only: + description: Skip if docs only + required: false + type: string + default: "false" + hypervisor: + description: Hypervisor for this run (passed from caller matrix) + required: true + type: string + config: + description: Build configuration for this run (passed from caller matrix) + required: true + type: string + cpu: + description: CPU vendor for this run (passed from caller matrix) + required: true + type: string + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: full + +permissions: + contents: read + packages: read + +defaults: + run: + shell: bash + +jobs: + cross-verify: + if: ${{ inputs.docs_only == 'false' }} + timeout-minutes: 30 + runs-on: ${{ fromJson( + format('["self-hosted", "{0}", "X64", "1ES.Pool=hld-{1}-{2}", "JobId=snapshot-cross-verify-{3}-{4}-{5}-{6}"]', + inputs.hypervisor == 'hyperv-ws2025' && 'Windows' || 'Linux', + inputs.hypervisor == 'hyperv-ws2025' && 'win2025' || inputs.hypervisor == 'mshv3' && 'azlinux3-mshv' || inputs.hypervisor, + inputs.cpu, + inputs.config, + github.run_id, + github.run_number, + github.run_attempt)) }} + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + + - uses: hyperlight-dev/ci-setup-workflow@f6bd9cc86d0737976d2128c8b8ced8edc017cbb4 # v1.9.0 + with: + rust-toolchain: "1.94" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Fix cargo home permissions + if: runner.os == 'Linux' + run: sudo chown -R $(id -u):$(id -g) /opt/cargo || true + + - name: Rust cache + uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1 + with: + shared-key: "${{ runner.os }}-${{ inputs.hypervisor }}-${{ inputs.config }}" + cache-on-failure: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} + + - name: Download Rust guests + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: rust-guests-${{ inputs.config }} + path: src/tests/rust_guests/bin/${{ inputs.config }}/ + + 
# The peer CPU's layout extracts as target/snapshot-goldens/{peer-tag}/, + # which is where `snapshot-goldens-verify ... cross` looks for it. + - name: Download peer golden + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: regen-golden-${{ inputs.hypervisor }}-${{ inputs.cpu == 'amd' && 'intel' || 'amd' }}-${{ inputs.config }} + path: target/snapshot-goldens/ + + - name: Install oras + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.2 + + # Old majors in COMPAT_VERSIONS already live in the registry. Stage + # them so the cross verify loop can load them next to the peer's + # regenerated current golden. + - name: Stage compatibility goldens + env: + GHCR_USER: ${{ github.actor }} + GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + version_file=src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs + compat=$(grep 'COMPAT_VERSIONS' "${version_file}" | grep -oE 'v[0-9]+\.[0-9]+' || true) + if [[ -n "${compat}" ]]; then + echo "${GHCR_TOKEN}" | oras login ghcr.io -u "${GHCR_USER}" --password-stdin + just snapshot-goldens-pull ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens ${{ inputs.config }} compat + fi + + - name: Verify cross-CPU golden + run: just snapshot-goldens-verify ${{ inputs.config }} cross diff --git a/Cargo.lock b/Cargo.lock index 667300d7d..a7412d599 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -513,6 +513,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", + "clap_derive", ] [[package]] @@ -527,6 +528,18 @@ dependencies = [ "strsim", ] +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck", + "proc-macro2", + "quote", + 
"syn", +] + [[package]] name = "clap_lex" version = "1.0.0" @@ -982,6 +995,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "euclid" version = "0.22.13" @@ -1753,6 +1772,7 @@ dependencies = [ "kvm-ioctls", "lazy_static", "libc", + "libtest-mimic", "log", "metrics", "metrics-exporter-prometheus", @@ -2208,6 +2228,18 @@ dependencies = [ "libc", ] +[[package]] +name = "libtest-mimic" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33" +dependencies = [ + "anstream 1.0.0", + "anstyle", + "clap", + "escape8259", +] + [[package]] name = "libz-sys" version = "1.1.23" diff --git a/Justfile b/Justfile index 2bdb842b9..d787279d4 100644 --- a/Justfile +++ b/Justfile @@ -249,7 +249,10 @@ test-integration target=default-target features="": (witguest-wit) @# run component-util integration tests that depend on generated WIT inputs {{ cargo-cmd }} test -p hyperlight-component-util --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test wasmtime_guest_codegen - @# run the rest of the integration tests + @# run the rest of the integration tests. `snapshot_goldens` is in + @# the glob but no-ops without a mode token, so it needs no golden + @# cache here. It verifies in its own step (see the snapshot-goldens + @# recipes). 
{{ cargo-cmd }} test -p hyperlight-host {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test '*' # tests compilation with no default features on different platforms @@ -573,3 +576,67 @@ install-vcpkg: install-flatbuffers-with-vcpkg: install-vcpkg cd ../vcpkg && ./vcpkg install flatbuffers || cd - + +################################### +### SNAPSHOT GOLDEN HELPERS ### +################################### +# Test binary that checks or rebuilds snapshot goldens. It reads +# snapshots from target/snapshot-goldens/{tag}/. +# `snapshot-goldens-pull` fills that directory. It uses `oras` to copy +# from the registry (install from https://oras.land). + +# Default OCI registry image (without tag) that hosts the goldens. +default-snapshot-goldens-image := "ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens" + +# Check the local snapshots against the goldens for the current +# GOLDENS_VERSION. Run `snapshot-goldens-pull` first to fill the +# local directory. A missing entry fails the test. One run checks one +# CPU vendor: `cpu=self` (default) the host's own, `cpu=cross` the +# peer (Intel<->AMD). CI runs it twice to cover both. +snapshot-goldens-verify target=default-target cpu="self": + cargo test {{ if target == "release" { "--release" } else { "" } }} \ + -p hyperlight-host --test snapshot_goldens -- {{ cpu }} + +# Pull the goldens for this host from `image` into the +# directory that `snapshot-goldens-verify` reads. It picks the +# hypervisor from the host and fetches both CPU vendors, since +# snapshots are portable across Intel and AMD. Pass `target=release` +# to fetch the release tags. `which=all` (default) fetches the current +# version and every `COMPAT_VERSIONS` major. 
`which=compat` fetches +# only the old majors, used in regenerate runs where the current tag +# does not exist in the registry yet. +snapshot-goldens-pull image=default-snapshot-goldens-image target=default-target which="all": + #!/usr/bin/env bash + set -euo pipefail + if [[ -e /dev/mshv ]]; then hv=mshv + elif [[ -e /dev/kvm ]]; then hv=kvm + elif [[ "${OS:-}" == "Windows_NT" ]]; then hv=whp + else echo "snapshot-goldens-pull: no hypervisor found" >&2; exit 1 + fi + version_file=src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs + current=$(awk -F'"' '/GOLDENS_VERSION: &str =/{print $2; exit}' "${version_file}") + # Old majors kept for backwards-compat verification. Mirror of + # `COMPAT_VERSIONS` in goldens_version.rs, parsed from its line. + compat=$(grep 'COMPAT_VERSIONS' "${version_file}" | grep -oE 'v[0-9]+\.[0-9]+' || true) + case "{{ which }}" in + all) versions="${current} ${compat}" ;; + compat) versions="${compat}" ;; + *) echo "snapshot-goldens-pull: unknown which '{{ which }}' (expected all or compat)" >&2; exit 1 ;; + esac + # Mirror of `Platform::tag_for` in platform.rs. Keep both in sync. + for version in ${versions}; do + for cpu in intel amd; do + tag="${version}-${hv}-${cpu}-{{ target }}" + dir="target/snapshot-goldens/${tag}" + mkdir -p "${dir}" + oras cp --to-oci-layout "{{ image }}:${tag}" "${dir}:${tag}" + done + done + +# Build the local snapshots into the directory that +# `snapshot-goldens-verify` reads. Run `snapshot-goldens-generate` +# then `snapshot-goldens-verify` to test the round trip on one host. +# Pass `out` to write the snapshots to another directory. 
+snapshot-goldens-generate target=default-target out="": + cargo test {{ if target == "release" { "--release" } else { "" } }} \ + -p hyperlight-host --test snapshot_goldens -- generate {{ out }} diff --git a/docs/github-labels.md b/docs/github-labels.md index 5133f048a..e1f28c2ed 100644 --- a/docs/github-labels.md +++ b/docs/github-labels.md @@ -55,6 +55,12 @@ In addition to **kind/*** labels, we use optional **area/*** labels to specify t - **area/security** - Involves security-related changes or fixes. - **area/testing** - Related to tests or testing infrastructure. +## Workflow labels + +Some labels change CI behaviour on a PR rather than categorizing it: + +- **regen-goldens** - Switches the snapshot golden verify job into regenerate mode. A PR that intentionally changes the snapshot format and bumps `GOLDENS_VERSION` carries this label so the verify job generates the goldens from the branch and runs them back through the branch loader, rather than pulling a published tag set that does not exist yet. See [snapshot-versioning.md](snapshot-versioning.md). + ## Notes This document is a work in progress and may be updated as needed. The labels and categories are subject to change based on the evolving needs of the project and community feedback. diff --git a/docs/snapshot-versioning.md b/docs/snapshot-versioning.md new file mode 100644 index 000000000..2f0df70dd --- /dev/null +++ b/docs/snapshot-versioning.md @@ -0,0 +1,353 @@ +# Snapshot versioning + +Hyperlight snapshots are written to disk as OCI image layouts and may be +loaded by a different build than the one that produced them. This +document describes how to evolve the snapshot format while keeping +existing snapshots loadable, or while rejecting them with a clear error. 
+ +## What is versioned + +A snapshot carries three independently evolvable version markers: + +* **Memory blob ABI**, `SNAPSHOT_ABI_VERSION` (a `u32` inside the + config blob, defined in + [src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs](../src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs)). + This is the host/guest runtime contract baked into the captured + memory: the `HyperlightPEB` layout (the struct host and guest share + to exchange state, field offsets and types), the `OutBAction` and + `VmAction` port numbers (the I/O ports the guest writes to for `Log`, + `CallFunction`, `Abort`, `DebugPrint`, and `Halt`), the layout of the + sandbox memory regions + (stack, heap, guest binary, input and output buffers, page tables), + and the calling convention used for guest function entry. The loader + trusts the captured bytes to match this contract, so any change here + invalidates older snapshots unless an explicit compat path translates + them. +* **Snapshot blob encoding**, `MT_SNAPSHOT_V1` + (`application/vnd.hyperlight.snapshot.memory.v1`), aliased as + `MT_SNAPSHOT_CURRENT`. This is the on-wire format of the snapshot + blob: framing, section ordering, alignment, dirty/zero-page elision, + anything about how the bytes are packed inside the OCI layer. +* **Config schema**, `MT_CONFIG_V1` + (`application/vnd.hyperlight.snapshot.config.v1+json`), aliased as + `MT_CONFIG_CURRENT`. This is the JSON shape of the config blob: + field names, types, required vs optional, the descriptors the loader + needs in order to reconstruct the sandbox (memory sizes, buffer + sizes, `abi_version`, `hyperlight_version`, etc.). Renaming a field, + changing its type, or adding a required field is a schema change and + bumps this constant. + +The `OCI_LAYOUT_VERSION` constant is pinned by the OCI image-layout +spec at `1.0.0`. + +Each media-type axis is a `_VN` constant with a `_CURRENT` alias. The +writer emits `_CURRENT`. 
The loader matches each `_VN` explicitly. To +add a version, declare `MT_FOO_V2`, point `MT_FOO_CURRENT` at it, and +add a loader arm that translates the old version or rejects it. + +The config blob also records `hyperlight_version`, the `CARGO_PKG_VERSION` +of the host crate at write time. This is informational only. The loader +records it for diagnostics and does not gate loading on it. + +## Enforcement + +The format is large and easy to change by accident. Two mechanisms +catch a change to it so reviewers do not have to spot every break by +eye, and so a developer who breaks the format unintentionally finds +out at build time rather than in production. + +Compile-time tripwires in +[src/hyperlight_host/src/sandbox/snapshot/tripwires.rs](../src/hyperlight_host/src/sandbox/snapshot/tripwires.rs) +hold a copy of every value that defines the format: +`SNAPSHOT_ABI_VERSION`, the snapshot and config media-type strings, the +OCI layout version, every `HyperlightPEB` field offset and the struct's +total size, every `OutBAction` and `VmAction` discriminant, and +`BASE_ADDRESS`. If the source value +drifts from the copy in `tripwires.rs`, the crate fails to compile. + +The snapshot golden verify test +(`cargo test -p hyperlight-host --test snapshot_goldens`) loads +snapshots from a local directory (populated by `just snapshot-goldens-pull`, +which fetches the tag set for the current `GOLDENS_VERSION` from GHCR) +and runs them through the current loader. If the new loader cannot +decode the old bytes, the test fails. + +On a pull request the verify test runs on every supported hypervisor +runner. The default path pulls the published tag set for the current +`GOLDENS_VERSION` and verifies it against the branch's loader. A pull +request that intentionally changes the format takes the labelled path +described in [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request). 
+ +## Changing the format + +When you change anything on the list above, you have three options. + +### Option 1: avoid the break + +Restructure the change so the on-disk contract stays put. Prefer this +whenever possible. + +### Option 2: backwards-compatible break + +You break the ABI for new snapshots, and you teach the loader to +accept the older version as well by translating it into the current +contract on the fly. For example, if you renumber the `OutBAction` +ports, the host's port dispatch keeps a match arm for the old port +number alongside the new one, so a resumed v1 guest that still writes +to the old port is handled correctly. + +Steps: + +1. Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. The writer stamps this value into + every config blob it produces. +4. Update `Snapshot::from_oci` to load both the old and the new + format, dispatching on `abi_version`. +5. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +6. Bump `GOLDENS_VERSION` to the next major. Apply the `regen-goldens` + label to the pull request so the verify job regenerates against the + branch. See + [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request) + and [Goldens version numbering](#goldens-version-numbering). +7. Add the outgoing version to `COMPAT_VERSIONS` in + `tests/snapshot_goldens/goldens_version.rs`, so the verify run pulls + and checks the old goldens through the compatibility path. See + [Verifying multiple golden versions](#verifying-multiple-golden-versions). + +Old snapshots on disk continue to load. New snapshots use the new +contract. The compatibility path becomes part of the supported surface +and must stay correct until you formally drop the old major. + +### Option 3: hard break + +You change the contract and the loader rejects old snapshots outright. 
+Using the same `OutBAction` example, the host's port dispatch only +matches on the new port number, and a resumed v1 guest writing to the +old port has nowhere to land. + +Steps: + +1. Make the source change. +2. Update `Snapshot::to_oci` to write the new format. +3. Bump `SNAPSHOT_ABI_VERSION`. +4. Update the tripwire assertions in `tripwires.rs` and any affected + tests to match the new values. +5. Bump `GOLDENS_VERSION` to the next major. Apply the `regen-goldens` + label to the pull request so the verify job regenerates against the + branch. See + [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request) + and [Goldens version numbering](#goldens-version-numbering). +6. Record the break in `CHANGELOG.md`. Anyone holding old snapshots on + disk has to regenerate them against the new build. + +The loader's single-version check enforces the rejection. An old +snapshot loaded against the new build fails the +`abi_version == SNAPSHOT_ABI_VERSION` test with a clear error. + +## Regenerating goldens + +The verify test (`cargo test -p hyperlight-host --test snapshot_goldens`) +loads the tag `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}` from a +local directory that `just snapshot-goldens-pull` populates from GHCR. A +freshly bumped `GOLDENS_VERSION` has no tags on GHCR until the bump +merges to `main` and the publish workflow runs, so pull requests that +bump the version verify through the `regen-goldens` label instead (see +[Breaking the format on a pull request](#breaking-the-format-on-a-pull-request)). + +### Iterating locally + +`just snapshot-goldens-generate` regenerates the directory for the current +`GOLDENS_VERSION` from the local source, so the verify test runs green +against your in-progress changes on your own platform. Use this loop +for iteration that does not need to cross hypervisor boundaries. 
+Cross-platform coverage comes from the publish workflow's matrix, which +runs automatically when the bump merges to `main` (see +[Publishing a new version](#publishing-a-new-version)). + +### Goldens version numbering + +`GOLDENS_VERSION` follows a `vMAJOR.MINOR` scheme. The tag set on GHCR +for a given version is keyed by the full string, so `v1.0`, `v1.1`, and +`v2.0` are independent namespaces that never collide. + +* Bump **MAJOR** when the snapshot ABI changes (Option 2 or Option 3 + above). MAJOR tracks `SNAPSHOT_ABI_VERSION`: every format break bumps + both, so a new MAJOR means the on-disk contract moved and old + snapshots load through a compatibility path or not at all. The old + tag set stays on GHCR untouched. +* Bump **MINOR** when the set of golden checks changes but the ABI does + not (for example, a new check/test is added). The on-disk contract is + unchanged, so `SNAPSHOT_ABI_VERSION` stays put. The new tag set + contains every check, including the unchanged ones, regenerated + against the current source. + +`GOLDENS_VERSION` and `SNAPSHOT_ABI_VERSION` are two separate counters +with different purposes. `SNAPSHOT_ABI_VERSION` is the integer stamped into +every snapshot blob, and the loader reads it to decide how to parse the +bytes. `GOLDENS_VERSION` names the published golden tag set on GHCR. A +format break bumps both. A check-set change bumps only +`GOLDENS_VERSION`. + +A version is published once, when the bump merges to `main`, and is +frozen from then on. The publish workflow only publishes a version +whose completion marker is absent from GHCR, so a published baseline +cannot be clobbered by a later run. While a developer iterates on a v1 +to v2 bump the new version is unpublished, so they verify locally with +`just snapshot-goldens-generate` and the `regen-goldens` label rather +than pushing to GHCR. + +The freeze is enforced by the publish workflow's marker check, not by a +registry policy. 
Each `(hv, cpu, profile)` combination generates its snapshot +and uploads it as a workflow artifact. A single publish job downloads +every artifact, pushes each as its tag, then pushes a +`{version}-complete` marker last. Pushing the whole set from one job +means a partial run leaves no marker, so the next run republishes +rather than freezing an incomplete set. Republishing a complete version +takes a manual dispatch with `force: true`, reserved for recovering a +corrupted push. + +### Breaking the format on a pull request + +A pull request that bumps `GOLDENS_VERSION` introduces a tag set that +GHCR does not carry yet, so the default pull-and-verify path has nothing +to load. The `regen-goldens` label switches the verify job into +regenerate mode for that pull request. + +* **Without the label**, the job pulls the published tag set for the + current `GOLDENS_VERSION` and verifies it against the branch. Missing + tags fail the job. This is what turns an accidental format break into + a red build: the published bytes stop loading, and the author must + either restructure the change or own the break with the label. +* **With the `regen-goldens` label**, the job generates the current + golden from the branch source, pulls the `COMPAT_VERSIONS` goldens + from the registry, and runs both through the branch loader. This + proves the new format is internally loadable on each runner and that + every kept old major still loads. + +The label is an explicit, reviewable assertion that the format break is +intended. The verify job never regenerates on its own initiative, so a +flaky pull or a mistyped version stays a hard failure rather than +silently degrading into a self-check. + +### Publishing a new version + +Publishing is automatic. When a bump to `GOLDENS_VERSION` merges to +`main`, the `Regenerate Snapshot Goldens` workflow runs on the push and +publishes the new version's tag set. 
No manual step is needed, and a +merge that does not change `GOLDENS_VERSION` does not publish (the push +trigger is filtered to the file that holds the version, +`tests/snapshot_goldens/goldens_version.rs`). + +The workflow walks every supported `(hypervisor, cpu, profile)` +combination on the self-hosted runner pool, generates the canonical +snapshot with +`cargo test --test snapshot_goldens -- generate `, and uploads each +OCI layout as a workflow artifact. A single publish job downloads them +all and pushes each with `oras cp` as the tag +`{version}-{hv}-{cpu}-{profile}`, then pushes the +`{version}-complete` marker. + +A lightweight `check-published` job gates the matrix. It reads `GOLDENS_VERSION` +from source and checks GHCR for the `{version}-complete` marker tag. If +the marker is present the version is fully published and the workflow +stops there, so re-running it, or merging an unrelated change, is a +no-op. The marker is pushed last by the publish job, which runs only +after every matrix job uploaded its snapshot, so a version counts as +published only as a whole set. This makes publishing idempotent, keeps +a complete baseline from being clobbered, and lets a run that follows a +partial push fill in the missing combinations. + +The workflow can also be dispatched manually. The `version` input must +equal `GOLDENS_VERSION` in the dispatched ref, which guards against +publishing a tag set the test binary would ignore. A manual dispatch +with `force: true` republishes a version that already exists, reserved +for recovering a corrupted or partial push. + +The push-triggered publish closes the window in which a pull request +that bumped the version needs the `regen-goldens` label. Once `main` +carries the bump and the publish lands, new pull requests pass on the +default pull-and-verify path. + +### Bootstrapping the first version + +The first publish runs through the normal path. 
The merge that adds +`goldens_version.rs` touches the file the push trigger watches, so the workflow +fires. `check-published` lists GHCR tags for the marker. An empty +registry returns an empty list, so the job publishes. The matrix lands +the first tag set and its marker. + +The first `oras` push creates the GHCR package +`ghcr.io/hyperlight-dev/hyperlight-snapshot-goldens` on demand. The +organization must allow the Actions `GITHUB_TOKEN` to create packages. +A cold start that fails here means that setting is off. Turn it on and +re-run. + +To seed a version by hand, dispatch the workflow with `force: true` and +a `version` input equal to the `GOLDENS_VERSION` in the dispatched ref. + +## Adding a new check under the current ABI + +Adding a new entry to `CHECKS` does not change the snapshot ABI. It +does change the set of tags the verify test expects, so it requires a +minor `GOLDENS_VERSION` bump. + +Steps: + +1. Add the entry to `CHECKS` in + `src/hyperlight_host/tests/snapshot_goldens/`. Set its + `since_abi_major` + to the current `SNAPSHOT_ABI_VERSION`. This records the major the + check belongs to, so a multi-version run skips it for any golden + from an earlier major. +2. Bump `GOLDENS_VERSION` minor (e.g. `v1.2` to `v1.3`). The new prefix + has no published tags, so the default verify path fails until they + exist. +3. Apply the `regen-goldens` label to the pull request. The verify job + regenerates the full check set against the branch and runs it back + through the branch loader. See + [Breaking the format on a pull request](#breaking-the-format-on-a-pull-request). +4. Once the change lands, the new prefix is published per + [Publishing a new version](#publishing-a-new-version). The older + tag set stays on GHCR untouched. + +The older minor's tags can be deleted from GHCR once nothing depends +on them. + +`since_abi_major` is a major. The verify run loads one golden per major: the +current version and each `COMPAT_VERSIONS` entry. 
A minor bump +regenerates the current golden, so it holds every check. A compat +golden is a major's final minor, so it holds every check that major +shipped. With one golden per major, major granularity is enough. + +## Verifying multiple golden versions + +The verify test checks every version in its verify set: the current +`GOLDENS_VERSION` plus each entry in `COMPAT_VERSIONS` +(`tests/snapshot_goldens/goldens_version.rs`). `COMPAT_VERSIONS` is +empty under one ABI, so the set is the current version alone. A hard +break (Option 3) leaves it empty, because the new tag set replaces the +old. + +A backwards-compatible break (Option 2) keeps an old major loadable, so +you verify it too. Add its version string to `COMPAT_VERSIONS`: + +```rust +pub const COMPAT_VERSIONS: &[&str] = &["v1.0"]; +``` + +The verify loop resolves a golden per platform per version, pulls each +through `just snapshot-goldens-pull`, and runs the checks. Each `Check` +records the `SNAPSHOT_ABI_VERSION` it was introduced in through +`since_abi_major`, and the run skips a check whose `since_abi_major` is +newer than the golden's major. A check from a later major stays clear of +an older golden that lacks the state it reads. + +Two pieces still need your code when the check set changed since the old +major: + +* Make the loader accept the old `abi_version` (Option 2 step 4), so the + old golden loads. +* Register the host functions the old golden's checks call. 
+ diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 4751133a4..fe0d06b56 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -109,6 +109,7 @@ metrics-util = "0.20.4" metrics-exporter-prometheus = { version = "0.18.3", default-features = false } serde_json = "1.0" hyperlight-component-macro = { workspace = true } +libtest-mimic = "0.8.2" [target.'cfg(windows)'.dev-dependencies] windows = { version = "0.62", features = [ @@ -144,3 +145,10 @@ build-metadata = ["dep:built"] [[bench]] name = "benchmarks" harness = false + +[[test]] +name = "snapshot_goldens" +path = "tests/snapshot_goldens/main.rs" +# Custom harness (see main.rs). A bare `cargo test` builds and runs it, but it +# no-ops without a mode token, so a normal run needs no golden cache. +harness = false diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/config.rs b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs index 4e926a62a..b92110c2c 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file/config.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file/config.rs @@ -695,4 +695,310 @@ mod tests { assert_eq!(back, r, "return type {:?} did not round-trip", r); } } + + /// The architecture the current host is not running. + fn other_arch() -> Arch { + match Arch::current() { + Arch::X86_64 => Arch::Aarch64, + Arch::Aarch64 => Arch::X86_64, + } + } + + /// A config whose `arch` and `abi_version` match the current + /// build, so the architecture and ABI gates pass and a test can + /// trip a later gate in isolation. The layout is minimal: the + /// gating checks under test short-circuit before reading it. 
+ fn gating_config() -> OciSnapshotConfig { + OciSnapshotConfig { + hyperlight_version: "test".to_string(), + arch: Arch::current(), + abi_version: SNAPSHOT_ABI_VERSION, + hypervisor: Hypervisor::Mshv, + stack_top_gva: 0x2000, + entrypoint_addr: SandboxMemoryLayout::BASE_ADDRESS as u64, + sregs: Sregs::from(&distinct_sregs()), + layout: MemoryLayout { + input_data_size: 0, + output_data_size: 0, + heap_size: 0, + code_size: 0, + init_data_size: 0, + init_data_permissions: None, + scratch_size: 0, + snapshot_size: PAGE_SIZE, + pt_size: None, + }, + memory_size: PAGE_SIZE as u64, + host_functions: Vec::new(), + snapshot_generation: 0, + } + } + + /// A snapshot built for a different architecture is rejected. + #[test] + fn validate_for_load_rejects_arch_mismatch() { + let mut cfg = gating_config(); + cfg.arch = other_arch(); + let err = cfg.validate_for_load().unwrap_err().to_string(); + assert!(err.contains("architecture mismatch"), "got: {err}"); + } + + /// A snapshot stamped with a different ABI version is rejected. + #[test] + fn validate_for_load_rejects_abi_version_mismatch() { + let mut cfg = gating_config(); + cfg.abi_version = SNAPSHOT_ABI_VERSION.wrapping_add(1); + let err = cfg.validate_for_load().unwrap_err().to_string(); + assert!(err.contains("ABI version mismatch"), "got: {err}"); + } + + /// A snapshot captured under a different hypervisor backend is + /// rejected. Without a live backend the load is rejected outright, + /// which exercises the same gate from the other side. 
+ #[test] + fn validate_for_load_rejects_hypervisor_mismatch() { + let Some(current) = Hypervisor::current() else { + let cfg = gating_config(); + let err = cfg.validate_for_load().unwrap_err().to_string(); + assert!(err.contains("no hypervisor available"), "got: {err}"); + return; + }; + let other = [Hypervisor::Kvm, Hypervisor::Mshv, Hypervisor::Whp] + .into_iter() + .find(|h| *h != current) + .expect("three backends, at least one differs from current"); + let mut cfg = gating_config(); + cfg.hypervisor = other; + let err = cfg.validate_for_load().unwrap_err().to_string(); + assert!(err.contains("hypervisor mismatch"), "got: {err}"); + } +} + +#[cfg(test)] +mod schema_pin { + use super::*; + + const PINNED_CALL: &str = r#"{ + "hyperlight_version": "x.y.z", + "arch": "x86_64", + "abi_version": 1, + "hypervisor": "mshv", + "stack_top_gva": 3735928559, + "entrypoint_addr": 8192, + "sregs": { + "cs": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "ds": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "es": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "fs": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "gs": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "ss": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + 
"avl": 11, + "unusable": 12, + "padding": 13 + }, + "tr": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "ldt": { + "base": 1, + "limit": 2, + "selector": 3, + "type_": 4, + "present": 5, + "dpl": 6, + "db": 7, + "s": 8, + "l": 9, + "g": 10, + "avl": 11, + "unusable": 12, + "padding": 13 + }, + "gdt": { + "base": 1, + "limit": 2 + }, + "idt": { + "base": 3, + "limit": 4 + }, + "cr0": 1, + "cr2": 2, + "cr4": 4, + "cr8": 5, + "efer": 6, + "apic_base": 7, + "interrupt_bitmap": [ + 8, + 9, + 10, + 11 + ] + }, + "layout": { + "input_data_size": 1, + "output_data_size": 2, + "heap_size": 3, + "code_size": 4, + "init_data_size": 5, + "init_data_permissions": null, + "scratch_size": 8, + "snapshot_size": 9, + "pt_size": null + }, + "memory_size": 65536, + "host_functions": [ + { + "function_name": "fn_void", + "parameter_types": [ + "bool" + ], + "return_type": "void" + } + ], + "snapshot_generation": 42 +}"#; + + const PINNED_ARCH: &str = r#"[ + "x86_64", + "aarch64" +]"#; + + const PINNED_HYPERVISOR: &str = r#"[ + "kvm", + "mshv", + "whp" +]"#; + + fn assert_round_trip(pinned: &str) { + let parsed: OciSnapshotConfig = + serde_json::from_str(pinned).expect("pinned JSON must deserialize"); + let actual = serde_json::to_string_pretty(&parsed).expect("serialize"); + assert_eq!( + actual.trim(), + pinned.trim(), + "Snapshot config JSON schema changed. If the change can break \ + existing snapshots on disk, bump `MT_CONFIG_V1` in \ + `super::media_types` and follow `docs/snapshot-versioning.md`. 
\ + Either way, paste the actual output below into the matching \ + `PINNED_*`.\n\nactual:\n{actual}" + ); + } + + #[test] + fn call_round_trip() { + assert_round_trip(PINNED_CALL); + } + + #[test] + fn arch_variants_round_trip() { + let parsed: Vec<Arch> = + serde_json::from_str(PINNED_ARCH).expect("pinned arch JSON must deserialize"); + let actual = serde_json::to_string_pretty(&parsed).expect("serialize"); + assert_eq!(actual.trim(), PINNED_ARCH.trim(), "Arch variants changed."); + } + + #[test] + fn hypervisor_variants_round_trip() { + let parsed: Vec<Hypervisor> = serde_json::from_str(PINNED_HYPERVISOR) + .expect("pinned hypervisor JSON must deserialize"); + let actual = serde_json::to_string_pretty(&parsed).expect("serialize"); + assert_eq!( + actual.trim(), + PINNED_HYPERVISOR.trim(), + "Hypervisor variants changed." + ); + } } diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs index 0b3d64fba..31156a134 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file/media_types.rs @@ -14,24 +14,20 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Media types are versioned by suffix. The loader matches each -// version specifically (no `_CURRENT` shortcut on the read side); the -// writer always emits `_CURRENT`. A new version is added by: -// -// 1. Declare `MT_FOO_V2` next to `MT_FOO_V1`. -// 2. Point `MT_FOO_CURRENT` at `MT_FOO_V2`. -// 3. Add a dispatch arm in the loader that converts v1 -> v2 (or -// rejects v1 if no compatibility window is offered). 
-pub(super) const MT_CONFIG_V1: &str = "application/vnd.hyperlight.snapshot.config.v1+json"; -pub(super) const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; -pub(super) const MT_SNAPSHOT_V1: &str = "application/vnd.hyperlight.snapshot.memory.v1"; -pub(super) const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; +// Media types are versioned by suffix. The writer emits `_CURRENT`. +// The loader matches each version explicitly. See +// docs/snapshot-versioning.md for how to add a version. +pub(in crate::sandbox::snapshot) const MT_CONFIG_V1: &str = + "application/vnd.hyperlight.snapshot.config.v1+json"; +pub(in crate::sandbox::snapshot) const MT_CONFIG_CURRENT: &str = MT_CONFIG_V1; +pub(in crate::sandbox::snapshot) const MT_SNAPSHOT_V1: &str = + "application/vnd.hyperlight.snapshot.memory.v1"; +pub(in crate::sandbox::snapshot) const MT_SNAPSHOT_CURRENT: &str = MT_SNAPSHOT_V1; -/// ABI version for the snapshot memory blob. Bumped whenever the -/// host-guest contract for the bytes inside the snapshot blob changes -/// (PEB layout, calling convention, init state, etc.). Independent of -/// the config blob's media-type version. -pub(super) const SNAPSHOT_ABI_VERSION: u32 = 1; +/// ABI version for the snapshot memory blob. Bumped when the +/// host-guest contract for the snapshot bytes changes. See +/// docs/snapshot-versioning.md. +pub(in crate::sandbox::snapshot) const SNAPSHOT_ABI_VERSION: u32 = 1; /// OCI standard annotation key for a manifest's tag inside an image /// index. 
Set on the manifest descriptor in `index.json`, not on the diff --git a/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs index c95c129e6..59ecb82cb 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file/mod.rs @@ -35,9 +35,9 @@ use oci_spec::image::{ use self::config::{Arch, HostFunction, Hypervisor, MemoryLayout, OciSnapshotConfig, Sregs}; use self::digest::{Digest256, oci_digest, parse_oci_digest, verify_blob_bytes, verify_blob_file}; use self::fsutil::{put_blob, put_blob_if_absent, read_bounded, replace_file_atomic}; -use self::media_types::{ - ANNOTATION_ARCH, ANNOTATION_HYPERVISOR, ANNOTATION_REF_NAME, MT_CONFIG_CURRENT, MT_CONFIG_V1, - MT_SNAPSHOT_CURRENT, MT_SNAPSHOT_V1, SNAPSHOT_ABI_VERSION, +use self::media_types::{ANNOTATION_ARCH, ANNOTATION_HYPERVISOR, ANNOTATION_REF_NAME}; +pub(super) use self::media_types::{ + MT_CONFIG_CURRENT, MT_CONFIG_V1, MT_SNAPSHOT_CURRENT, MT_SNAPSHOT_V1, SNAPSHOT_ABI_VERSION, }; use self::reference::{OciDigest, OciReference, OciTag}; use super::{NextAction, Snapshot}; @@ -46,7 +46,7 @@ use crate::mem::layout::SandboxMemoryLayout; use crate::mem::memory_region::MemoryRegionFlags; use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; -const OCI_LAYOUT_VERSION: &str = "1.0.0"; +pub(super) const OCI_LAYOUT_VERSION: &str = "1.0.0"; /// Maximum size of any JSON blob read from disk during load: /// `oci-layout`, `index.json`, the OCI image manifest, and the diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs index 9383d1b8b..56f0cd1f2 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -2784,3 +2784,146 @@ fn read_blob_dir( }) .collect() } + +// ============================================================================= +// `from_snapshot` config 
plumbing. +// ============================================================================= +// +// `from_snapshot` accepts a caller-supplied `SandboxConfiguration`. +// Layout fields must be silently overridden by the snapshot (the +// on-disk memory blob already encodes those sizes). Runtime fields +// must take effect. + +/// Layout fields supplied via `SandboxConfiguration` must be silently +/// overridden. The snapshot's own layout is authoritative. +#[test] +fn from_snapshot_silently_ignores_layout_overrides() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let original_input = snapshot.layout().input_data_size; + let original_output = snapshot.layout().output_data_size; + let original_heap = snapshot.layout().heap_size; + let original_scratch = snapshot.layout().get_scratch_size(); + + let mut config = SandboxConfiguration::default(); + config.set_input_data_size(original_input * 2); + config.set_output_data_size(original_output * 2); + config.set_heap_size((original_heap as u64) * 2); + config.set_scratch_size(original_scratch * 2); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot.clone(), HostFunctions::default(), Some(config)) + .unwrap(); + + sbox2.call::("GetStatic", ()).unwrap(); + + let new_snap = sbox2.snapshot().unwrap(); + assert_eq!(new_snap.layout().input_data_size, original_input); + assert_eq!(new_snap.layout().output_data_size, original_output); + assert_eq!(new_snap.layout().heap_size, original_heap); + assert_eq!(new_snap.layout().get_scratch_size(), original_scratch); +} + +/// `from_snapshot` honors `guest_core_dump=true` so that +/// `generate_crashdump_to_dir` writes a file. 
+#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_enabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(true); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + !entries.is_empty(), + "expected core dump file when guest_core_dump=true" + ); +} + +/// `from_snapshot` honors `guest_core_dump=false` so that +/// `generate_crashdump_to_dir` produces no file. +#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_disabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(false); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + entries.is_empty(), + "expected no core dump file when guest_core_dump=false, found {:?}", + entries.iter().map(|e| e.path()).collect::<Vec<_>>() + ); +} + +/// Non-default `init_data_permissions` survive an OCI round-trip +/// byte-for-byte. The default code path uses `READ`, so this pins +/// `READ | WRITE` instead. A regression in the permission +/// serialisation would silently downgrade or upgrade access to the +/// init_data region. 
+#[test] +fn round_trip_preserves_non_default_init_data_permissions() { + use crate::mem::memory_region::MemoryRegionFlags; + use crate::sandbox::uninitialized::{GuestBlob, GuestEnvironment}; + + let path = simple_guest_as_string().unwrap(); + let data: &[u8] = b"perm-pinned-init-data"; + let env = GuestEnvironment { + guest_binary: GuestBinary::FilePath(path), + init_data: Some(GuestBlob { + data, + permissions: MemoryRegionFlags::READ | MemoryRegionFlags::WRITE, + }), + }; + let mut sbox = UninitializedSandbox::new(env, None) + .unwrap() + .evolve() + .unwrap(); + let snap = sbox.snapshot().unwrap(); + let expected = snap.layout().init_data_permissions; + assert_eq!( + expected, + Some(MemoryRegionFlags::READ | MemoryRegionFlags::WRITE), + "fixture must produce non-default init_data_permissions", + ); + + let dir = tempfile::tempdir().unwrap(); + let oci_dir = dir.path().join("layout"); + snap.save(&oci_dir, &OciTag::new("latest").unwrap()) + .unwrap(); + let loaded = Snapshot::checked_load(&oci_dir, OciTag::new("latest").unwrap()).unwrap(); + assert_eq!(loaded.layout().init_data_permissions, expected); +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index c9ec426b4..155195475 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -16,6 +16,7 @@ limitations under the License. mod file; mod file_tests; +mod tripwires; use std::collections::HashMap; diff --git a/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs new file mode 100644 index 000000000..0da9a65d2 --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/tripwires.rs @@ -0,0 +1,105 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Compile-time tripwires for the snapshot ABI. +//! +//! Each assertion pins one piece of the contract that snapshots +//! depend on: the manifest media types, the OCI Image Layout version, +//! the `HyperlightPEB` field offsets, the `OutBAction` and `VmAction` +//! port numbers, and `BASE_ADDRESS`. A change to any of these breaks +//! loading of older snapshots. +//! +//! When an assertion fires, see `docs/snapshot-versioning.md`. + +use super::file::{ + MT_CONFIG_CURRENT, MT_SNAPSHOT_CURRENT, OCI_LAYOUT_VERSION, SNAPSHOT_ABI_VERSION, +}; + +const EXPECTED_ABI_VERSION: u32 = 1; +const EXPECTED_MT_CONFIG: &str = "application/vnd.hyperlight.snapshot.config.v1+json"; +const EXPECTED_MT_SNAPSHOT: &str = "application/vnd.hyperlight.snapshot.memory.v1"; +const EXPECTED_OCI_LAYOUT_VERSION: &str = "1.0.0"; + +/// `assert!` with the shared tripwire failure message. The message must +/// be a string literal for const eval, so the macro carries it. +macro_rules! abi_assert { + ($cond:expr) => { + assert!( + $cond, + "snapshot ABI changed: this breaks loading of existing snapshots. \ + Do not just update the expected value to make this compile. \ + See docs/snapshot-versioning.md." 
+ ); + }; +} + +const _: () = { + abi_assert!(SNAPSHOT_ABI_VERSION == EXPECTED_ABI_VERSION); + abi_assert!(str_eq(MT_CONFIG_CURRENT, EXPECTED_MT_CONFIG)); + abi_assert!(str_eq(MT_SNAPSHOT_CURRENT, EXPECTED_MT_SNAPSHOT)); + abi_assert!(str_eq(OCI_LAYOUT_VERSION, EXPECTED_OCI_LAYOUT_VERSION)); +}; + +const _: () = { + use hyperlight_common::mem::{GuestMemoryRegion, HyperlightPEB}; + abi_assert!(std::mem::size_of::<GuestMemoryRegion>() == 16); + abi_assert!(std::mem::size_of::<HyperlightPEB>() == 4 * 16); + abi_assert!(std::mem::offset_of!(HyperlightPEB, input_stack) == 0); + abi_assert!(std::mem::offset_of!(HyperlightPEB, output_stack) == 16); + abi_assert!(std::mem::offset_of!(HyperlightPEB, init_data) == 32); + abi_assert!(std::mem::offset_of!(HyperlightPEB, guest_heap) == 48); +}; + +const _: () = { + use hyperlight_common::outb::OutBAction; + abi_assert!(OutBAction::Log as u16 == 99); + abi_assert!(OutBAction::CallFunction as u16 == 101); + abi_assert!(OutBAction::Abort as u16 == 102); + abi_assert!(OutBAction::DebugPrint as u16 == 103); + #[cfg(feature = "trace_guest")] + abi_assert!(OutBAction::TraceBatch as u16 == 104); + #[cfg(feature = "mem_profile")] + abi_assert!(OutBAction::TraceMemoryAlloc as u16 == 105); + #[cfg(feature = "mem_profile")] + abi_assert!(OutBAction::TraceMemoryFree as u16 == 106); +}; + +const _: () = { + use hyperlight_common::outb::VmAction; + abi_assert!(VmAction::PvTimerConfig as u16 == 107); + abi_assert!(VmAction::Halt as u16 == 108); +}; + +const _: () = { + use crate::mem::layout::SandboxMemoryLayout; + abi_assert!(SandboxMemoryLayout::BASE_ADDRESS == 0x1000); +}; + +const fn str_eq(a: &str, b: &str) -> bool { + let a = a.as_bytes(); + let b = b.as_bytes(); + if a.len() != b.len() { + return false; + } + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + true +} diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 6b5a7f8e3..995f03845 100644 --- 
a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -535,7 +535,9 @@ fn guest_malloc_abort() { }); // allocate a vector (on heap) that is bigger than the heap - let heap_size = 0x4000; + // Guest init registers every guest function into a heap map. + // 0x6000 leaves room for that so the sandbox can start. + let heap_size = 0x6000; let size_to_allocate = 0x10000; assert!( size_to_allocate > heap_size, @@ -616,7 +618,9 @@ fn corrupt_output_back_pointer_rejected() { #[test] fn guest_panic_no_alloc() { - let heap_size = 0x4000; + // Guest init registers every guest function into a heap map. + // 0x6000 leaves room for that so the sandbox can start. + let heap_size = 0x6000; let mut cfg = SandboxConfiguration::default(); cfg.set_heap_size(heap_size); diff --git a/src/hyperlight_host/tests/snapshot_goldens/checks.rs b/src/hyperlight_host/tests/snapshot_goldens/checks.rs new file mode 100644 index 000000000..b7c3c79df --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/checks.rs @@ -0,0 +1,323 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Functional checks against goldens loaded from the on-disk goldens +//! directory. +//! +//! Each check builds its own `MultiUseSandbox` from the golden with +//! `GoldenTest::load_sandbox`, so checks are independent and one +//! failure does not poison the next. See `docs/snapshot-versioning.md` +//! for how to add a check. 
+ +use std::path::Path; +use std::sync::Arc; + +use hyperlight_host::sandbox::snapshot::{OciTag, Snapshot}; +use hyperlight_host::{HostFunctions, MultiUseSandbox}; + +use crate::fixtures::{CALL_COUNTER_BUMP, HEAP_PATTERN_LEN, register_host_echo_fns}; + +/// A staged golden handed to a check. The check builds sandboxes from +/// it with `load_sandbox`, as many as it needs. +pub(crate) struct GoldenTest<'a> { + dir: &'a Path, + tag: &'a str, +} + +impl<'a> GoldenTest<'a> { + pub(crate) fn new(dir: &'a Path, tag: &'a str) -> Self { + Self { dir, tag } + } + + /// The on-disk OCI Image Layout directory of the golden. + pub(crate) fn dir(&self) -> &Path { + self.dir + } + + /// The golden's OCI tag. + pub(crate) fn tag(&self) -> &str { + self.tag + } + + /// Load the golden into a fresh sandbox with the checks' host + /// functions registered. + pub(crate) fn load_sandbox(&self) -> Result { + let reference = OciTag::new(self.tag()) + .map_err(|e| format!("invalid golden tag {}: {e}", self.tag()))?; + let snap = Snapshot::checked_load(self.dir(), reference) + .map_err(|e| format!("Snapshot::checked_load({}): {e}", self.tag()))?; + let mut funcs = HostFunctions::default(); + register_host_echo_fns(&mut funcs); + MultiUseSandbox::from_snapshot(Arc::new(snap), funcs, None) + .map_err(|e| format!("MultiUseSandbox::from_snapshot({}): {e}", self.tag())) + } +} + +pub(crate) struct Check { + pub(crate) name: &'static str, + /// The lowest ABI major this check runs against. A check reads + /// state that `generate()` writes, so it runs only against a golden + /// whose major is at or above `since_abi_major`. Set it to the current + /// `SNAPSHOT_ABI_VERSION` when adding a check. See + /// `docs/snapshot-versioning.md`. 
+ pub(crate) since_abi_major: u32, + pub(crate) run: fn(&GoldenTest) -> Result<(), String>, +} + +pub(crate) const CHECKS: &[Check] = &[ + Check { + name: "captured_bss", + since_abi_major: 1, + run: captured_bss, + }, + Check { + name: "captured_heap_pattern", + since_abi_major: 1, + run: captured_heap_pattern, + }, + Check { + name: "guest_types_round_trip", + since_abi_major: 1, + run: guest_types_round_trip, + }, + Check { + name: "host_round_trips", + since_abi_major: 1, + run: host_round_trips, + }, + Check { + name: "chained_snapshot", + since_abi_major: 1, + run: chained_snapshot, + }, +]; + +/// Captured BSS restores exactly: `COUNTER == CALL_COUNTER_BUMP`. +/// Covers the dispatch convention, sregs apply, page-table +/// relocation, captured stack/BSS. +fn captured_bss(golden: &GoldenTest) -> Result<(), String> { + let mut sbox = golden.load_sandbox()?; + let value: i32 = sbox + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic: {e}"))?; + if value != CALL_COUNTER_BUMP { + return Err(format!( + "captured COUNTER expected {CALL_COUNTER_BUMP}, got {value}", + )); + } + Ok(()) +} + +/// Captured heap state restores exactly: the pinned `Vec` +/// pattern produced by `AllocAndWritePattern` survives across +/// save/load. +fn captured_heap_pattern(golden: &GoldenTest) -> Result<(), String> { + let mut sbox = golden.load_sandbox()?; + let got: Vec = sbox + .call("ReadPattern", ()) + .map_err(|e| format!("ReadPattern: {e}"))?; + let expected: Vec = (0..HEAP_PATTERN_LEN as usize) + .map(|i| (i & 0xff) as u8) + .collect(); + if got != expected { + return Err(format!( + "captured heap pattern mismatch (got len {} expected len {})", + got.len(), + expected.len(), + )); + } + Ok(()) +} + +/// Guest-call wire format for every primitive parameter and return +/// type. Each loop asserts an `EchoT` round-trips. Float NaN goes +/// through `is_nan` since `NaN != NaN`. 
+fn guest_types_round_trip(golden: &GoldenTest) -> Result<(), String> { + let mut sbox = golden.load_sandbox()?; + macro_rules! echo { + ($name:expr, $ty:ty, $values:expr) => {{ + for &v in $values.iter() { + let got: $ty = sbox + .call($name, v) + .map_err(|e| format!("{}({:?}): {e}", $name, v))?; + if got != v { + return Err(format!("{}({:?}) returned {:?}", $name, v, got)); + } + } + }}; + } + echo!("EchoI32", i32, [i32::MIN, -1, 0, 1, i32::MAX]); + echo!("EchoU32", u32, [0u32, 1, u32::MAX]); + echo!("EchoI64", i64, [i64::MIN, -1, 0, 1, i64::MAX]); + echo!("EchoU64", u64, [0u64, 1, u64::MAX]); + echo!( + "EchoFloat", + f32, + [ + 0.0f32, + -1.5, + 1.5, + f32::MIN, + f32::MAX, + f32::INFINITY, + f32::NEG_INFINITY, + ] + ); + let got: f32 = sbox + .call("EchoFloat", f32::NAN) + .map_err(|e| format!("EchoFloat(NaN): {e}"))?; + if !got.is_nan() { + return Err(format!("EchoFloat(NaN) returned {got}")); + } + echo!( + "EchoDouble", + f64, + [ + 0.0f64, + -1.5, + 1.5, + f64::MIN, + f64::MAX, + f64::INFINITY, + f64::NEG_INFINITY, + ] + ); + let got: f64 = sbox + .call("EchoDouble", f64::NAN) + .map_err(|e| format!("EchoDouble(NaN): {e}"))?; + if !got.is_nan() { + return Err(format!("EchoDouble(NaN) returned {got}")); + } + echo!("EchoBool", bool, [false, true]); + + for v in [String::new(), "hello".to_string(), "héllo 🌍".to_string()] { + let got: String = sbox + .call("Echo", v.clone()) + .map_err(|e| format!("Echo({v:?}): {e}"))?; + if got != v { + return Err(format!("Echo({v:?}) returned {got:?}")); + } + } + for v in [ + Vec::::new(), + vec![0u8, 1, 2, 3, 0xff], + (0..256u32).map(|i| (i & 0xff) as u8).collect::>(), + ] { + let got: Vec = sbox + .call("GetSizePrefixedBuffer", v.clone()) + .map_err(|e| format!("GetSizePrefixedBuffer(len={}): {e}", v.len()))?; + if got != v { + return Err(format!( + "GetSizePrefixedBuffer(len={}) did not round-trip", + v.len(), + )); + } + } + let _: () = sbox.call("NoOp", ()).map_err(|e| format!("NoOp: {e}"))?; + let mixed: i32 = sbox 
+ .call( + "PrintElevenArgs", + ( + "a".to_string(), + 1i32, + 2i64, + "b".to_string(), + "c".to_string(), + true, + false, + 3u32, + 4u64, + 5i32, + 6.5f32, + ), + ) + .map_err(|e| format!("PrintElevenArgs: {e}"))?; + if mixed < 0 { + return Err(format!("PrintElevenArgs returned {mixed}")); + } + Ok(()) +} + +/// Host-call wire format for every primitive parameter and return +/// type. Each `RoundTripHostT` invokes the matching `HostEchoT` on +/// the registered host-fn set. +fn host_round_trips(golden: &GoldenTest) -> Result<(), String> { + let mut sbox = golden.load_sandbox()?; + macro_rules! rt { + ($name:expr, $ty:ty, $value:expr) => {{ + let v: $ty = $value; + let got: $ty = sbox + .call($name, v.clone()) + .map_err(|e| format!("{}({:?}): {e}", $name, v))?; + if got != v { + return Err(format!("{}({:?}) returned {:?}", $name, v, got)); + } + }}; + } + rt!("RoundTripHostI32", i32, -7); + rt!("RoundTripHostU32", u32, 0xdead_beef); + rt!("RoundTripHostI64", i64, i64::MIN); + rt!("RoundTripHostU64", u64, u64::MAX); + rt!("RoundTripHostF32", f32, -1.25); + rt!("RoundTripHostF64", f64, 1234.5); + rt!("RoundTripHostBool", bool, false); + rt!("RoundTripHostString", String, "round-trip".to_string()); + rt!("RoundTripHostVecBytes", Vec, vec![0u8, 1, 2, 3, 0xff]); + let _: () = sbox + .call("RoundTripHostNoOp", ()) + .map_err(|e| format!("RoundTripHostNoOp: {e}"))?; + Ok(()) +} + +/// Snapshot-from-loaded-snapshot path. Mutates state on the loaded +/// golden, takes a fresh snapshot, round-trips it through an +/// OCI layout on disk, and asserts the mutation survives. 
+fn chained_snapshot(golden: &GoldenTest) -> Result<(), String> { + let mut sbox = golden.load_sandbox()?; + let val: i32 = sbox + .call("AddToStatic", 5i32) + .map_err(|e| format!("AddToStatic: {e}"))?; + if val != CALL_COUNTER_BUMP + 5 { + return Err(format!( + "AddToStatic returned {val}, expected {}", + CALL_COUNTER_BUMP + 5, + )); + } + let snap = sbox + .snapshot() + .map_err(|e| format!("take chained snapshot: {e}"))?; + + let tmp = tempfile::tempdir().map_err(|e| format!("tempdir: {e}"))?; + let layout = tmp.path().join("chained"); + let tag = OciTag::new("chained").map_err(|e| format!("tag: {e}"))?; + snap.save(&layout, &tag).map_err(|e| format!("save: {e}"))?; + + let loaded = Snapshot::checked_load(&layout, tag).map_err(|e| format!("checked_load: {e}"))?; + let mut funcs = HostFunctions::default(); + register_host_echo_fns(&mut funcs); + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None) + .map_err(|e| format!("from_snapshot: {e}"))?; + let val: i32 = sbox2 + .call("GetStatic", ()) + .map_err(|e| format!("GetStatic on chained: {e}"))?; + if val != CALL_COUNTER_BUMP + 5 { + return Err(format!( + "chained snapshot observed COUNTER={val}, expected {}", + CALL_COUNTER_BUMP + 5, + )); + } + Ok(()) +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs new file mode 100644 index 000000000..d33b6bb3d --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/fixtures.rs @@ -0,0 +1,127 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Canonical fixture builders. These define exactly what bytes a +//! goldens push contains. Any change here is a snapshot content +//! change and requires a goldens regen. + +use std::sync::Arc; + +use hyperlight_host::func::Registerable; +use hyperlight_host::sandbox::SandboxConfiguration; +use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::{GuestBinary, MultiUseSandbox, UninitializedSandbox}; +use hyperlight_testing::simple_guest_as_string; + +/// Heap pattern length used by the golden. Small enough to +/// stay cheap, large enough to exercise non-trivial heap state. +pub(crate) const HEAP_PATTERN_LEN: u64 = 1024; + +/// Value the captured `COUNTER` static must hold in the golden. +/// Set by `AddToStatic(CALL_COUNTER_BUMP)` at generate time. +pub(crate) const CALL_COUNTER_BUMP: i32 = 42; + +/// Canonical `SandboxConfiguration` used to produce the goldens. +/// Layout knobs are deliberately bumped away from defaults so any +/// silent arithmetic change in `SandboxMemoryLayout::new` shifts at +/// least one region between generate-time and load-time. +fn golden_config() -> SandboxConfiguration { + let mut cfg = SandboxConfiguration::default(); + cfg.set_input_data_size(64 * 1024); + cfg.set_output_data_size(64 * 1024); + cfg.set_heap_size(256 * 1024); + cfg.set_scratch_size(512 * 1024); + cfg +} + +fn simpleguest_path() -> String { + simple_guest_as_string().expect("simpleguest_path") +} + +pub(crate) fn generate() -> Arc { + let mut u = UninitializedSandbox::new( + GuestBinary::FilePath(simpleguest_path()), + Some(golden_config()), + ) + .expect("UninitializedSandbox::new"); + register_host_echo_fns(&mut u); + let mut sbox = u.evolve().expect("evolve"); + run_canonical_calls(&mut sbox); + sbox.snapshot().expect("snapshot") +} + +/// Deterministic sequence of guest calls that mutate captured state +/// before snapshotting. 
Each call lands a specific bit of state +/// (BSS, heap, host-call wiring) that one of the per-surface +/// checks then asserts on after the golden is loaded. +fn run_canonical_calls(sbox: &mut MultiUseSandbox) { + let bumped: i32 = sbox + .call("AddToStatic", CALL_COUNTER_BUMP) + .expect("AddToStatic"); + assert_eq!(bumped, CALL_COUNTER_BUMP); + + let _: () = sbox + .call("AllocAndWritePattern", HEAP_PATTERN_LEN) + .expect("AllocAndWritePattern"); + + // Drive every host fn once so the captured host_function_details + // blob has known signatures and dispatch regressions surface at + // generate time. + sbox.call::("RoundTripHostI32", 1234i32) + .expect("RTH i32"); + sbox.call::("RoundTripHostU32", 4321u32) + .expect("RTH u32"); + sbox.call::("RoundTripHostI64", -42i64) + .expect("RTH i64"); + sbox.call::("RoundTripHostU64", 1u64 << 40) + .expect("RTH u64"); + sbox.call::("RoundTripHostF32", 3.5f32) + .expect("RTH f32"); + sbox.call::("RoundTripHostF64", -2.25f64) + .expect("RTH f64"); + sbox.call::("RoundTripHostBool", true) + .expect("RTH bool"); + sbox.call::("RoundTripHostString", "hi".to_string()) + .expect("RTH string"); + sbox.call::>("RoundTripHostVecBytes", vec![1u8, 2, 3]) + .expect("RTH vec"); + sbox.call::<()>("RoundTripHostNoOp", ()).expect("RTH noop"); +} + +/// Register the `HostEcho*` family used by the golden. Used at +/// both generate and load time so the registered set matches the +/// captured `host_function_details`. 
+pub(crate) fn register_host_echo_fns(r: &mut R) { + r.register_host_function("HostEchoI32", |v: i32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoU32", |v: u32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoI64", |v: i64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoU64", |v: u64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoF32", |v: f32| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoF64", |v: f64| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoBool", |v: bool| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoString", |v: String| Ok(v)) + .unwrap(); + r.register_host_function("HostEchoVecBytes", |v: Vec| Ok(v)) + .unwrap(); + r.register_host_function("HostNoOp", || Ok(())).unwrap(); +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs b/src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs new file mode 100644 index 000000000..32572f417 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/goldens_version.rs @@ -0,0 +1,46 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! The goldens version string, kept in its own file. +//! +//! The `.github/workflows/RegenSnapshotGoldens.yml` path filter watches +//! this file, so a version bump is the only edit that triggers a +//! publish. See `docs/snapshot-versioning.md`. + +/// Goldens version, a `vMAJOR.MINOR` string. 
+pub(crate) const GOLDENS_VERSION: &str = "v1.0"; + +/// Old majors kept loadable through a compatibility path, verified +/// alongside `GOLDENS_VERSION`. A backwards-compatible break (Option 2) +/// adds the outgoing version here. See `docs/snapshot-versioning.md`. +pub(crate) const COMPAT_VERSIONS: &[&str] = &[]; + +/// Every version the verify test checks: the current one and each kept +/// old major. +pub(crate) fn verify_versions() -> impl Iterator { + std::iter::once(GOLDENS_VERSION).chain(COMPAT_VERSIONS.iter().copied()) +} + +/// The ABI major in a `vMAJOR.MINOR` string. MAJOR tracks +/// `SNAPSHOT_ABI_VERSION`, so the verify run uses it to skip checks +/// newer than a golden. See `docs/snapshot-versioning.md`. +pub(crate) fn abi_major(version: &str) -> u32 { + version + .strip_prefix('v') + .and_then(|s| s.split('.').next()) + .and_then(|s| s.parse().ok()) + .expect("version must be a vMAJOR.MINOR string") +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/main.rs b/src/hyperlight_host/tests/snapshot_goldens/main.rs new file mode 100644 index 000000000..ba5fdbf86 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/main.rs @@ -0,0 +1,186 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +use std::ffi::OsString; +use std::path::{Path, PathBuf}; +use std::process::ExitCode; + +use hyperlight_host::sandbox::snapshot::OciTag; +use libtest_mimic::{Arguments, Failed, Trial}; + +mod checks; +mod fixtures; +mod goldens_version; +mod oci; +mod platform; + +use checks::Check; +use platform::Platform; + +/// The first CLI argument selects the mode: +/// +/// * `generate [out-dir]` writes the canonical snapshot for the local +/// platform under `out-dir`, defaulting to the verify directory. +/// * `self` verifies the local CPU vendor's golden. +/// * `cross` verifies the peer CPU vendor's golden. +/// * no token is a no-op success, so the target sits in the +/// `cargo test --test '*'` glob without a staged golden cache. +/// +/// `self` and `cross` need their golden staged on disk by +/// `just snapshot-goldens-pull` or `just snapshot-goldens-generate`. +/// Arguments after `self` or `cross` pass through to libtest-mimic. +fn main() -> ExitCode { + let mut args: Vec = std::env::args_os().collect(); + match args.get(1).and_then(|a| a.to_str()) { + Some("generate") => { + let out = args + .get(2) + .map(PathBuf::from) + .unwrap_or_else(oci::goldens_root); + run_generate(&out) + } + Some("self") => { + args.remove(1); + run_verify(CpuSelect::Local, &args) + } + Some("cross") => { + args.remove(1); + run_verify(CpuSelect::Cross, &args) + } + _ => { + eprintln!( + "snapshot goldens: no mode selected, skipping. Run via \ + `just snapshot-goldens-verify` or `just snapshot-goldens-generate`." 
+ ); + libtest_mimic::run(&Arguments::from_iter(args), Vec::new()).exit_code() + } + } +} + +fn run_verify(cpu: CpuSelect, args: &[OsString]) -> ExitCode { + let args = Arguments::from_iter(args.iter().cloned()); + let Some(host) = Platform::detect() else { + eprintln!("snapshot goldens: no (hypervisor, cpu, profile) platform detected on this host",); + return ExitCode::FAILURE; + }; + // One run checks one CPU vendor: the host's own with `self`, the + // peer with the `cross` token. Snapshots are portable across Intel + // and AMD, so CI runs this twice to cover both. The selected + // golden must be staged (pull or generate); a missing one fails. + let platform = match cpu { + CpuSelect::Local => host, + CpuSelect::Cross => host.cross_cpu(), + }; + // Verify the current version and every kept old major. A check runs + // against a golden only when its `since_abi_major` is at or below the + // golden's major, so a newer check stays clear of an older golden + // that predates the state it reads. + let mut trials = Vec::new(); + for version in goldens_version::verify_versions() { + let golden = match Golden::resolve(platform, version) { + Ok(g) => g, + Err(e) => { + eprintln!("snapshot goldens: {e}"); + return ExitCode::FAILURE; + } + }; + println!("snapshot goldens: verifying {}", golden.tag); + let golden_abi = goldens_version::abi_major(version); + trials.extend( + checks::CHECKS + .iter() + .filter(|c| c.since_abi_major <= golden_abi) + .map(|c| golden.trial(c)), + ); + } + let conclusion = libtest_mimic::run(&args, trials); + if conclusion.has_failed() { + eprintln!( + "snapshot goldens: a golden failed to load or verify. This usually means a change \ + broke the on-disk snapshot format. Do not regenerate the goldens to make this pass. \ + See docs/snapshot-versioning.md." + ); + } + conclusion.exit_code() +} + +/// Which CPU vendor's golden a verify run checks. 
+#[derive(Copy, Clone, Debug)] +enum CpuSelect { + Local, + Cross, +} + +/// A golden staged on disk for one platform and version, ready to +/// verify. +struct Golden { + version: &'static str, + tag: String, + cpu: &'static str, + dir: PathBuf, +} + +impl Golden { + fn resolve(platform: Platform, version: &'static str) -> Result { + let tag = platform.tag_for(version); + let dir = oci::golden_dir(&tag)?; + Ok(Self { + version, + cpu: platform.cpu_str(), + tag, + dir, + }) + } + + fn trial(&self, check: &'static Check) -> Trial { + let tag = self.tag.clone(); + let dir = self.dir.clone(); + let name = format!("{} [{} {}]", check.name, self.cpu, self.version); + Trial::test(name, move || { + let golden = checks::GoldenTest::new(&dir, &tag); + (check.run)(&golden).map_err(Failed::from) + }) + } +} + +fn run_generate(out_dir: &Path) -> ExitCode { + let Some(platform) = Platform::detect() else { + eprintln!( + "snapshot goldens: generate: no (hypervisor, cpu, profile) platform detected on this host", + ); + return ExitCode::FAILURE; + }; + if let Err(e) = std::fs::create_dir_all(out_dir) { + eprintln!("snapshot goldens: generate: create {out_dir:?}: {e}"); + return ExitCode::FAILURE; + } + let tag = platform.tag(); + let oci_tag = match OciTag::new(&tag) { + Ok(t) => t, + Err(e) => { + eprintln!("snapshot goldens: generate: invalid tag {tag}: {e}"); + return ExitCode::FAILURE; + } + }; + let dir = out_dir.join(&tag); + let snap = fixtures::generate(); + if let Err(e) = snap.save(&dir, &oci_tag) { + eprintln!("snapshot goldens: generate: save({tag}): {e}"); + return ExitCode::FAILURE; + } + println!("snapshot goldens: wrote {tag} -> {}", dir.display()); + ExitCode::SUCCESS +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/oci.rs b/src/hyperlight_host/tests/snapshot_goldens/oci.rs new file mode 100644 index 000000000..609e69c10 --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/oci.rs @@ -0,0 +1,50 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +use std::path::PathBuf; + +pub(crate) fn goldens_root() -> PathBuf { + // Workspace target dir is two levels up from this crate. + let target = std::env::var_os("CARGO_TARGET_DIR") + .map(PathBuf::from) + .unwrap_or_else(|| { + let raw = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("target"); + std::fs::canonicalize(&raw).unwrap_or(raw) + }); + target.join("snapshot-goldens") +} + +fn goldens_dir_for(tag: &str) -> PathBuf { + goldens_root().join(tag) +} + +/// Locate the golden OCI Image Layout for `tag` in the local +/// directory. A missing layout is an error with guidance to populate +/// it. +pub(crate) fn golden_dir(tag: &str) -> Result { + let dir = goldens_dir_for(tag); + if dir.join("oci-layout").is_file() { + return Ok(dir); + } + Err(format!( + "no golden OCI layout found at {dir:?} for tag `{tag}`. \ + Run `just snapshot-goldens-pull` to fetch the published goldens, \ + or `just snapshot-goldens-generate` to regenerate them locally.", + )) +} diff --git a/src/hyperlight_host/tests/snapshot_goldens/platform.rs b/src/hyperlight_host/tests/snapshot_goldens/platform.rs new file mode 100644 index 000000000..6564b365c --- /dev/null +++ b/src/hyperlight_host/tests/snapshot_goldens/platform.rs @@ -0,0 +1,188 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Local platform detection and tag naming for snapshot goldens. +//! +//! A snapshot is portable across CPU vendor (Intel and AMD), but +//! not across CPU architecture, hypervisor, or build profile. Each +//! `(hypervisor, cpu vendor, build profile)` triple gets its own +//! tag, named `{GOLDENS_VERSION}-{hv}-{cpu}-{profile}`. The +//! cross-CPU verify relies on the Intel/AMD portability. + +use crate::goldens_version::GOLDENS_VERSION; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Hypervisor { + #[cfg_attr(target_os = "windows", allow(dead_code))] + Kvm, + #[cfg_attr(target_os = "windows", allow(dead_code))] + Mshv, + #[cfg_attr(not(target_os = "windows"), allow(dead_code))] + Whp, +} + +impl Hypervisor { + fn as_str(self) -> &'static str { + match self { + Self::Kvm => "kvm", + Self::Mshv => "mshv", + Self::Whp => "whp", + } + } + + /// Detect the locally available hypervisor. Order matches the + /// host crate's preference: `/dev/mshv` over `/dev/kvm` on + /// Linux, WHP on Windows. 
+ fn detect() -> Option { + #[cfg(target_os = "linux")] + { + if std::path::Path::new("/dev/mshv").exists() { + return Some(Self::Mshv); + } + if std::path::Path::new("/dev/kvm").exists() { + return Some(Self::Kvm); + } + None + } + #[cfg(target_os = "windows")] + { + Some(Self::Whp) + } + #[cfg(not(any(target_os = "linux", target_os = "windows")))] + { + None + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum CpuVendor { + Intel, + Amd, +} + +impl CpuVendor { + fn as_str(self) -> &'static str { + match self { + Self::Intel => "intel", + Self::Amd => "amd", + } + } + + fn other(self) -> Self { + match self { + Self::Intel => Self::Amd, + Self::Amd => Self::Intel, + } + } + + /// Detect the local CPU vendor via the `0` leaf of `cpuid`. + /// Returns `None` on non-`x86_64` targets or unknown vendor + /// strings. + fn detect() -> Option { + #[cfg(target_arch = "x86_64")] + { + let r = core::arch::x86_64::__cpuid(0); + let mut bytes = [0u8; 12]; + bytes[0..4].copy_from_slice(&r.ebx.to_le_bytes()); + bytes[4..8].copy_from_slice(&r.edx.to_le_bytes()); + bytes[8..12].copy_from_slice(&r.ecx.to_le_bytes()); + match &bytes { + b"GenuineIntel" => Some(Self::Intel), + b"AuthenticAMD" => Some(Self::Amd), + _ => None, + } + } + #[cfg(not(target_arch = "x86_64"))] + { + None + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum Profile { + Debug, + Release, +} + +impl Profile { + fn as_str(self) -> &'static str { + match self { + Self::Debug => "debug", + Self::Release => "release", + } + } + + fn detect() -> Self { + if cfg!(debug_assertions) { + Self::Debug + } else { + Self::Release + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) struct Platform { + hv: Hypervisor, + cpu: CpuVendor, + profile: Profile, +} + +impl Platform { + pub(crate) fn detect() -> Option { + Some(Self { + hv: Hypervisor::detect()?, + cpu: CpuVendor::detect()?, + profile: Profile::detect(), + }) + } + + fn suffix(&self) -> String { + // The 
`snapshot-goldens-pull` recipe in the Justfile rebuilds this + // same `{hv}-{cpu}-{profile}` string in bash. Keep both in sync. + format!( + "{}-{}-{}", + self.hv.as_str(), + self.cpu.as_str(), + self.profile.as_str(), + ) + } + + pub(crate) fn tag(&self) -> String { + self.tag_for(GOLDENS_VERSION) + } + + /// The golden tag for this platform at `version`. + pub(crate) fn tag_for(&self, version: &str) -> String { + format!("{}-{}", version, self.suffix()) + } + + pub(crate) fn cpu_str(&self) -> &'static str { + self.cpu.as_str() + } + + /// The same platform on the other CPU vendor. Snapshots are + /// portable across Intel and AMD, so each host verifies its own + /// golden and the peer vendor's. + pub(crate) fn cross_cpu(&self) -> Self { + Self { + cpu: self.cpu.other(), + ..*self + } + } +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index acc176052..afcfa8133 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -134,6 +134,11 @@ fn test_exception_handler( /// Install handler for a specific vector #[guest_function("InstallHandler")] fn install_handler(vector: i32) { + // The exception handler increments a counter that starts on a + // copy-on-write page. Write the counter once here so its page copies + // into scratch. The handler then increments a writable page and does + // not fault while it runs on the exception stack. 
+ HANDLER_INVOCATION_COUNT.fetch_add(0, Ordering::SeqCst); hyperlight_guest_bin::exception::arch::HANDLERS[vector as usize].store( test_exception_handler as *const () as usize as u64, Ordering::Release, @@ -389,6 +394,132 @@ fn get_size_prefixed_buffer(data: Vec) -> Vec { data } +#[guest_function("EchoI32")] +fn echo_i32(v: i32) -> i32 { + v +} + +#[guest_function("EchoU32")] +fn echo_u32(v: u32) -> u32 { + v +} + +#[guest_function("EchoI64")] +fn echo_i64(v: i64) -> i64 { + v +} + +#[guest_function("EchoU64")] +fn echo_u64(v: u64) -> u64 { + v +} + +#[guest_function("EchoBool")] +fn echo_bool(v: bool) -> bool { + v +} + +#[guest_function("NoOp")] +fn no_op() {} + +#[host_function("HostEchoI32")] +fn host_echo_i32(v: i32) -> Result; + +#[host_function("HostEchoU32")] +fn host_echo_u32(v: u32) -> Result; + +#[host_function("HostEchoI64")] +fn host_echo_i64(v: i64) -> Result; + +#[host_function("HostEchoU64")] +fn host_echo_u64(v: u64) -> Result; + +#[host_function("HostEchoF32")] +fn host_echo_f32(v: f32) -> Result; + +#[host_function("HostEchoF64")] +fn host_echo_f64(v: f64) -> Result; + +#[host_function("HostEchoBool")] +fn host_echo_bool(v: bool) -> Result; + +#[host_function("HostEchoString")] +fn host_echo_string(v: String) -> Result; + +#[host_function("HostEchoVecBytes")] +fn host_echo_vec_bytes(v: Vec) -> Result>; + +#[host_function("HostNoOp")] +fn host_noop() -> Result<()>; + +#[guest_function("RoundTripHostI32")] +fn round_trip_host_i32(v: i32) -> Result { + host_echo_i32(v) +} + +#[guest_function("RoundTripHostU32")] +fn round_trip_host_u32(v: u32) -> Result { + host_echo_u32(v) +} + +#[guest_function("RoundTripHostI64")] +fn round_trip_host_i64(v: i64) -> Result { + host_echo_i64(v) +} + +#[guest_function("RoundTripHostU64")] +fn round_trip_host_u64(v: u64) -> Result { + host_echo_u64(v) +} + +#[guest_function("RoundTripHostF32")] +fn round_trip_host_f32(v: f32) -> Result { + host_echo_f32(v) +} + +#[guest_function("RoundTripHostF64")] +fn 
round_trip_host_f64(v: f64) -> Result { + host_echo_f64(v) +} + +#[guest_function("RoundTripHostBool")] +fn round_trip_host_bool(v: bool) -> Result { + host_echo_bool(v) +} + +#[guest_function("RoundTripHostString")] +fn round_trip_host_string(v: String) -> Result { + host_echo_string(v) +} + +#[guest_function("RoundTripHostVecBytes")] +fn round_trip_host_vec_bytes(v: Vec) -> Result> { + host_echo_vec_bytes(v) +} + +#[guest_function("RoundTripHostNoOp")] +fn round_trip_host_noop() -> Result<()> { + host_noop() +} + +static mut HEAP_PATTERN: Option> = None; + +#[guest_function("AllocAndWritePattern")] +fn alloc_and_write_pattern(len: u64) { + let v: Vec = (0..len as usize).map(|i| (i & 0xff) as u8).collect(); + // SAFETY: the guest is single threaded, so the static has no concurrent access. + unsafe { HEAP_PATTERN = Some(v) }; +} + +#[guest_function("ReadPattern")] +fn read_pattern() -> Vec { + // SAFETY: the guest is single threaded, so the static has no concurrent access. + #[allow(static_mut_refs)] + unsafe { + HEAP_PATTERN.clone().unwrap_or_default() + } +} + #[expect( clippy::empty_loop, reason = "This function is used to keep the CPU busy"