diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..8af5481 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,61 @@ +name: Test + +# Fast host-side unit tests for the Python (MoonDeck / build scripts) and JS +# (web installer) code that the C++ ctest/scenario suites can't reach. Runs on +# every PR and on pushes to main. Scoped to the paths these tests +# cover so a docs-only or pure-firmware change doesn't spend a runner here. +# +# Today this pins the Improv frame wire format (test/python + test/js assert a +# shared golden vector so the device C++, Python, and JS builders can't drift). +# New Python/JS unit suites land under test/python and test/js and run here. + +# Paths cover every input to the host-side tests: the Python/JS sources under test +# (scripts, docs/install), the test files themselves, AND the device-side C++ frame +# contract (src/core/Improv*.h + the platform handler) — a wire-format change in the +# firmware must run the cross-language golden-vector tests so it can't drift from the +# Python/JS builders silently. pull_request gates every PR; push runs main only (a +# direct-to-main hotfix). A PR branch is covered by pull_request alone — listing +# feature branches under push too would double-run every PR (push + pull_request). 
+on: + pull_request: + paths: &test-paths + - 'scripts/**' + - 'docs/install/**' + - 'src/core/ImprovFrame.h' + - 'src/core/ImprovOpReassembler.h' + - 'src/platform/esp32/platform_esp32_improv.cpp' + - 'test/python/**' + - 'test/js/**' + - '.github/workflows/test.yml' + push: + branches: + - main + paths: *test-paths + +permissions: + contents: read + +jobs: + python: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: astral-sh/setup-uv@v3 + # pytest + pyserial come from the test file's inline PEP-723 block; passing + # them via --with is the explicit, discovery-friendly form (a bare `pytest` + # invocation doesn't honour a test file's own inline deps). + - name: pytest + run: uv run --with pytest --with pyserial pytest test/python -q + + js: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + # Node's built-in test runner — no npm install, no package.json. The glob + # form is required: a bare directory arg is treated as a module to execute. + - name: node --test + run: node --test "test/js/**/*.test.mjs" diff --git a/CLAUDE.md b/CLAUDE.md index df5fb57..3fbebe1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -55,7 +55,7 @@ The design rationale for each rule below lives in [docs/ar Then check the recommendation against [§ Principles](#principles) (minimalism, data over objects, concrete first) and propose it as a question, not a fait accompli. The product owner picks; the agent implements only what was picked. If the picked option turns out to need a follow-up change (e.g. an updated naming convention to make the new layout consistent), surface that *before* starting the move so it's a single coherent refactor, not three round-trips. -**Plan before implementing.** Use `/plan` mode before every feature. Review plans for: unnecessary files, inheritance where structs suffice, modifications outside the relevant directory. 
Reject and regenerate bad plans. +**Plan before implementing.** Use `/plan` mode before every feature. Review plans for: unnecessary files, inheritance where structs suffice, modifications outside the relevant directory. Reject and regenerate bad plans. **Save every approved plan** to `docs/history/plans/` named `Plan-YYYYMMDD - <title>.md` (ISO-8601 date order so the directory sorts chronologically, e.g. `Plan-20260620 - Improv-as-REST.md` for 2026-06-20), as the first implementation step. The plan is the design record that complements `decisions.md` (the lesson record): the plan says what we set out to build and why; decisions.md captures what we learned doing it. **These saved plans are a reference archive for the product owner — agents WRITE a plan when creating one, but do NOT read the existing plan files for context unless the product owner explicitly points to one** (they're under the "Never automatically" rule below alongside the rest of `docs/history/`). Like the rest of `history/`, plans are pruned under *Mandatory subtraction* once their design is fully absorbed into the code + module specs. **Use `uv` for every Python invocation.** Never type `python` or `python3` directly; always go through `uv run` (e.g. `uv run scripts/build/build_desktop.py`, `uv run python -c "…"`). This applies to shell commands, CMake `add_custom_command` / `execute_process`, documentation examples, and anything that shells out. In CMake, resolve `find_program(UV_EXECUTABLE NAMES uv REQUIRED HINTS "$ENV{USERPROFILE}/.local/bin" "$ENV{HOME}/.local/bin")` once and use `${UV_EXECUTABLE} run python …` thereafter. Reason: uv manages the project venv and is the project standard ([scripts/MoonDeck.md](scripts/MoonDeck.md)); bare `python3` isn't on PATH on Windows (and macOS Python Launcher pops a Store prompt). If you catch yourself about to type `python`, stop and prefix with `uv run`. @@ -85,7 +85,7 @@ Each commit produces visible output. The product owner picks what to build next. 1. 
**Pick what to build.** One layout, one effect, one driver, one modifier, one system module: whatever adds the next useful capability. 2. **Spec it.** Write (or review) the module's spec. A spec-in-progress is a plain `.md` in `docs/backlog/` (like any other forward-looking note); when the module ships, its final spec is written in `docs/moonmodules/<Name>.md` and the temporary backlog draft (if any) is deleted. Most small modules go straight to `docs/moonmodules/` in the same change that implements them — the backlog draft is only for specs worth circulating before the code exists. -3. **`/plan` it.** Plan references only the relevant `docs/moonmodules/` specs + architecture docs. Plans are not committed to the repo; the implemented code, docs, and commit message together describe what landed. +3. **`/plan` it.** Plan references only the relevant `docs/moonmodules/` specs + architecture docs. The approved plan is saved to `docs/history/plans/` (see *Plan before implementing*); the implemented code, docs, and commit message together describe what actually landed (which may diverge from the plan — that's expected, the plan is the intent record, not a contract). 4. **Implement in a branch** (`next-iteration` or feature branch). Test on hardware. Run the commit gates (see Lifecycle Events below). Commit. 5. **Push.** Product owner pushes. CodeRabbit reviews the PR. Process findings. 6. **Repeat.** @@ -114,8 +114,9 @@ The narrow safety net: "this snapshot is internally consistent." 7. KPI collection, `collect_kpi.py --commit`, if any file under `src/` changed. **The one-liner MUST include `tick:Xus(FPS:Y)` for every supported target** (PC + ESP32 today; Teensy/RPi when added). If a target's tick/FPS is missing (e.g. ESP32 wasn't monitored recently and `esp32/monitor.log` is stale), re-run a short live capture before committing, or note explicitly in the commit body why the value is absent. 8. 
Device-model catalog, `check_devices.py`, fast (<1s), if `docs/install/deviceModels.json` or `scripts/check/check_devices.py` changed. Validates the installer catalog: required fields, `firmwares` a non-empty list of non-empty strings (`firmwares[0]` is the default), every `image` resolves on disk, each entry's `System.deviceModel` control equals its entry `name`, module `type`s are factory-registered (or boot-wired singletons), `pins` controls live only on `*LedDriver` modules, and `supported` capabilities stay within the known vocabulary. 9. Firmware list, `check_firmwares.py`, fast (<1s), if `scripts/build/build_esp32.py`, `docs/install/firmwares.json`, or `scripts/check/check_firmwares.py` changed. Regenerates the firmware projection from the `FIRMWARES` dict and fails on drift from the committed `docs/install/firmwares.json` (so a `FIRMWARES` edit without regenerating is caught). Trigger includes `build_esp32.py` because that dict is the upstream source. +10. Host-side unit tests (Python + JS), fast (<2s), the suites the C++ ctest can't reach (MoonDeck / build-script logic, web-installer logic). Run `uv run --with pytest --with pyserial pytest test/python -q` if any file under `scripts/` or `test/python/` changed, and `node --test "test/js/**/*.test.mjs"` if any file under `docs/install/` or `test/js/` changed. (Same suites the PR-triggered `.github/workflows/test.yml` runs.) Today these pin the Improv frame wire format — `test/python` + `test/js` assert a shared golden vector so the device C++, Python, and JS frame builders can't drift. New Python/JS unit suites land under `test/python` / `test/js` and run here. -A commit that touches *only* `.github/`, `docs/`, `scripts/` (non-test), `README.md`, `CLAUDE.md`, or `.claude/` therefore runs only the spec check (plus the board-catalog and firmware checks when their specific files changed); the build/test/ESP32/KPI gates are no-ops because their triggers don't fire. 
This is the intended pre-commit cost for CI-only or doc-only changes. +A commit that touches *only* `.github/`, `docs/` (excluding `docs/install/`), `README.md`, `CLAUDE.md`, or `.claude/` therefore runs only the spec check (plus the device-model-catalog and firmware checks when their specific files changed); the build/test/ESP32/KPI gates are no-ops because their triggers don't fire. A `scripts/` or `docs/install/` change adds the relevant host-side unit-test gate but still skips the C++ build/ESP32/KPI gates. This is the intended pre-commit cost for CI-only or doc-only changes. **Recommended (manual, not blocking):** @@ -200,6 +201,7 @@ docs/ history/ ← backward-looking: accumulated wisdom README.md ← index: what's here + cross-repo trends + digest prompt decisions.md ← actions, lessons, proven patterns + plans/ ← approved feature plans (Plan-YYYYMMDD - <title>.md; PO reference, agents don't auto-read) *-inventory.md ← prior-project surveys (v1, v2, moonlight) <repo>.md ← friend-repo monthly activity digests (FastLED, WLED, …) moonmodules/ ← one page per MoonModule (specs before code) diff --git a/docs/architecture.md b/docs/architecture.md index a6bf0a4..999134f 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -228,7 +228,7 @@ Three distinct things, kept distinct in the vocabulary: **Firmware** is the compiled binary: chip target plus which radios/peripherals/sdkconfig fragments are included. Today's variants: `esp32` (classic, WiFi **and** RMII Ethernet in one binary — Ethernet comes up only when a PHY is present, pins/PHY per deviceModel), `esp32-eth` (classic, Ethernet only, WiFi excluded), `esp32-16mb` (classic with 16 MB flash, WiFi + Ethernet), `esp32s3-n16r8` / `esp32s3-n8r8` (S3 with WiFi + W5500 SPI Ethernet), `esp32p4-eth` (Waveshare ESP32-P4-NANO, Ethernet only), `esp32p4-eth-wifi` (the same P4 hardware with WiFi via its on-board ESP32-C6 over esp_hosted). 
Each chip's firmware carries the Ethernet *driver(s)* it can host (RMII EMAC for classic/P4, W5500 SPI for S3); which PHY/pins a deviceModel uses is runtime config. Selected by `build_esp32.py --firmware <key>`, reported by `SystemModule.firmware`, used as the contract target key in scenarios. -**deviceModel** is the physical hardware: chip + PCB + on-board peripherals (PHY, USB-serial, PSRAM, antenna), identified by its product name. Examples: `Olimex ESP32-Gateway Rev G`, `LOLIN D32`, `Generic ESP32 Dev`. A unit cannot identify its own deviceModel (no readable PCB ID on classic ESP32), so MoonDeck deduces it from the firmware where unambiguous (`esp32-eth*` ⇒ Olimex) and otherwise lets the user pick. It is stored on the unit as SystemModule's `deviceModel` Text control (display-only in the UI; HTTP `/api/control` writes still apply). MoonDeck mirrors the picked / deduced value to the unit via `POST /api/control` after each discover and after every dropdown change. The catalog of valid deviceModels lives at [docs/install/deviceModels.json](install/deviceModels.json), shared between MoonDeck and the web installer: MoonDeck reads it for its dropdown and HTTP push; the web installer reads it for its picker, pushes the pick over Improv on first flash, and provides an HTTP fallback (Inject button on *Your devices*) when Improv isn't available on the firmware variant. +**deviceModel** is the physical hardware: chip + PCB + on-board peripherals (PHY, USB-serial, PSRAM, antenna), identified by its product name. Examples: `Olimex ESP32-Gateway Rev G`, `LOLIN D32`, `Generic ESP32 Dev`. A unit cannot identify its own deviceModel (no readable PCB ID on classic ESP32), so MoonDeck deduces it from the firmware where unambiguous (`esp32-eth*` ⇒ Olimex) and otherwise lets the user pick. It is stored on the unit as SystemModule's `deviceModel` Text control (display-only in the UI; HTTP `/api/control` writes still apply). 
MoonDeck mirrors the picked / deduced value to the unit via `POST /api/control` after each discover and after every dropdown change. The catalog of valid deviceModels lives at [docs/install/deviceModels.json](install/deviceModels.json), shared between MoonDeck and the web installer: MoonDeck reads it for its dropdown and HTTP push (plain REST on the LAN); the web installer reads it for its picker and pushes the whole entry — deviceModel plus every module/control — over serial during provisioning as REST ops (**"Improv = REST over serial"**, the `APPLY_OP` vendor RPC; see [ImprovProvisioningModule.md](moonmodules/core/ImprovProvisioningModule.md)). Pushing over serial sidesteps the mixed-content block that stops an HTTPS installer page from POSTing to an `http://` device; an already-running device is re-configured via MoonDeck on the LAN. A deviceModel can run multiple firmwares (the Olimex Gateway runs both `esp32-eth` and the default `esp32`); a firmware can run on multiple deviceModels (`esp32` runs on any classic ESP32 dev kit). The `esp32s3-n16r8` firmware is S3-only and does not run on the Olimex Gateway or other classic-ESP32 hardware. The codebase reserves "deviceModel" exclusively for the physical product and "firmware" exclusively for the compiled binary. diff --git a/docs/backlog/backlog.md b/docs/backlog/backlog.md index 0147cd2..30c9cfa 100644 --- a/docs/backlog/backlog.md +++ b/docs/backlog/backlog.md @@ -10,13 +10,13 @@ Completed items are removed. This file is deleted when empty. 1.0 ships ESP32 firmware (4 variants) + macOS arm64 + Windows x64. Still to add: -- **ESP32-P4** firmware variant — **`esp32p4-eth` (Ethernet-only) shipped**: in `build_esp32.py`'s `FIRMWARES`, the `boards.json` catalog (Waveshare P4-NANO), and CI builds + publishes it to the web installer + releases. 
**Still to ship: `esp32p4-eth-wifi`** (the C6-WiFi variant) — it doesn't build reproducibly in CI yet (the `CONFIG_WIFI_RMT_*` Kconfig defaults don't survive a plain build without a fresh `set-target`), so it's held out of the release matrix until that's fixed; see [§ ESP32-P4 round 3](#esp32-p4-support--rounds-3-4-in-progress). +- **ESP32-P4** firmware variant — **`esp32p4-eth` (Ethernet-only) shipped**: in `build_esp32.py`'s `FIRMWARES`, the `deviceModels.json` catalog (Waveshare P4-NANO), and CI builds + publishes it to the web installer + releases. **Still to ship: `esp32p4-eth-wifi`** (the C6-WiFi variant) — it doesn't build reproducibly in CI yet (the `CONFIG_WIFI_RMT_*` Kconfig defaults don't survive a plain build without a fresh `set-target`), so it's held out of the release matrix until that's fixed; see [§ ESP32-P4 round 3](#esp32-p4-support--rounds-3-4-in-progress). - **Linux desktop binary** — third desktop job in `release.yml`, static-linked libstdc++. - **Teensy 4.1** — toolchain-file build, `.hex` for Teensy Loader. - **Raspberry Pi** — ARM64, cross-built or native. - **macOS code-signing** — drops the Gatekeeper "downloaded from internet" prompt. - **Windows code-signing** — drops the SmartScreen warning on first run of `projectMM.exe`. Same shape as macOS signing; needs an EV / OV code-signing certificate (Microsoft Trusted Signing is the cheapest current option). Until then, the README notes the SmartScreen prompt. -- **Live RMII Ethernet reconfigure** — runtime PHY/pin config shipped (`ethType` + pin controls in NetworkModule, per-board defaults in `boards.json`, `platform::setEthConfig`/`ethInit` dispatch). W5500 (SPI) on S3 applies **live** — `ethStop()` tears down the SPI bus and `ethInit()` re-runs on the next `loop1s()` with no reboot. RMII (classic/P4 internal EMAC) still saves config and asks for a restart to apply, because the EMAC bring-up is fiddlier to hot-cycle cleanly. 
Make RMII live too: a hot `esp_eth_stop` + EMAC/netif teardown + re-init on config change, matching the W5500 path, so every interface honours the no-reboot principle. +- **Live RMII Ethernet reconfigure** — runtime PHY/pin config shipped (`ethType` + pin controls in NetworkModule, per-board defaults in `deviceModels.json`, `platform::setEthConfig`/`ethInit` dispatch). W5500 (SPI) on S3 applies **live** — `ethStop()` tears down the SPI bus and `ethInit()` re-runs on the next `loop1s()` with no reboot. RMII (classic/P4 internal EMAC) still saves config and asks for a restart to apply, because the EMAC bring-up is fiddlier to hot-cycle cleanly. Make RMII live too: a hot `esp_eth_stop` + EMAC/netif teardown + re-init on config change, matching the W5500 path, so every interface honours the no-reboot principle. - **Installer UX polish** — clear "Pre-release (beta)" warning on RC/latest picks, yank-by-asset-tag instead of yank-by-release-deletion. - **ESP32-P4 DHCP hostname not shown by the router (recheck later)** — the device sets its DHCP hostname (option 12 = `deviceName`, default `MM-XXXX`) in the `ETHERNET_EVENT_CONNECTED` handler, verified working on two boards: the S3 over WiFi (router shows `MM-70BC`) and the Olimex over RMII Ethernet (`MM-BD3C`) — the *same* `ethEventHandler` code path the P4 uses. Yet the bench P4 (Waveshare P4-NANO, RMII) still shows as blank/"Unknown" in the GL.iNet client list, while serial confirms `set_hostname` succeeds with no error. Two unconfirmed suspects, neither our logic: (1) the router holds a **sticky lease** for the P4's MAC and won't relearn the hostname until it fully expires (the per-client "forget" isn't exposed in this GL.iNet UI, and a plain reboot didn't clear it); (2) a P4-specific IDF netif quirk serializing option 12 differently on the newer P4 Ethernet path. Since the shared code path is proven on two other boards, this is not treated as a code bug. 
Recheck after the P4's lease naturally expires, or on a different router, before spending more on it. **Possibly correlated:** the DevicesModule HTTP sweep also intermittently misses the P4 at `.132` (a single-pass probe timeout) while finding the S3 and PC reliably — both symptoms point at the P4 being slower/flakier to answer at the network layer (DHCP and/or TCP-accept latency on the P4 Ethernet path), not at our discovery or hostname logic. Investigate the P4's network responsiveness as the common cause. @@ -194,13 +194,31 @@ No FreeRTOS tasks are pinned today. At 16K LEDs the render task takes ~52 ms/tic Today `setEnabled(false)` only makes the Scheduler skip the module's `loop`/`loop1s`/`loop20ms` callbacks (gated via `respectsEnabled()`/`enabled()` in `MoonModule`/`Scheduler`). The module still **holds whatever it acquired**: AudioModule keeps its I2S channel open, an LED driver keeps its RMT/LCD/Parlio peripheral + DMA buffers, NetworkSendDriver keeps its socket. So a disabled module stops *acting* but doesn't *free* — which is fine for a quick mute (a non-ticking module can't pollute a perf measurement, the use case that surfaced this), but wrong if "disabled" should mean "give the pins/peripheral/memory back so another module can use them, or so a mic-less reconfig works." The mechanism for this already exists — `MoonModule::onEnabledChanged()` (a no-op hook today) is exactly where a module should deinit/reinit its resource on the flip. Work: audit every resource-holding module (AudioModule, the LED drivers, NetworkSend/Receive, anything with a socket/peripheral/large buffer) and implement `onEnabledChanged()` to release on disable + re-acquire on enable, mirroring what `setup()`/`teardown()` do. Decide the contract: does disable free the buffer (cheaper RAM, slower re-enable) or keep it (instant re-enable, holds RAM)? Probably per-module. 
Pin controls becoming the standard `Pin` type (just landed) is a related enabler — a disabled driver releasing its pins lets the same GPIO be reassigned live. +### Pin-uniqueness check across modules (prevents conflicts; replaces a singleton hack) + +**Problem it solves.** Two modules must not drive the same physical GPIO. Today nothing stops it: add two `RmtLedDriver`s with `pins="18"`, or two `AudioModule`s with the same `wsPin/sdPin/sckPin`, and they fight over the pin — at best garbage output, at worst (for I2S) endless `i2s_new_channel` driver-error spam every tick. This surfaced when a repeated catalog inject stacked duplicate AudioModules and the device spammed I2S failures (a clean install is fine; the duplicates were the artifact). + +**Why pin-uniqueness, not a per-type singleton.** The first instinct was "make AudioModule single-instance" — but that's a crude proxy. The *real* invariant is pin non-overlap: a board legitimately can have **two LED drivers on different GPIOs** (multi-output rigs do exactly this), or even two mics on distinct pin sets. "One mic" isn't fundamentally true; "no two modules on the same pin" is. So check pin conflicts, which both prevents the breakage **and** allows legitimate multi-instance setups. (A per-type singleInstance flag was prototyped and rejected in favour of this.) + +**The clean mechanism — reuse `ControlType::Pin`.** Pins are already their own control type (the `addPin` work). So the check is domain-neutral and needs no per-module declaration: enumerate every `Pin`-typed control's value across the whole tree; a value of `-1` is "unused" (ignored); any other value appearing on two controls is a conflict. Handle the list case: `RmtLedDriver.pins` is a comma list (`"18,19,20"`), so the enumerator expands list-of-pins controls too. + +**Where it runs.** Two sites, because a pin can be introduced at add *or* edit: +- `POST /api/modules` (add): if the new module's catalog/default pins collide, reject. 
+- `POST /api/control` (pin write): if setting a `Pin` control to a value already used elsewhere, reject (or soft-flag — see below). + +**Open decision (UX).** Conflict on add → reject with a clear message (`"GPIO 18 already used by RmtLed"`). Conflict on a live pin edit → reject is safest but blocks mid-reassignment (you can't swap two drivers' pins without a free intermediate); a **soft-flag** (accept, set a status warning) is friendlier for live editing. Leaning: reject on add, soft-flag on live edit. Product-owner call. + +**Hardware-limit tail (not covered by the pin check).** Pin-uniqueness rejects the common case but not the controller-count limit: the S3 has **2 I2S controllers** regardless of pins, so a 3rd mic on distinct pins passes the pin check yet fails `i2s_new_channel` at runtime. That tail is already handled — the platform I2S init returns false on failure (no panic, module stays `inited_=false`); verified live (4 pinned AudioModules → error spam, no crash). So scope = pin-uniqueness check + the existing graceful-degrade; don't try to make the pin check also model controller counts. + +**Related:** [§ Disabling a module should release its resources](#disabling-a-module-should-release-its-resources-not-just-stop-its-loop-backlog) — a disabled module freeing its pins is what lets the same GPIO be reassigned live without a conflict-reject. + ### Extract shared lane-driver scaffolding when the 3rd parallel backend lands (deferred) The LcdLedDriver (S3 LCD_CAM i80) and ParlioLedDriver (P4 Parlio) share ~245 of 362 lines, and their platform-side loopback capture+verify is ~100 lines byte-for-byte identical (`platform_esp32_parlio.cpp` even notes "The RX capture half is byte-for-byte identical" to the LCD one). The status-string lifecycle (`failBuf_` / `configErr_` / `clearFailBuf` / `clearConfigErr`) is triplicated across all three LED drivers (RMT/LCD/Parlio), ~60 lines. 
The branch deliberately extracted the *encoders* (`LcdSlots.h` shared by i80+Parlio, `RmtSymbol.h`, `PinList.h`) on the "extract when the second user lands" rule, but stopped at the lifecycle/loopback scaffolding. **Accepted for this merge** (the reviewer agreed driver-level extraction can wait): the duplication is in mechanical lifecycle/test scaffolding, not domain logic, and a DriverBase-level refactor touching three drivers is riskier than the duplication it removes. **Do it when the third parallel backend arrives** (16-lane widening, or Teensy FlexIO), at which point the pattern is proven three ways: (a) a `detail::` platform helper for capture+verify (the only per-peripheral difference is the transmit call, pass a callback, beside the already-shared `loopbackJumperOk`), and (b) a small owned-status helper or DriverBase members for the fail/config strings. Until then the cost is line count, not correctness. ### 1..8-pin LCD output (future) — would let S3 default to LCD -`LcdLedDriver` requires **all 8** i80 data lanes (`kExactLaneCount = true`, `LcdLedDriver.h`): the ESP-IDF `esp_lcd` i80 bus configures every data line of the bus width and rejects a partial set, so even a few WS2812 strands claim 8 GPIOs. That's why **S3 boards default to `RmtLedDriver`** in `boards.json` (RMT runs one channel per pin, 1..N) rather than LCD — a board with fewer than 8 strips can't sensibly use the LCD driver, and the 8-lane LCD bench wiring (`1,2,4,5,6,7,8,9`) collides with common peripheral pins (e.g. the mic on 4/5/6). A **1..8-pin LCD mode** (drive only the lanes named in `pins`, leave the rest unclaimed — matching Parlio's flexibility) would let the parallel S3 path run any lane count, at which point an S3 board entry could choose LCD vs RMT by intent. Parlio already does this (`kExactLaneCount = false`, 1..8 lanes), so the P4 default *is* the parallel driver. Until LCD gains the same flexibility, S3 stays on RMT by default. 
Low priority — RMT covers the few-strip S3 case today. +`LcdLedDriver` requires **all 8** i80 data lanes (`kExactLaneCount = true`, `LcdLedDriver.h`): the ESP-IDF `esp_lcd` i80 bus configures every data line of the bus width and rejects a partial set, so even a few WS2812 strands claim 8 GPIOs. That's why **S3 boards default to `RmtLedDriver`** in `deviceModels.json` (RMT runs one channel per pin, 1..N) rather than LCD — a board with fewer than 8 strips can't sensibly use the LCD driver, and the 8-lane LCD bench wiring (`1,2,4,5,6,7,8,9`) collides with common peripheral pins (e.g. the mic on 4/5/6). A **1..8-pin LCD mode** (drive only the lanes named in `pins`, leave the rest unclaimed — matching Parlio's flexibility) would let the parallel S3 path run any lane count, at which point an S3 board entry could choose LCD vs RMT by intent. Parlio already does this (`kExactLaneCount = false`, 1..8 lanes), so the P4 default *is* the parallel driver. Until LCD gains the same flexibility, S3 stays on RMT by default. Low priority — RMT covers the few-strip S3 case today. ### Classic ESP32 I2S 16-lane parallel LED driver (future) — beyond RMT's 8 channels @@ -224,7 +242,7 @@ Board preset catalog + upload (later, when the runtime config has real consumers - Pin reassignment requires reboot (ESP-IDF can't hot-reconfigure EMAC pins after `esp_eth_driver_install`); document the constraint. - A first attempt at this catalog landed and was rolled back during the firmware-vs-board separation work — the catalog only earns its keep once the device reads it, otherwise it's a docs-shaped file in the wrong place. -**Prior art — MoonLight's per-board pin database** ([ModuleIO.h](https://github.com/MoonModules/MoonLight/blob/main/src/MoonBase/Modules/ModuleIO.h)). MoonLight (our own project) already models exactly this for ~25 boards across ESP32-D0 / S3 / P4: a `pins[]` array of `{GPIO, usage, index}` plus board-level `maxPower`, `ethernetType`, `ethPhyAddr`, `ethClkMode`. 
Don't copy the file or paste its tables here — read it when building the catalog and write our own. Its `usage` enum enumerates the hardware functionalities a projectMM board preset *could* drive once the device-side consumers exist (each needs its own module/control before the corresponding `boards.json` / catalog field earns its keep — none exist today beyond `Board.board` + `Network.txPowerSetting`): +**Prior art — MoonLight's per-board pin database** ([ModuleIO.h](https://github.com/MoonModules/MoonLight/blob/main/src/MoonBase/Modules/ModuleIO.h)). MoonLight (our own project) already models exactly this for ~25 boards across ESP32-D0 / S3 / P4: a `pins[]` array of `{GPIO, usage, index}` plus board-level `maxPower`, `ethernetType`, `ethPhyAddr`, `ethClkMode`. Don't copy the file or paste its tables here — read it when building the catalog and write our own. Its `usage` enum enumerates the hardware functionalities a projectMM board preset *could* drive once the device-side consumers exist (each needs its own module/control before the corresponding `deviceModels.json` / catalog field earns its keep — none exist today beyond `System.deviceModel` + `Network.txPowerSetting`): - **LED output pins** — per-strip data GPIOs (1–16 outputs/board); the first real consumer (a Driver pin control) unblocks multi-output boards (QuinLED Dig-Quad/Octa, SE16, LightCrafter). - **Ethernet PHY config** — LAN8720/RMII (MDC/MDIO/CLK/power-pin/PHY-addr/clock-mode) vs W5500/SPI (MISO/MOSI/SCK/CS/IRQ); the consumer is the runtime `Network.eth_*` controls listed above, replacing the hardcoded Olimex pins. @@ -239,7 +257,7 @@ Board preset catalog + upload (later, when the runtime config has real consumers Sequencing rule (unchanged): each functionality lands a device-side control first, then its preset field; the catalog grows one earned consumer at a time, never as a speculative pin dump. 
-**Module variant + PSRAM within the classic-ESP32 family.** `getChipDescription()` and MoonLight's `ModuleIO.h` both report only the *core* family ("ESP32"), not the *module* (WROOM / WROVER / PICO) — so neither distinguishes whether a classic-ESP32 board has PSRAM. This matters for projectMM (whose large-LED story leans on PSRAM) in a way it doesn't for MoonLight: e.g. the **QuinLED Dig-Next-2 is built on an ESP32-PICO with 2 MB PSRAM**, but projectMM's `esp32` build has no `CONFIG_SPIRAM` (see the `#ifdef CONFIG_SPIRAM` gate in `platform_esp32.cpp::psramAlloc`), so it flashes and runs as a no-PSRAM device and hits the non-PSRAM fragmentation ceiling at large grids that the 2 MB would otherwise relieve. A PSRAM-enabled classic-ESP32 firmware variant (e.g. `esp32-psram`) would unlock it; `boards.json` could then carry a `psram` hint per board to steer the picker — but only once that variant exists (no consumer today). `boards.json` currently maps every classic board to the WiFi-only `esp32` variant, which is correct-but-unoptimised for PSRAM-bearing PICO boards. +**Module variant + PSRAM within the classic-ESP32 family.** `getChipDescription()` and MoonLight's `ModuleIO.h` both report only the *core* family ("ESP32"), not the *module* (WROOM / WROVER / PICO) — so neither distinguishes whether a classic-ESP32 board has PSRAM. This matters for projectMM (whose large-LED story leans on PSRAM) in a way it doesn't for MoonLight: e.g. the **QuinLED Dig-Next-2 is built on an ESP32-PICO with 2 MB PSRAM**, but projectMM's `esp32` build has no `CONFIG_SPIRAM` (see the `#ifdef CONFIG_SPIRAM` gate in `platform_esp32.cpp::psramAlloc`), so it flashes and runs as a no-PSRAM device and hits the non-PSRAM fragmentation ceiling at large grids that the 2 MB would otherwise relieve. A PSRAM-enabled classic-ESP32 firmware variant (e.g. 
`esp32-psram`) would unlock it; `deviceModels.json` could then carry a `psram` hint per board to steer the picker — but only once that variant exists (no consumer today). `deviceModels.json` currently maps every classic board to the WiFi-only `esp32` variant, which is correct-but-unoptimised for PSRAM-bearing PICO boards. ### Multi-layer composition (backlog) @@ -270,47 +288,15 @@ Minimum-scope fix before the move: Several `platform.h` APIs still use `(buf, len)` pairs where `std::span` would catch length/pointer mismatches at compile time. Concrete sites: `http_fetch_to_ota`, `improvProvisioningInit`, and friends. ~2 h including ripple updates to callers. Do alongside the next platform-API expansion (Windows socket port or POST /api/firmware streaming). -### Board injection + Improv as a general data injector (multi-commit, partially landed) - -Today the **firmware** the device runs is baked in at compile time (`MM_FIRMWARE_NAME`) and self-reported via SystemModule. The **board** the firmware runs on (Olimex Gateway, LOLIN D32, generic ESP32, …) the device cannot self-identify — no readable PCB ID on classic ESP32. MoonDeck deduces it from the firmware where the firmware uniquely identifies hardware (`esp32-eth*` ⇒ Olimex) and otherwise asks the user via a picker; the value lives in `scripts/moondeck.json` on the laptop only. The device's own UI and API have no concept of board. +### Improv-as-REST follow-ups -Goal: get the board key onto the device (persisted, reported via `/api/state`) so it survives between MoonDeck sessions and other clients (HomeAssistant, future MQTT, the device's own OTA-picker compatibility filter) can read it. Then make injection a first-class part of the install flow (web installer + Improv) so end users get the right board key without needing MoonDeck at all. 
- -Builds on existing plan items: see [Runtime board presets](#runtime-board-presets-multi-commit-partially-landed) for the longer-term goal of pin maps / module-config defaults living per-board on disk; this section is the prerequisite — getting the *key* onto the device — that unlocks that work. - -**Step 1 — Catalog + device module + MoonDeck push (DONE, commit `8a76be2`):** -- `docs/install/boards.json` is the single source of truth for valid board names. Schema landed as `[{ name, firmwares[] }]` — single `name` field (no key/label split; `name` is both identifier and display label), and `firmwares[0]` is the default the picker pre-selects (no separate `default_firmware` field; reorder the array to change the default). -- New `BoardModule` (code-wired child of SystemModule) carries one `board` Text control with the new `readonly` UI flag (display-only on the device's own web UI; HTTP writes still apply, that's how the injectors push). Persisted to `/.config/BoardModule.json` via the standard FilesystemModule path — no bespoke setter, no bespoke route. Injection is a regular `POST /api/control { "module":"Board", "control":"board", "value":"<name>" }`. -- MoonDeck loads `boards.json` at startup; `_deduce_board` is a catalog reverse-lookup (firmware → unique board, else ""); pushes the picked / deduced value to the device on every discover / refresh / dropdown change (`POST /api/push-board` MoonDeck endpoint → `_push_board_to_device` → device's `/api/control`). - -**Step 2 — Web installer board picker (DONE, UX-only):** -- Installer page's `install-picker.js` fetches `boards.json` same-origin at init and renders a board `<select>` above the existing release + firmware selects (opt-in via `enableBoardPicker:true`, default for the web installer; the on-device OTA picker passes `false` because the device already knows its board). 
-- Picking a board narrows the firmware dropdown to that board's `firmwares[]`, pre-selects the default (precedence: own-firmware-key > localStorage saved > board default = `firmwares[0]` > first compatible), and disables the firmware select when only one option remains. -- **No automatic device push.** Original plan called for a post-`PROVISIONED` HTTP fetch to inject the board, but ESP Web Tools 10.x emits `state-changed` on the internal `ImprovSerial` client (inside the dialog's shadow DOM), not as a bubbling DOM event on `<esp-web-install-button>`. Reading the EWT source (`src/install-dialog.ts`) confirmed there's no public event surface for "Improv just succeeded with URL X". (The pre-existing `devices.js` "Your devices" auto-add silently broke for the same reason — kept as best-effort for compatibility with future EWT releases that may re-expose the event.) Step 3 picks up the push on the Improv Web Serial channel — no DOM events, no mixed-content concern. -- Net Step 2 win: end users at the public installer pick "LOLIN D32" first and can't accidentally flash `esp32s3-n16r8` on it. MoonDeck remains the working board-injection path until Step 3 lands. - -**Step 3 — Improv RPC injection + full EWT replacement (DONE):** -- Device: `platform_esp32_improv.cpp::improvHandleSetBoard` dispatches vendor RPC `0xFE` (high end of the 0x80–0xFE vendor range). Payload is a length-prefixed UTF-8 board name (1..23 ASCII-printable bytes). Validates inline; on accept publishes via the same producer/consumer pattern as `SEND_WIFI_CREDENTIALS` (atomic ready flag + buffer); `ImprovProvisioningModule::loop1s()` picks it up on the scheduler thread and calls `BoardModule::setBoard()`. Same dirty-flag + debounced-save chain MoonDeck's HTTP write triggers. -- Browser: ESP Web Tools' install button was the blocker (OS-level SerialPort exclusivity + shadow-DOM event isolation). Dropped EWT entirely. 
New `docs/install/install-orchestrator.js` owns the SerialPort across flash (esptool-js) → WiFi provision (improv-wifi-serial-sdk) → SET_BOARD (raw frame bytes written via `port.writable.getWriter()` — the SDK's `writePacketToStream` is private as of 2.5.0). Custom install modal replaces EWT's dialog. -- Same root cause behind the `devices.js` "Your devices" auto-add — fixed in this commit. `myDevices.addProvisionedDevice(url, board)` now fires from the orchestrator's `onSuccess` callback, populating the bookmark list as designed. -- Vendor RPC dispatcher is the seed for the "Improv as a general data injector" forward-look. Step 4+ additions reuse the same pattern: new command ID + dispatcher case + orchestrator helper. -- The future contributor note: don't naively re-add ESP Web Tools — the orchestrator works because it owns the port. Putting EWT back means giving up Improv RPC injection. - -**Step 4 — Catalog grows (only when there's a consumer):** -- Once the device-side runtime board presets work ([Runtime board presets](#runtime-board-presets-multi-commit-partially-landed)) actually lands, `boards.json` entries gain optional `presets` fields (`ethernet.{phy, rmii_clock_gpio, mdio_gpio, …}`, `default_module_config.{Network, Layouts, …}`). MoonDeck pushes the relevant subset alongside the board key via a new `POST /api/system/board-preset` route. Until then, **don't add `presets` fields** — JSON shape grows when a consumer earns its keep, not before. - -**Improv as a general data injector (deferred until a second use case lands):** - -Step 3's custom RPC infrastructure is the seed. Plausible follow-on injectables: device name override (skip the `MM-CAFE` default), MQTT broker URL (when MQTT module ever lands), static IP, DMX universe assignments, pre-shared API token. **Don't generalise yet** — building a generic key-value Improv injector before there's a second use case is premature abstraction. 
If two or three more inject-at-install fields land with the same shape, *then* refactor ImprovProvisioningModule into a generic handler that dispatches by RPC command ID to registered callbacks. - -**Resolved risks (Steps 1-3 done):** -- ~~HTTP injection only works in dev~~ — Step 3's Improv RPC path works on HTTPS Pages, the dev/prod gap is closed. -- ~~ESP Web Tools' custom-Improv-RPC sending API~~ — EWT doesn't expose ImprovSerial; Step 3 replaced the install button with our own esptool-js + improv-wifi-serial-sdk orchestrator (`docs/install/install-orchestrator.js`). SDK's `writePacketToStream` was also private; raw frame bytes via `port.writable.getWriter()` solved that. -- ~~SET_BOARD command ID collision~~ — picked `0xFE` (high end of 0x80-0xFE vendor range), documented at the definition site in `platform_esp32_improv.cpp` and in `BoardModule.md`. Renegotiable if the spec ever expands into the high vendor range. +Device-model injection over Improv shipped as **"Improv = REST over serial"** (the `APPLY_OP` vendor RPC pushes the whole `deviceModels.json` entry over serial during install; the device runs the same apply-core the HTTP REST API does, on WiFi *and* eth-only firmware). That subsumed the earlier multi-step "board injection + Improv as a general data injector" plan — the general injector *is* APPLY_OP. What remains: **Open follow-up: per-control validator hook on `ControlDescriptor`.** `SystemModule::setDeviceModel()` validates ASCII-printable (rejecting control bytes, embedded NUL); the HTTP `POST /api/control` write path uses the generic `applyControlValue()` in `Control.cpp` which has no per-control validator and writes the raw bytes through. Acceptable today (HTTP-write callers source values from `deviceModels.json` which the project controls), but the right fix is a per-control validator hook on `ControlDescriptor` so any control can declare an inline validation function pointer. 
Worth doing when the next control with non-trivial input constraints lands, or when the threat model grows (an integration accepts arbitrary external input and POSTs it through). Sketch: `ControlDescriptor` grows a `bool (*validate)(const void*, size_t)` slot defaulting to nullptr; `applyControlValue` calls it before writing and returns `ApplyResult::Malformed` on false; `addText` / `addPassword` get an optional validator argument. Touches ~5 sites; no protocol change. -**Open follow-up: shared JS helpers across device-UI and web-installer.** `safeLocalGet` / `safeLocalSet` (3-line hostile-storage guards) are duplicated in `src/ui/install-picker.js` (device firmware, embedded as a C string via `embed_ui.cmake`) and `docs/install/devices.js` (web installer page, served from Pages). The two live in different build contexts so the shared extract isn't trivial — it'd need a new `src/ui/safe-storage.js` plus updates to: `embed_ui.cmake` (embed the new file), `ui_embedded.h` generator (new C array), HTTP server file routing (new path served), `release.yml` workflow staging, `preview_installer.py` staging. Five files for one 3-line helper is too much pre-merge. Worth doing when the next shared helper arrives — `relativeTime`, `formatBytes`, and the catalog-parse helper (`tryHttpInjectBoard` + `consumePendingBoardParam` share a fetch+find+iterate shape) are candidates. Two helpers earn the build-glue cost; one doesn't. +**Open follow-up: closed-loop APPLY_OP pacing (read-back ack + retry).** The installer paces APPLY_OP frames open-loop (`sendApplyOpFrame` waits a fixed ~120 ms between ops) rather than reading the device's ack back, because a Web Serial duplex read while the writer lock is held is awkward. The delay covers the worst-case single-buffer consume window with headroom, and each op is idempotent (a lost op re-applies cleanly on a re-flash), so this is robust today. 
The closed-loop upgrade — read the RPC response, retry once on error `0x82` (buffer busy) — removes the fixed delay (faster install) and makes op-loss impossible rather than improbable. Worth doing if a real install is ever observed dropping an op, or when the config push grows large enough that the cumulative fixed delay is noticeable. Touches only `install-orchestrator.js`. + +**Open follow-up: shared JS helpers across device-UI and web-installer.** `safeLocalGet` / `safeLocalSet` (3-line hostile-storage guards) are duplicated in `src/ui/install-picker.js` (device firmware, embedded as a C string via `embed_ui.cmake`) and `docs/install/devices.js` (web installer page, served from Pages). The two live in different build contexts so the shared extract isn't trivial — it'd need a new `src/ui/safe-storage.js` plus updates to: `embed_ui.cmake` (embed the new file), `ui_embedded.h` generator (new C array), HTTP server file routing (new path served), `release.yml` workflow staging, `preview_installer.py` staging. Five files for one 3-line helper is too much pre-merge. Worth doing when the next shared helper arrives — `relativeTime` and `formatBytes` are candidates. Two helpers earn the build-glue cost; one doesn't. --- @@ -440,7 +426,7 @@ The build IDF is `v6.1-dev-399-gd1b91b79b5`, a dev-branch snapshot (2025-11-05) ### Three-level device model: MCU → Board → Device (config provenance) -The model itself is now a shipped design — see [architecture.md § Config provenance](../architecture.md#config-provenance-mcu--board--device) (the three levels + the `txPowerSetting` example + "default only at the level that fixes it"). The catalog that carries it is [`install/boards.json`](../install/boards.json) ([schema](../install/README.md)). **MoonDeck device-profile save/restore is shipped** — capture a device's pin/peripheral config (`/api/save-profile`) and re-apply it after a reflash or to a clone (`/api/apply-profile`), stored per-device in `moondeck.json`. 
The remaining forward-looking pieces — a `devices.json`/MCU-layer split and annotated-pin images — stay gated by the sequencing rule (no catalog field ahead of a consumer). +The model itself is now a shipped design — see [architecture.md § Config provenance](../architecture.md#config-provenance-mcu--board--device) (the three levels + the `txPowerSetting` example + "default only at the level that fixes it"). The catalog that carries it is [`install/deviceModels.json`](../install/deviceModels.json) ([schema](../install/README.md)). **MoonDeck device-profile save/restore is shipped** — capture a device's pin/peripheral config (`/api/save-profile`) and re-apply it after a reflash or to a clone (`/api/apply-profile`), stored per-device in `moondeck.json`. The remaining forward-looking pieces — a `devices.json`/MCU-layer split and annotated-pin images — stay gated by the sequencing rule (no catalog field ahead of a consumer). ### Persistence overlay: partial-save / schema-change audit (backlog) diff --git a/docs/history/decisions.md b/docs/history/decisions.md index 72e0f64..318f3f8 100644 --- a/docs/history/decisions.md +++ b/docs/history/decisions.md @@ -695,3 +695,5 @@ The installer was reworked so a board catalog ([`boards.json`](../install/boards **A GPIO pin is its own control type (`ControlType::Pin`), not an overloaded int16.** Pins were first added as `addInt16` with a `-1..48` range, which the UI rendered as a *slider* — meaningless for a GPIO, and the cap wrongly excluded the P4's high pins (MDIO 52, clk 50). Dropping the range didn't help: the UI's `int16` case *always* draws a slider (an unbounded int16 falls back to a −100..200 percentage slider that Layer start/end positions rely on), so int16 couldn't be made to mean both "position slider" and "pin number." 
The fix is a dedicated `Pin` type: `int8_t` storage (one byte — a GPIO never exceeds ~54, and on a DRAM-scarce ESP32 the per-pin byte matters across many pin controls), −1 = unused, the UI always renders a plain number input keyed off the `"pin"` type string, and min/max are a server-side write-clamp guard only. Serializes/parses as a plain integer (same as int16). This also serves every future pin control (LED-driver clockPin/dcPin, GyroDriver SDA/SCL, board pins) — they migrate to `addPin` for free. Lesson: when one control type is doing two jobs with different UX (slider vs number), that's the smell for a new type, not a range hack; and pick the smallest storage that fits the domain (int8 for a pin). **`deviceName` (identity) vs `deviceModel` (product) vs board (bare PCB) — one term was doing three jobs.** "Board" had been overloaded to mean the per-unit network identity, the hardware product/catalog key, AND the bare PCB. Untangling it: `deviceName` is the **per-unit identity** — one string that drives mDNS (`<deviceName>.local`), the SoftAP name, and the DHCP hostname, so the device shows up under one name everywhere; it's RFC-1123-coerced (`sanitizeHostname`) because it becomes a hostname. `deviceModel` is the **hardware product** (the `deviceModels.json` catalog key, e.g. "projectMM testbench S3") — display-form, spaces allowed, never a hostname. "Device" is the umbrella noun; "board" now means **only the bare PCB**. This drove the BoardModule→SystemModule fold (the identity is core unit state, not a separate module), the `board`→`deviceModel` rename across catalog/installer/Improv (SET_BOARD→SET_DEVICE_MODEL, byte 0xFE unchanged), and the eth pin-map clarification (driver = firmware, pin map = firmware-seeded but **deviceModel-authoritative** so an Olimex entry can override). 
Lesson: when one noun answers three different questions ("what do I call this unit on the network?", "what product is it?", "what's the bare board?"), that's a naming smell — split it into the qualified terms, pick one umbrella word, and make the split visible in every layer (control names, RPC symbols, catalog keys, docs) so the three concepts can't re-merge. + +**"Improv = REST over serial" — one apply-core, two transports, and the testability that follows from extracting the hard part.** The deployed HTTPS installer couldn't configure a flashed device: a browser blocks an HTTPS page from POSTing to an `http://` device (mixed-content), and the `?deviceModel=` pull/handoff that replaced it only ran if the user opened that exact link. The fix reframed the problem — the installer already owns the USB serial port during provisioning, so push the config over it as the *same REST operations the HTTP API runs*: a new `APPLY_OP` (0xFC) Improv vendor RPC whose payload is `{"op":"add|set|clearChildren",…}`, the same JSON a `POST /api/modules`/`/api/control` body carries. On the device the op routes to **one transport-free apply-core** (`HttpServerModule::applyAddModule/applySetControl/applyClearChildren/applyOp`) the HTTP handlers also call, so a network REST call and a serial APPLY_OP execute identical code; the handlers became thin `switch(applyX())` → status-code mappers. This **deleted** the whole browser handoff (device-side catalog fetch, `?deviceModel=` decoration, the inject button) — a net subtraction — and works on Ethernet-only firmware once the Improv listener is decoupled from WiFi (the vendor RPCs compile in unconditionally; only `WIFI_SETTINGS`/`GET_WIFI_NETWORKS` stay `#ifndef MM_NO_WIFI`). Lesson 1: when a push is blocked by the *medium* (mixed-content on HTTPS), look for a medium you already control (the serial port mid-flash) instead of bolting on a fragile pull. 
Lesson 2 (the one with legs): the way to make it *provable* was to **extract the hard part into a pure core primitive** — the chunk reassembly + out-of-order/duplicate sequence guard moved from the ESP32-only handler into `src/core/ImprovOpReassembler.h` (header-only state machine, returns `Continue/Ready/Error`), and the JS frame builders into `docs/install/improv-frame.js` so `node:test` imports them without the orchestrator's browser deps. Both are *Complexity lives in core; domain modules stay simple* applied for testability: the device handler keeps only its serial I/O, the algorithm gets unit-tested on the desktop, and a format implemented three times (device C++, Python, JS) is pinned by **one shared golden vector** asserted in `test/python` + `test/js` — a contract test is the right answer to *forced* duplication no shared compilation target can remove. The reflex worth keeping: a hard mechanism buried in a platform `.cpp` that "can only be tested on hardware" is a smell — extract its pure core, and "rock solid proven" becomes a unit test instead of a bench session. diff --git "a/docs/history/plans/Plan-20260519 - Core Pipeline on Desktop \342\200\224 Lights on Panel via ArtNet.md" "b/docs/history/plans/Plan-20260519 - Core Pipeline on Desktop \342\200\224 Lights on Panel via ArtNet.md" new file mode 100644 index 0000000..23e7173 --- /dev/null +++ "b/docs/history/plans/Plan-20260519 - Core Pipeline on Desktop \342\200\224 Lights on Panel via ArtNet.md" @@ -0,0 +1,310 @@ +# Plan: Core Pipeline on Desktop — Lights on Panel via ArtNet + +## Context + +Item 1 from docs/plan.md. This is the first implementation commit of projectMM v3. No source code exists yet — only architecture docs and promoted specs. The goal is a working pipeline: GridLayout → RainbowEffect → ArtNetSendDriver → lights visible on a real hub75 panel via ArtNet receiver, running on macOS desktop. 
+ +Agreed simplifications (from product owner): +- **No MappingLUT** — Grid is 1:1 unshuffled, no mapping table needed +- **No DriverGroup buffer** — reads directly from Layer buffer +- **No BlendMap** — single layer, 1:1 unshuffled +- **LightConfig minimal** — RGB only (channelsPerLight=3) +- **EffectBase** — start with thin class (may absorb into Layer later) +- **Scheduler drives everything** + +## File Tree + +``` +CMakeLists.txt # Root: C++20, -Wall -Wextra -Werror, core + platform libs, test target +src/ + platform/ + platform.h # mm::platform API: millis, micros, alloc, free, UdpSocket + desktop/ + platform_desktop.cpp # std::chrono, std::malloc, BSD sockets + core/ + types.h # nrOfLightsType (uint32_t), lengthType (int16_t), CoordCallback + color.h # hsvToRgb, scale8 — constexpr, integer, no floats + Control.h # ControlDescriptor (<16B on ESP32), ControlList<N> + MoonModule.h # Base class: lifecycle, controls, name, parent + Scheduler.h # Module registry, tick(), elapsed(), loop/20ms/1s dispatch + light/ + Buffer.h # uint8_t* buffer, move-only, allocate/free/clear/span + LayoutGroup.h # Groups layouts, forEachCoord with index offset + GridLayout.h # width×height×depth grid, row-major coordinates + EffectBase.h # Thin accessors to parent Layer + Layer.h # Owns buffer + effects list, render = run effects in order + RainbowEffect.h # Diagonal rainbow, BPM speed control + DriverGroup.h # Groups drivers, passes layer buffer to each + ArtNetSendDriver.h # ArtNet OpDmx packets over UDP, universe splitting, FPS limit + main.cpp # Wire pipeline, run scheduler loop +test/ + CMakeLists.txt # Test executable + doctest.h # Vendored header-only test framework + test_color.cpp # hsvToRgb at cardinal hues, scale8 + test_buffer.cpp # Allocate, clear, move, double-free safety + test_moonmodule.cpp # Lifecycle, control binding + test_grid_layout.cpp # Coordinate iteration, row-major order, 3D + test_rainbow.cpp # Buffer contains expected hsvToRgb values + 
test_artnet_packet.cpp # Header format, byte order, universe splitting + test_pipeline.cpp # Full pipeline: grid→layer→rainbow→artnet packets +``` + +17 source files, 7 test files, 2 CMake files. All MoonModules are single `.h` files. Apart from `main.cpp` and the tests, only `platform_desktop.cpp` is a `.cpp` file. + +## Implementation Steps + +### Step 1: CMake + Platform + Types + +Files: `CMakeLists.txt`, `src/platform/platform.h`, `src/platform/desktop/platform_desktop.cpp`, `src/core/types.h`, `test/CMakeLists.txt`, `test/doctest.h` + +- Root CMake: C++20, warnings as errors, `mm_core` (INTERFACE lib — all headers), `mm_platform` (desktop .cpp), `mmv3` executable, test target +- Platform API in `mm::platform`: `millis()`, `micros()`, `alloc(size)`, `free(ptr)`, `UdpSocket` class (open/send/close) +- Desktop: `std::chrono::steady_clock`, `std::malloc`/`std::free`, BSD sockets (`socket`, `sendto`, `inet_pton`) +- Types: `nrOfLightsType = uint32_t`, `lengthType = int16_t` (desktop uses larger types) +- Vendor `doctest.h` into `test/` + +### Step 2: Color Math + +Files: `src/core/color.h`, `test/test_color.cpp` + +```cpp +namespace mm { + struct RGB { uint8_t r, g, b; }; + constexpr RGB hsvToRgb(uint8_t h, uint8_t s, uint8_t v); // 6-sector integer + constexpr uint8_t scale8(uint8_t val, uint8_t scale); +} +``` + +RGB struct is a return type only — buffers remain `uint8_t*`. Tests: h=0→red, h=85→green, h=170→blue, s=0→white (at v=255), v=0→black, scale8(255,128)≈127. + +### Step 3: Control + MoonModule + +Files: `src/core/Control.h`, `src/core/MoonModule.h`, `test/test_moonmodule.cpp` + +```cpp +namespace mm { + enum class ControlType : uint8_t { Uint8, Uint16, Bool, Text }; + + struct ControlDescriptor { // <16 bytes on ESP32 (32-bit pointers) + void* ptr; // pointer to class variable + const char* name; // flash/constexpr string + ControlType type; + uint8_t min, max; + }; + + template<size_t Capacity = 8> + struct ControlList { ... 
}; + + class MoonModule { + public: + virtual ~MoonModule() = default; + virtual void setup() {} + virtual void loop() {} + virtual void loop20ms() {} + virtual void loop1s() {} + virtual void teardown() {} + virtual void onBuildControls() {} + virtual void onAllocateMemory() {} + const char* name() const; + MoonModule* parent() const; + void setParent(MoonModule* p); + protected: + ControlList<8> controls_; + template<typename T> + void addControl(const char* name, T& var, T min = {}, T max = {}); + private: + const char* name_ = nullptr; + MoonModule* parent_ = nullptr; + }; +} +``` + +Tests: concrete subclass with uint8_t control, verify pointer binding, lifecycle calls. + +### Step 4: Buffer + +Files: `src/light/Buffer.h`, `test/test_buffer.cpp` + +```cpp +namespace mm { + class Buffer { + public: + bool allocate(nrOfLightsType nrOfLights, uint8_t channelsPerLight); + void free(); + void clear(); + uint8_t* data(); + std::span<uint8_t> span(); + nrOfLightsType count() const; + uint8_t channelsPerLight() const; + size_t bytes() const; + // Move-only + }; +} +``` + +Tests: allocate 256×3, verify bytes/count/channelsPerLight, clear zeros, move leaves source null, double-free safe. 
+ +### Step 5: LayoutGroup + GridLayout + +Files: `src/light/LayoutGroup.h`, `src/light/GridLayout.h`, `test/test_grid_layout.cpp` + +```cpp +namespace mm { + using CoordCallback = void(*)(void* ctx, nrOfLightsType idx, lengthType x, lengthType y, lengthType z); + + class LayoutBase : public MoonModule { + virtual nrOfLightsType lightCount() const = 0; + virtual void forEachCoord(CoordCallback cb, void* ctx) const = 0; + }; + + class LayoutGroup : public MoonModule { + void addLayout(LayoutBase* layout); + nrOfLightsType totalLightCount() const; + void forEachCoord(CoordCallback cb, void* ctx) const; + }; + + class GridLayout : public LayoutBase { + lengthType width = 16, height = 16, depth = 1; + }; +} +``` + +Tests: 4×4×1 yields 16 coords row-major, 2×2×2 yields 8, totalLightCount with multiple layouts. + +### Step 6: Scheduler + +Files: `src/core/Scheduler.h` + +```cpp +namespace mm { + class Scheduler { + public: + void addModule(MoonModule* mod); + void setup(); // setup → onBuildControls → onAllocateMemory on each + void tick(); // loop on all, loop20ms/loop1s when due + void teardown(); + uint32_t elapsed() const; + private: + std::array<MoonModule*, 32> modules_{}; + uint8_t moduleCount_ = 0; + }; +} +``` + +Tested via integration test. Fixed-capacity array, no heap. + +### Step 7: Layer + EffectBase + RainbowEffect + +Files: `src/light/EffectBase.h`, `src/light/Layer.h`, `src/light/RainbowEffect.h`, `test/test_rainbow.cpp` + +```cpp +namespace mm { + class EffectBase : public MoonModule { + // Accessors delegate to parent Layer + uint8_t* buffer(); + lengthType width() const; + lengthType height() const; + // ... 
+ }; + + class Layer : public MoonModule { + void setLayoutGroup(LayoutGroup* lg); + void addEffect(EffectBase* effect); + void onAllocateMemory() override; // allocate buffer from layout dims + void loop() override; // run each effect's loop() + Buffer& buffer(); + lengthType width() const; + // elapsed_ updated from platform::millis() at start of loop() + }; + + class RainbowEffect : public EffectBase { + uint8_t speed = 60; // BPM + void loop() override; + // hue = (x + y) * scale + elapsed_phase, hsvToRgb(hue, 255, 255) + }; +} +``` + +Tests: 4×4 grid + rainbow at elapsed=0, verify pixel (0,0) matches hsvToRgb(0,255,255), buffer non-zero. + +### Step 8: DriverGroup + ArtNetSendDriver + +Files: `src/light/DriverGroup.h`, `src/light/ArtNetSendDriver.h`, `test/test_artnet_packet.cpp` + +```cpp +namespace mm { + class DriverBase : public MoonModule { + virtual void setSourceBuffer(Buffer* buf) = 0; + }; + + class DriverGroup : public MoonModule { + void addDriver(DriverBase* driver); + void setLayer(Layer* layer); // reads layer buffer directly + void loop() override; // calls each driver's loop() + }; + + class ArtNetSendDriver : public DriverBase { + char ip[16] = "192.168.1.70"; + uint16_t universeStart = 0; + uint8_t fps = 50; + // buildPacket(buf, universe, data, len) — testable without network + // sendUniverse() calls buildPacket then socket.send + }; +} +``` + +`buildPacket()` is a separate method for testability (writes to byte array, no network I/O). + +Tests: header "Art-Net\0", OpCode 0x5000 (LE), ProtVer 14 (BE), sequence, universe (LE), length (BE), data at offset 18. Universe splitting: 256 RGB lights → 2 universes. 
+ +ArtNet byte order details: +- OpCode at offset 8: little-endian (0x00, 0x50) +- ProtVer at offset 10: big-endian (0x00, 0x0e) +- Universe at offset 14: little-endian +- Length at offset 16: big-endian + +### Step 9: main.cpp + Integration Test + +Files: `src/main.cpp`, `test/test_pipeline.cpp` + +```cpp +int main() { + mm::Scheduler scheduler; + mm::LayoutGroup layoutGroup; + mm::GridLayout grid; + layoutGroup.addLayout(&grid); + + mm::Layer layer; + layer.setLayoutGroup(&layoutGroup); + mm::RainbowEffect rainbow; + layer.addEffect(&rainbow); + + mm::DriverGroup driverGroup; + driverGroup.setLayer(&layer); + mm::ArtNetSendDriver artnet; + driverGroup.addDriver(&artnet); + + scheduler.addModule(&layoutGroup); + scheduler.addModule(&grid); + scheduler.addModule(&layer); + scheduler.addModule(&rainbow); + scheduler.addModule(&driverGroup); + scheduler.addModule(&artnet); + + scheduler.setup(); + while (true) scheduler.tick(); + scheduler.teardown(); +} +``` + +All objects stack-allocated. Only the Buffer inside Layer uses `platform::alloc`. + +Integration test: create full pipeline, run a few ticks, use `buildPacket()` to verify ArtNet packets contain non-zero rainbow data and correct universe count. + +## Verification + +1. `cmake -B build && cmake --build build` — zero warnings +2. `cd build && ctest --output-on-failure` — all 7 test files pass +3. `./build/mmv3` — runs, sends ArtNet packets to 192.168.1.70 +4. Lights visible on hub75 panel via ArtNet receiver — animated rainbow +5. Platform boundary check: no `#ifdef` or platform includes outside `src/platform/` diff --git a/docs/history/plans/Plan-20260519 - ESP32 Deployment.md b/docs/history/plans/Plan-20260519 - ESP32 Deployment.md new file mode 100644 index 0000000..deab2d0 --- /dev/null +++ b/docs/history/plans/Plan-20260519 - ESP32 Deployment.md @@ -0,0 +1,135 @@ +# Plan: ESP32 Deployment + +## Context + +Item 2 from docs/plan.md. 
The core pipeline works on desktop (GridLayout → RainbowEffect → ArtNet → lights on panel). Now deploy the same pipeline on ESP32dev to prove the platform abstraction works. No System module — that comes after the UI. + +## What needs to happen + +1. ESP32 platform implementations (timing, alloc, UDP socket) +2. ESP-IDF project wrapper (`esp32/`) +3. Refactor `src/main.cpp` to share pipeline wiring between desktop and ESP32 entry points +4. WiFi init in ESP32 entry point +5. FreeRTOS watchdog yield + +## Files + +``` +src/platform/ + platform.h # MODIFY: add yield() + desktop/ + platform_desktop.cpp # MODIFY: add yield(), move UdpSocket::close to use ::close directly + main_desktop.cpp # NEW: int main() with SIGINT handler + esp32/ + platform_esp32.cpp # NEW: esp_timer, heap_caps_malloc, lwIP sockets, vTaskDelay +src/ + main.cpp # MODIFY: extract mm_main(volatile bool&), add platform::yield() +esp32/ + CMakeLists.txt # NEW: ESP-IDF project root + main/ + CMakeLists.txt # NEW: idf_component_register + main.cpp # NEW: app_main, WiFi init, calls mm_main + wifi_credentials.example.h # NEW: template for SSID/password +CMakeLists.txt # MODIFY: add main_desktop.cpp to mmv3 executable +.gitignore # MODIFY: add esp32 build artifacts, wifi_credentials.h +``` + +## Implementation Steps + +### Step 1: Add `platform::yield()` and refactor entry points + +Add `void yield()` to `platform.h`. Desktop: `sched_yield()` or no-op. + +Refactor `src/main.cpp`: extract `void mm_main(volatile bool& keepRunning)` with the pipeline wiring + scheduler loop + `platform::yield()` call each iteration. No signal handling, no `int main()`. 
+ +Create `src/platform/desktop/main_desktop.cpp`: +```cpp +#include <csignal> +extern void mm_main(volatile bool& keepRunning); +static volatile bool running = true; +static void signalHandler(int) { running = false; } +int main() { + std::signal(SIGINT, signalHandler); + mm_main(running); + return 0; +} +``` + +Update root `CMakeLists.txt`: +```cmake +add_executable(mmv3 src/main.cpp src/platform/desktop/main_desktop.cpp) +``` + +Verify: desktop build + tests still pass. + +### Step 2: ESP32 platform implementation + +Create `src/platform/esp32/platform_esp32.cpp`: +- `millis()` → `esp_timer_get_time() / 1000` +- `micros()` → `esp_timer_get_time()` +- `alloc()` → `heap_caps_malloc(MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT)` with fallback to `MALLOC_CAP_8BIT` +- `free()` → `heap_caps_free()` +- `UdpSocket` → same BSD socket code as desktop but with `lwip/sockets.h` +- `yield()` → `vTaskDelay(pdMS_TO_TICKS(1))` + +### Step 3: ESP-IDF project wrapper + +`esp32/CMakeLists.txt`: +```cmake +cmake_minimum_required(VERSION 3.16) +include($ENV{IDF_PATH}/tools/cmake/project.cmake) +project(mmv3) +``` + +`esp32/main/CMakeLists.txt`: +```cmake +idf_component_register( + SRCS "main.cpp" "../../src/main.cpp" "../../src/platform/esp32/platform_esp32.cpp" + INCLUDE_DIRS "../../src" +) +target_compile_options(${COMPONENT_LIB} PRIVATE -Wall -Wextra -Werror) +``` + +### Step 4: ESP32 entry point + +`esp32/main/main.cpp`: +- NVS init +- WiFi STA connect (hardcoded credentials from `wifi_credentials.h`) +- Wait for IP +- Call `mm_main(running)` + +`esp32/main/wifi_credentials.example.h`: +```cpp +#pragma once +#define WIFI_SSID "your_ssid" +#define WIFI_PASS "your_password" +``` + +Actual `wifi_credentials.h` is gitignored. 
+ +### Step 5: sdkconfig.defaults + .gitignore + +`esp32/sdkconfig.defaults`: +- `CONFIG_ESP_MAIN_TASK_STACK_SIZE=8192` +- `CONFIG_SPIRAM=y` + `CONFIG_SPIRAM_USE_CAPS_ALLOC=y` +- `CONFIG_COMPILER_CXX_EXCEPTIONS=n` +- `CONFIG_LWIP_SO_REUSE=y` + +`.gitignore` additions: `esp32/build/`, `esp32/sdkconfig`, `esp32/sdkconfig.old`, `esp32/main/wifi_credentials.h` + +## Verification + +1. `cmake --build build` — desktop still builds, zero warnings +2. `cd build && ctest --output-on-failure` — all tests pass +3. `./build/test/mm_scenarios` — scenario passes +4. `python scripts/check/check_platform_boundary.py` — passes +5. `cd esp32 && idf.py set-target esp32 && idf.py build` — ESP32 builds +6. Flash + monitor: WiFi connects, serial shows "mmv3 running", ArtNet packets arrive at receiver +7. Lights visible on hub75 panel from ESP32 + +## Notes + +- WiFi credentials are hardcoded for this deployment. Proper WiFi MoonModule comes later. +- Grid defaults to 128x128 (fits in PSRAM). For ESP32 without PSRAM, pass smaller dimensions. +- ESP-IDF v5.1+ required for C++20 support. +- The `volatile bool` for keepRunning is sufficient — no signal handler on ESP32, no cross-thread access. diff --git a/docs/history/plans/Plan-20260519 - HTTP Server + WebSocket + Web UI (Items 5+6a).md b/docs/history/plans/Plan-20260519 - HTTP Server + WebSocket + Web UI (Items 5+6a).md new file mode 100644 index 0000000..d0d5134 --- /dev/null +++ b/docs/history/plans/Plan-20260519 - HTTP Server + WebSocket + Web UI (Items 5+6a).md @@ -0,0 +1,162 @@ +# Plan: HTTP Server + WebSocket + Web UI (Items 5+6a) + +## Context + +Add HTTP server MoonModule, WebSocket for real-time state push, and a basic Web UI with tree view and auto-rendered controls. Enables effect/modifier switching from the browser and live scenario testing via HTTP API. Port 80. 
+ +## Files + +``` +src/platform/ + platform.h # MODIFY: add TcpServer + TcpConnection + desktop/ + platform_desktop.cpp # MODIFY: BSD socket implementations + esp32/ + platform_esp32.cpp # MODIFY: lwIP socket implementations (same API) +src/core/ + Scheduler.h # MODIFY: add moduleCount()/module(i) accessors + HttpServerModule.h # NEW: HTTP + WebSocket + REST API + JSON state +src/light/ + LayoutGroup.h # MODIFY: add layout(i) accessor + Layer.h # MODIFY: add effectCount/effect(i)/modifierCount/modifier(i) + DriverGroup.h # MODIFY: add driverCount/driver(i) +src/ui/ + index.html # NEW: minimal HTML5 with sidebar + cards + app.js # NEW: WebSocket, tree render, control render, debounce + style.css # NEW: dark theme per ui-spec +src/main.cpp # MODIFY: wire HttpServerModule +test/ + test_http_server.cpp # NEW: JSON state, control setter, HTTP parse + CMakeLists.txt # MODIFY: add test +``` + +## Implementation Steps + +### Step 1: Platform — TcpServer + TcpConnection + +Add to `platform.h`: +```cpp +class TcpConnection { + explicit TcpConnection(int fd); + bool valid() const; + int read(uint8_t* buf, size_t maxLen); // non-blocking, -1 = nothing, 0 = closed + bool write(const uint8_t* data, size_t len); + void close(); + // Move-only +}; + +class TcpServer { + bool open(uint16_t port); + TcpConnection accept(); // non-blocking + void close(); +}; +``` + +Desktop: BSD sockets with `O_NONBLOCK`, `SO_REUSEADDR`, `listen(backlog=8)`. +ESP32: same lwIP socket API. + +### Step 2: Scheduler + container accessors + +`Scheduler.h`: add `moduleCount()`, `module(i)` — one-liners. + +`LayoutGroup.h`: add `layout(i)` accessor. +`Layer.h`: add `effectCount()`, `effect(i)`, `modifierCount()`, `modifier(i)`. +`DriverGroup.h`: add `driverCount()`, `driver(i)`. + +### Step 3: HttpServerModule + +`src/core/HttpServerModule.h` — single-file MoonModule, ~400 lines. 
+ +- Control: `port` (uint16_t, default 80) +- `setup()`: open TcpServer on port +- `loop20ms()`: accept connection, parse HTTP, route, respond, close (or upgrade to WebSocket) +- `loop1s()`: push state JSON to WebSocket clients + +**REST API:** +- `GET /` → index.html +- `GET /app.js` → app.js +- `GET /style.css` → style.css +- `GET /api/state` → JSON module tree with controls +- `POST /api/control` → set value: `{"module":"Noise","control":"scale","value":8}` + +**JSON state format:** +```json +{"modules": [ + {"name": "LayoutGroup", "controls": [], "children": [ + {"name": "Grid", "controls": [{"name":"width","type":"uint8","value":128,"min":1,"max":255}]} + ]}, + {"name": "Layer", "controls": [], "children": [ + {"name": "Noise", "controls": [{"name":"scale","type":"uint8","value":4,"min":1,"max":32}]}, + {"name": "Mirror", "controls": [{"name":"mirrorX","type":"bool","value":true}]} + ]}, + {"name": "DriverGroup", "controls": [], "children": [ + {"name": "ArtNet", "controls": [{"name":"ip","type":"text","value":"192.168.1.70"}]} + ]} +]} +``` + +**Tree walking:** HttpServerModule gets explicit pointers (`setLayoutGroup`, `setLayer`, `setDriverGroup`) — concrete, type-safe, no virtual children interface needed. + +**WebSocket:** RFC 6455 upgrade on `GET /ws`. SHA-1 + base64 for handshake (~60 lines). Fixed array of 4 `TcpConnection` clients. State push via text frames in `loop1s()`. Server→client only; client mutations via REST POST. + +**Static file serving:** `fopen`/`fread` from `uiPath_` (configurable, default `"src/ui"`). Content-type by extension. 
+ +### Step 4: Web UI + +`src/ui/index.html` (~80 lines): +- Status bar with WebSocket dot (green/gray) +- Side nav listing root modules +- Main area for module cards with controls + +`src/ui/app.js` (~200 lines): +- `connectWs()` → `ws://host/ws`, auto-reconnect +- `handleState(data)` → selective DOM update (not full rebuild) +- `renderControl(ctrl)` → slider (uint8 with min/max), checkbox (bool), text input (text), number (uint16) +- `sendControl(module, control, value)` → POST /api/control +- 150ms slider debounce, 500ms text debounce +- `dragTs` per control to prevent WS updates overwriting active drags + +`src/ui/style.css` (~100 lines): +- Dark theme: bg `#1a1a2e`, text `#e0e0e0`, accent `#a78bfa` +- Module cards, responsive sidebar, system-ui font + +### Step 5: Wire into main.cpp + +```cpp +mm::HttpServerModule httpServer; +httpServer.setName("HttpServer"); +httpServer.setScheduler(&scheduler); +httpServer.setLayoutGroup(&layoutGroup); +httpServer.setLayer(&layer); +httpServer.setDriverGroup(&driverGroup); +scheduler.addModule(&httpServer); +``` + +Print `HTTP server → http://localhost:80` at startup. + +### Step 6: Tests + +`test/test_http_server.cpp`: +- JSON state contains expected module names and control values +- Control setter: set via name, verify bound variable changed +- HTTP request line parsing +- WebSocket accept key computation (SHA-1 + base64) + +## What's NOT in this commit + +- 3D WebGL preview (5+6b) +- Type picker / module creation from UI +- Drag reorder +- Config persistence +- Module add/remove from UI +- ESP32 asset embedding (serve from disk only) + +## Verification + +1. `cmake --build build` — zero warnings +2. `ctest --output-on-failure` — all tests pass +3. `./build/mmv3` → open http://localhost:80 → see module tree with controls +4. Change effect control (e.g. Noise scale slider) → ArtNet output changes +5. WebSocket connection dot is green +6. Platform boundary check passes +7. 
ESP32 build still compiles (TcpServer added to esp32 platform too) diff --git a/docs/history/plans/Plan-20260519 - Noise Effect + Mirror Modifier + MappingLUT.md b/docs/history/plans/Plan-20260519 - Noise Effect + Mirror Modifier + MappingLUT.md new file mode 100644 index 0000000..3d91c87 --- /dev/null +++ b/docs/history/plans/Plan-20260519 - Noise Effect + Mirror Modifier + MappingLUT.md @@ -0,0 +1,113 @@ +# Plan: Noise Effect + Mirror Modifier + MappingLUT + +## Context + +Items 3+4 from plan.md. Add a second effect (Noise) and the first modifier (Mirror kaleidoscope) with the full MappingLUT. Proves effect variety, modifiers, 1:N mapping, and LUT rebuild. + +## Implementation Steps + +### Step 1: NoiseEffect + +File: `src/light/NoiseEffect.h` (NEW), `test/test_noise.cpp` (NEW) + +Same pattern as RainbowEffect. Controls: `scale` (uint8_t, 1-32, default 4), `speed` (uint8_t, 0-255, default 50). Hash-based value noise: `(x*1619 + y*31337 + t*6271)` with bilinear interpolation and smoothstep. Output: `hsvToRgb(noiseValue, 200, 255)`. All integer math. + +Tests: non-zero output, spatial variation, different from rainbow. + +Promote `docs/moonmodules_draft/light/effects/NoiseEffect.md` → `docs/moonmodules/light/effects/NoiseEffect.md`. + +### Step 2: MappingLUT + +File: `src/light/MappingLUT.h` (NEW), `test/test_mapping_lut.cpp` (NEW) + +Simplified CSR format (skip union/bitpacking for now): +- `oneToOneMapping_` flag — skip LUT when logical == physical +- `offsets_[logicalCount + 1]` + `destinations_[]` flat arrays for 1:N +- `setOneToOne(count)`, `build(logicalCount, maxDest)`, `setMapping(idx, physicals, count)`, `finalize()` +- `forEachDestination(logicalIdx, callback)` — hot-path accessor +- Allocated via `platform::alloc` + +Tests: default is oneToOne, build with known 1:N mappings, verify destinations, free/rebuild. 
+ +### Step 3: ModifierBase + MirrorModifier + +Files: `src/light/ModifierBase.h` (NEW), `src/light/MirrorModifier.h` (NEW), `test/test_mirror.cpp` (NEW) + +ModifierBase: +```cpp +virtual void logicalDimensions(physW, physH, physD, &logW, &logH, &logD) const = 0; +virtual void mapToPhysical(lx, ly, lz, physW, physH, physD, + nrOfLightsType* outPhysicals, nrOfLightsType& outCount, + nrOfLightsType maxOut) const = 0; +``` + +Output array pattern (not template callback) — max 8 entries on stack for XYZ mirror. + +MirrorModifier: +- Controls: `mirrorX` (bool, true), `mirrorY` (bool, true), `mirrorZ` (bool, false) +- `logicalDimensions`: halves mirrored axes with ceiling division +- `mapToPhysical`: nested iteration over mirror combinations, deduplication for centre-axis lights +- Physical index: `pz * physW * physH + py * physW + px` (matches GridLayout row-major) + +Tests: logical dimensions (even/odd), corner pixel → 4 positions, centre pixel dedup, no-mirror → 1 position. + +Promote draft spec → `docs/moonmodules/light/modifiers/MirrorModifier.md`. + +### Step 4: Layer — modifier support + rebuildLUT + +File: `src/light/Layer.h` (MODIFY) + +- Add `std::array<ModifierBase*, 4> modifiers_` + `addModifier()` +- Add `MappingLUT lut_` member +- Track logical vs physical dimensions separately +- `width()`/`height()`/`depth()` return logical (effects see logical space) +- Add `physicalLightCount()` accessor +- `rebuildLUT()`: if no modifiers → `lut_.setOneToOne()`, logical == physical. If modifier → compute logical dims, allocate CSR, iterate logical coords calling `mapToPhysical`, fill LUT. 
+- `onAllocateMemory()`: call `rebuildLUT()`, allocate buffer to logical size +- Propagate lifecycle to modifiers (same as effects) +- Expose `const MappingLUT& lut() const` + +### Step 5: BlendMap + +File: `src/light/BlendMap.h` (NEW), `test/test_blend_map.cpp` (NEW) + +Free function: `void blendMap(const Buffer& src, Buffer& dst, const MappingLUT& lut, uint8_t channelsPerLight)` + +- If oneToOne: memcpy (fast path, but DriverGroup skips blendMap entirely in this case) +- Otherwise: clear dst, iterate logical lights, for each destination write src channels with additive clamping + +Tests: oneToOne copies, 1:N mapping produces duplicated pixels, additive clamping. + +### Step 6: DriverGroup — output buffer + +File: `src/light/DriverGroup.h` (MODIFY) + +- Add `Buffer outputBuffer_` +- `onAllocateMemory()`: if `layer_->lut().isOneToOne()`, pass layer buffer to drivers (current behavior). Otherwise allocate outputBuffer_ to physical size, pass to drivers. +- `loop()`: if LUT active, call `blendMap()` before driver loops +- Add `physicalLightCount` from `layer_->physicalLightCount()` + +### Step 7: Wire + scenarios + +File: `src/main.cpp` (MODIFY) + +Add MirrorModifier to the pipeline. Keep both Rainbow and Noise as effects (Noise runs after Rainbow, overwriting — proves second effect works). + +File: `test/scenario_runner.cpp` (MODIFY) — add NoiseEffect, MirrorModifier to registry. + +File: `test/scenarios/mirror.json` (NEW) — grid with mirror, verify pipeline works with LUT. + +### Step 8: Documentation + +- Promote NoiseEffect and MirrorModifier specs from draft +- Update docs/testing.md with new test sections +- Add test links to promoted specs + +## Verification + +1. `cmake --build build` — zero warnings +2. `ctest --output-on-failure` — all tests pass (existing + new) +3. `./build/test/mm_scenarios` — all scenarios pass including mirror +4. Platform boundary check passes +5. Desktop: rainbow+noise with mirror visible on ArtNet panel (kaleidoscope pattern) +6. 
ESP32: rebuild and flash — same pipeline with mirror works on device diff --git a/docs/history/plans/Plan-20260520 - 3D WebGL Preview (Item 5b).md b/docs/history/plans/Plan-20260520 - 3D WebGL Preview (Item 5b).md new file mode 100644 index 0000000..c256738 --- /dev/null +++ b/docs/history/plans/Plan-20260520 - 3D WebGL Preview (Item 5b).md @@ -0,0 +1,122 @@ +# Plan: 3D WebGL Preview (Item 5b) + +## Context + +Add a PreviewDriver that streams binary light data via WebSocket, and a 3D point-cloud renderer in the browser UI. This gives visual feedback without needing hardware — see the noise/rainbow/mirror pattern in 3D in the browser. + +## Design + +### How the PreviewDriver sends binary frames + +The PreviewDriver is a DriverBase (like ArtNetSendDriver) — it reads from the source buffer in `loop()`. But it needs to send data to WebSocket clients, which are owned by HttpServerModule. + +Options: (a) PreviewDriver gets a pointer to HttpServerModule, (b) a shared broadcast function, (c) PreviewDriver builds the frame and a callback sends it. + +Cleanest: **HttpServerModule exposes a `broadcastBinary(data, len)` method.** The PreviewDriver gets a pointer to HttpServerModule (set in main.cpp wiring). This is similar to how DriverGroup gets a Layer pointer. HttpServerModule is a system service — drivers that need network output reference it. + +But wait — this couples a light-domain driver to a core module. That's the same issue we just fixed. Better: add a generic broadcast interface. But that's over-engineering for one use case. + +Pragmatic approach: **PreviewDriver stores a function pointer** `void(*)(const uint8_t*, size_t)` set by the caller. HttpServerModule provides the function. No #include needed between them — just a function pointer set in main.cpp. + +Actually even simpler: **HttpServerModule already runs loop1s() for state push. Add binary preview push to the same loop.** HttpServerModule already has access to the Scheduler, which has the Layer. 
It can read the output buffer directly and send binary frames. No PreviewDriver needed — just a toggle control on HttpServerModule. + +Wait — that violates the architecture. The DriverGroup owns the output buffer and the blend+map step. HttpServerModule reading the buffer directly bypasses the pipeline. + +Best approach: **PreviewDriver as a real driver in DriverGroup.** It builds the binary frame in its `loop()` and stores it in a member buffer. HttpServerModule checks for this buffer in its `loop20ms()` (or a faster rate) and broadcasts it. The connection: HttpServerModule finds the PreviewDriver via the Scheduler's generic `childCount()`/`child()` tree — no light domain includes needed. + +Actually this is too complex. Let me go with the simplest thing that works: + +**PreviewDriver builds frames. HttpServerModule broadcasts them.** They're connected via a shared pointer to a frame buffer. Main.cpp sets it up. + +Simplest concrete approach: +1. A global/shared `struct PreviewFrame { uint8_t* data; size_t len; bool ready; }` +2. PreviewDriver writes to it in `loop()` +3. HttpServerModule reads from it in `loop20ms()` and broadcasts + +This is essentially a single-slot producer/consumer with no lock (single-threaded scheduler). + +## Files + +``` +src/light/PreviewDriver.h # NEW: builds binary preview frames +src/core/HttpServerModule.h # MODIFY: add binary frame broadcast +src/ui/app.js # MODIFY: add WebGL 3D renderer +src/ui/style.css # MODIFY: add canvas styling +src/main.cpp # MODIFY: wire PreviewDriver +``` + +## Implementation Steps + +### Step 1: PreviewFrame shared struct + +Add to a small header or inline in PreviewDriver: +```cpp +struct PreviewFrame { + uint8_t* data = nullptr; + size_t len = 0; + bool ready = false; +}; +``` + +Allocated once at setup, reused every frame. Single writer (PreviewDriver), single reader (HttpServerModule). + +### Step 2: PreviewDriver + +`src/light/PreviewDriver.h` — single-file MoonModule, DriverBase. 
+ +- Control: `fps` (uint8_t, default 20, range 1-60) +- `setup()`: allocate frame buffer (7 header + w*h*d*3 data) +- `loop()`: FPS-limited. Build frame: header `[0x02][w16][h16][d16]` + RGB data from source buffer. Set `ready = true`. +- Frame format matches v1: 7-byte header + flat RGB. +- Gets grid dimensions from Layer (via DriverGroup → Layer → width/height/depth). But PreviewDriver only has the source buffer, not the Layer. Solution: store width/height/depth in the PreviewDriver, set when buffer is passed. + +Actually, the driver needs the dimensions to build the header. Options: +- Pass dimensions when setting source buffer (add to DriverBase interface? No, that changes existing drivers) +- PreviewDriver gets a pointer to the Layer (like DriverGroup does) +- Store dimensions alongside the frame buffer + +Simplest: PreviewDriver stores `w`, `h`, `d` set by the caller in main.cpp or by DriverGroup. DriverGroup already knows the Layer's dimensions. Add a `setDimensions(w, h, d)` method on PreviewDriver, called from DriverGroup::onAllocateMemory(). + +But that requires DriverGroup to know about PreviewDriver specifically... No. Better: add dimensions to the DriverBase interface or pass them generically. + +Cleanest: `PreviewDriver` has public `lengthType width, height, depth` fields set in main.cpp. When grid changes, the HttpServerModule's `onAllocateMemory` rebuild (which calls all modules) will handle it. Actually main.cpp can just set them once and they match the grid. + +Even simpler: **PreviewDriver reads from the physical output buffer (same as ArtNet driver).** The physical buffer IS the grid layout. PreviewDriver knows the grid size because it's set in main.cpp. For this commit, hardcode or pass as constructor args. + +Actually — let me just make it work: PreviewDriver stores a PreviewFrame pointer, dimensions, and an fps control. Main.cpp sets up the shared frame and passes it to both PreviewDriver and HttpServerModule. 
+ +### Step 3: HttpServerModule — binary broadcast + +Add `sendWsBinaryFrame()` (same as `sendWsTextFrame` but with first frame byte `0x82` — FIN bit + binary opcode `0x2` — instead of the text frame's `0x81`). + +Add `setPreviewFrame(PreviewFrame*)`. In `loop20ms()`, if `frame->ready`, broadcast to all WebSocket clients and set `ready = false`. + +### Step 4: WebGL 3D renderer in app.js + +Add to `src/ui/app.js`: +- Detect binary WebSocket messages (`evt.data instanceof ArrayBuffer`) +- Parse 7-byte header for dimensions +- Build WebGL point cloud: interleaved [x,y,z,r,g,b] float array +- Orbit camera with mouse drag + wheel zoom +- Auto-sized point rendering + +Add canvas element to `src/ui/index.html`. +Add canvas styling to `src/ui/style.css`. + +### Step 5: Wire in main.cpp + +```cpp +PreviewFrame previewFrame; +previewDriver.setPreviewFrame(&previewFrame); +httpServer.setPreviewFrame(&previewFrame); +``` + +## Verification + +1. `cmake --build build` — zero warnings +2. `ctest` — all tests pass +3. `./build/mmv3` → open http://localhost:8080 → see 3D preview canvas +4. Noise effect visible as colored point cloud, mirror creates kaleidoscope pattern +5. Mouse drag orbits, wheel zooms +6. ESP32 build compiles +7. Platform boundary check passes diff --git a/docs/history/plans/Plan-20260520 - Adaptive Memory Allocation & Memory Scenario Testing.md b/docs/history/plans/Plan-20260520 - Adaptive Memory Allocation & Memory Scenario Testing.md new file mode 100644 index 0000000..9eec5a5 --- /dev/null +++ b/docs/history/plans/Plan-20260520 - Adaptive Memory Allocation & Memory Scenario Testing.md @@ -0,0 +1,202 @@ +# Plan: Adaptive Memory Allocation & Memory Scenario Testing + +## Context + +The system drives 128x128 (16384 LEDs) on ESP32 without PSRAM (~320KB internal RAM). This plan defines the adaptive memory allocation strategy and the scenario testing that guards it. This is the core architectural piece — every byte matters, and the system must degrade gracefully rather than fail when memory is insufficient. 
+ +**Why now:** Per-module timing is in place, scenario infrastructure works, but there's no memory prediction, no adaptive allocation, and no scenarios that verify memory behavior. Without this, adding features (more layers, modifiers, drivers) will silently break the 128x128 baseline on ESP32. + +## Invariants (non-negotiable) + +1. Effects ALWAYS write to their layer's logical buffer. Never to output, never to physical coordinates. +2. DriverGroup ALWAYS owns the output path (blending, mapping, brightness correction, channel reordering). +3. Layer buffer is mandatory — if it doesn't fit, reduce dimensions until it does ("at least see something"). +4. No heap allocations in the hot path (loop). All structural allocations during setup/onAllocateMemory. + +## Allocation Rules + +**Mapping LUT**: Created only if ALL of these are true: +- Modifiers exist on the layer +- Layout is not a simple non-serpentine grid (where physical == logical, making the modifier mapping trivially 1:1) +- Enough heap available (after reserving HEAP_RESERVE for stack/HTTP/overhead) + +**Driver output buffer**: Created only if: +- At least one layer has a mapping LUT actually allocated (not just "has modifiers" — the LUT must exist) +- Enough heap available + +**Result**: For 1:1 unshuffled (no modifiers, or grid-without-serpentine), zero intermediate buffers. ArtNet reads directly from layer buffer. Maximum LED count. + +## Degradation Cascade + +When memory is insufficient, degrade in this order: +1. **Full pipeline** — LUT + driver output buffer (modifier applied, clean separation) +2. **Skip driver output buffer** — LUT exists, but DriverGroup does mapping inline (slower, sequential) +3. **Skip LUT** — modifier not applied, forced 1:1 mapping +4. **Reduce layer dimensions** — halve until buffer fits, minimum 8x8 + +Each degradation is observable via flags on the module (`degraded()`, `lutSkipped()`, `outputBufferSkipped()`). 
+ +## Phases + +### Phase 1: Memory Reporting + +Add per-module memory tracking so we can measure before we optimize. + +**MoonModule base** (`src/core/MoonModule.h`): +- Add `virtual size_t classSize() const { return sizeof(MoonModule); }` +- Add `size_t dynamicBytes_ = 0` + accessor/setter — set during onAllocateMemory + +**Each MoonModule subclass** (one-liner each): +- Override `classSize()` → `return sizeof(ThisClass);` +- In `onAllocateMemory()`: set `dynamicBytes_` to actual heap used + +**MappingLUT** (`src/light/MappingLUT.h`): +- Add `size_t memoryUsed() const` — returns bytes allocated (offsets + destinations), 0 for oneToOne +- Add `static size_t estimateBytes(logicalCount, maxDest)` — pre-flight estimation + +**Buffer** (`src/light/Buffer.h`): +- `bytes()` already exists — sufficient + +**HttpServerModule** (`src/core/HttpServerModule.h`): +- Extend `writeModuleTimingJson()` to include `classSize` and `dynamicBytes` per module +- `/api/system` response grows: `{"name":"Layer","us":65,"classSize":280,"heap":49152}` + +**Console output** (`src/main.cpp`): +- Boot line: `sizeof: MoonModule=88 Layer=280 DriverGroup=120 ...` +- Per-module timing includes heap: `Layer:65us/49KB` + +### Phase 2: Adaptive Allocation + +The core algorithm. Layer and DriverGroup check available heap before allocating. 
+ +**Constants** (`src/core/types.h`): +- `constexpr size_t HEAP_RESERVE = 32768;` — minimum free heap to preserve for stack/HTTP/WiFi + +**Layer** (`src/light/Layer.h`): +- In `rebuildLUT()`: before `lut_.build()`, estimate bytes via `MappingLUT::estimateBytes()` and check `min(freeHeap() - HEAP_RESERVE, maxAllocBlock()) >= needed` +- If insufficient: `lut_.setOneToOne(physicalCount)`, set `lutSkipped_ = true`, log warning +- For buffer: if `buffer_.allocate()` fails, halve dimensions in a loop until fit or 8x8 minimum +- Add `bool lutSkipped() const` and `bool degraded() const` accessors + +**DriverGroup** (`src/light/DriverGroup.h`): +- In `onAllocateMemory()`: only allocate `outputBuffer_` if `!layer_->lut().isOneToOne()` (already done) AND enough heap +- Add `bool outputBufferSkipped() const` flag +- If skipped: still do mapping but inline (iterate LUT, write directly... or fall back to 1:1 if LUT was also skipped) + +**Grid layout** (`src/light/GridLayout.h`): +- Add `bool isSerpentine() const` (currently always false — straight grid) +- Layer uses this + modifier presence to decide if LUT is truly needed + +**Desktop testing**: Add `platform::setSimulatedFreeHeap(size_t)` to desktop platform for testing degradation without real memory pressure. + +### Phase 3: Memory Scenarios + +Scenarios that verify memory behavior. Both in-process and live. 
+ +**New scenario step types** in `test/scenario_runner.cpp`: +- `"measure": true` already captures heap — extend with memory-specific bounds +- Add `"bounds": { "heap": { "min": N }, "maxBlock": { "min": N } }` support +- Add `"bounds": { "dynamicBytes": { "module": "Layer", "equals": 768 } }` for precise checks +- Report per-step: heapBefore → heapAfter → delta + +**New scenarios:** + +`test/scenarios/memory-boot.json` — Boot overhead: +- Add all modules (no grid yet) +- Measure: sizeof() values, dynamicBytes = 0, heap baseline + +`test/scenarios/memory-1to1.json` — 1:1 unshuffled: +- Grid 16x16 + Layer + Effect + DriverGroup + ArtNet, no modifier +- Assert: LUT is oneToOne, no driver output buffer, Layer dynamicBytes = 768 + +`test/scenarios/memory-shuffled.json` — With modifier: +- Same + MirrorModifier +- Assert: LUT allocated, driver buffer allocated, report sizes + +`test/scenarios/memory-scaling.json` — Find boundaries: +- Start 8x8, increase to 16x16, 32x32, 64x64, 128x128, 256x256 +- Each step: measure heap, check bounds +- On ESP32: observe degradation cascade kicking in at some grid size + +**Live runner** (`scripts/scenario/run_live_scenario.py`): +- Parse heap/maxBlock bounds from scenario JSON +- Report memory deltas per step + +### Phase 4: Predict-Measure-Compare + +Before each step, predict memory impact. After, compare. + +**Prediction function** in `scenario_runner.cpp`: +- Given grid dimensions + channelsPerLight + modifiers → compute expected buffer sizes +- Layer buffer: `W × H × D × cpl` +- LUT: `MappingLUT::estimateBytes(logicalCount, maxDest)` +- Driver buffer: `physicalCount × cpl` (if LUT exists) +- Total predicted delta = sum of new allocations + +**Scenario output**: +```text + PREDICT Layer buffer: 49152, LUT: 0, driver buffer: 0 → total: 49152 + MEASURE heap delta: 49168 (variance: +16 bytes, 0.03%) + PASS variance < 5% +``` + +**Variance threshold**: configurable, default 5%. 
Catches leaks (consistent positive variance) and accounting errors. + +### Phase 5: Direct-to-Packet (deferred) + +For 1:1 sequential with multiple layers: DriverGroup blends directly into ArtNet packets / LED DMA. Requires multi-layer support (DriverGroup knowing about multiple layers). Also includes brightness correction and channel reordering in the output chain. + +**Defer until**: multi-layer support is implemented. Document the design now, implement later. + +### Phase 6: Architecture & Spec Updates + +Updated alongside each phase: + +- `docs/architecture-light.md` — memory tiers, degradation cascade, invariants, allocation rules +- `docs/moonmodules/core/MoonModule.md` — classSize, dynamicBytes reporting +- `docs/moonmodules/light/Layer.md` — adaptive LUT allocation, degradation behavior +- `docs/moonmodules/light/MappingLUT.md` — estimateBytes, memory formulas +- `docs/moonmodules/light/drivers/` — direct-to-packet design (for Phase 5) +- `docs/testing.md` — memory scenario descriptions +- `docs/history/memory-budget.md` — updated with actual measured values + +## Files Summary + +```text +src/core/MoonModule.h # classSize(), dynamicBytes_ +src/core/types.h # HEAP_RESERVE constant +src/light/Layer.h # adaptive LUT allocation, degradation +src/light/DriverGroup.h # adaptive output buffer, degradation flags +src/light/MappingLUT.h # memoryUsed(), estimateBytes() +src/light/GridLayout.h # isSerpentine() +src/core/HttpServerModule.h # memory fields in /api/system +src/main.cpp # sizeof boot log +src/platform/desktop/platform_desktop.cpp # setSimulatedFreeHeap +test/scenario_runner.cpp # memory bounds, predict-measure, per-step heap +test/scenarios/memory-boot.json +test/scenarios/memory-1to1.json +test/scenarios/memory-shuffled.json +test/scenarios/memory-scaling.json +scripts/scenario/run_live_scenario.py # memory bounds support +docs/architecture-light.md # memory tiers, invariants +docs/moonmodules/core/MoonModule.md +docs/moonmodules/light/Layer.md 
+docs/testing.md +``` + +## Implementation Order + +**Do now**: Phases 1 + 2 + 3 + 6 (reporting → adaptive allocation → scenarios → docs) +**Next step**: Phase 4 (predict-measure-compare) +**Deferred**: Phase 5 (direct-to-packet, needs multi-layer) + +## Verification + +1. Desktop build + all existing tests pass (no regression) +2. `sizeof` values logged at boot +3. `/api/system` returns classSize + dynamicBytes per module +4. Memory scenarios pass: 1:1 has zero LUT/driver buffer, shuffled has both +5. On desktop with simulated low heap: degradation cascade triggers correctly +6. On ESP32: 128x128 still runs, memory-scaling scenario finds actual boundary +7. Platform boundary check passes +8. Architecture docs accurately describe the implemented behavior diff --git a/docs/history/plans/Plan-20260520 - Live Scenario Testing (Item 8).md b/docs/history/plans/Plan-20260520 - Live Scenario Testing (Item 8).md new file mode 100644 index 0000000..01b9149 --- /dev/null +++ b/docs/history/plans/Plan-20260520 - Live Scenario Testing (Item 8).md @@ -0,0 +1,166 @@ +# Plan: Live Scenario Testing (Item 8) + +## Context + +Add live scenario testing: a Python runner that replays scenario JSON files via HTTP against a running device (desktop or ESP32). Same JSON format as the in-process runner. Includes per-step performance measurements (FPS, heap) and baseline regression detection. Full module CRUD via REST API. + +## What needs to happen + +### 1. HTTP API additions (HttpServerModule) + +New endpoints: +- `GET /api/system` — returns FPS, heap free, heap max block, uptime. Needed for performance measurements after each step. +- `POST /api/modules` — create a module: `{"type":"NoiseEffect","id":"noise","parent_id":"layer"}`. HttpServerModule creates the module and wires it into the tree. Triggers pipeline rebuild. +- `DELETE /api/modules/{name}` — remove a module by name. Teardown, unwire, rebuild. 
+ +These require a **module registry** — a way to create modules by type name at runtime. Currently modules are stack-allocated in main.cpp. For dynamic creation, they need to be heap-allocated with a factory. + +### 2. Module Factory + +A simple registry mapping type name → create function. Lives in core (domain-neutral): +```cpp +// In main.cpp or a new ModuleFactory.h +using CreateFn = MoonModule*(*)(); +struct ModuleFactory { + static MoonModule* create(const char* type); + static void registerType(const char* type, CreateFn fn); +}; +``` + +Registration happens in main.cpp: +```cpp +ModuleFactory::registerType("NoiseEffect", []() -> MoonModule* { return new NoiseEffect(); }); +ModuleFactory::registerType("RainbowEffect", []() -> MoonModule* { return new RainbowEffect(); }); +// etc. +``` + +HttpServerModule calls `ModuleFactory::create(type)` in `POST /api/modules`. The factory returns a heap-allocated module. The caller (HttpServerModule) adds it to the appropriate parent via `childCount()`/`child()` — but wait, we need an `addChild()` method too. + +### 3. Generic addChild on MoonModule + +Currently `addEffect()`, `addModifier()`, `addDriver()`, `addLayout()` are type-specific. For dynamic add from HTTP, we need a generic `addChild(MoonModule*)` that each container overrides: + +```cpp +// MoonModule base +virtual bool addChild(MoonModule*) { return false; } +virtual bool removeChild(MoonModule*) { return false; } +``` + +Overridden in Layer (adds as effect or modifier based on type), DriverGroup (adds as driver), LayoutGroup (adds as layout). The HTTP handler calls `parent->addChild(newModule)`. + +But how does addChild know if it's an effect or modifier? The module itself knows — EffectBase vs ModifierBase. The container can try: if `dynamic_cast<EffectBase*>` succeeds, add as effect. But RTTI is disabled on ESP32. + +Alternative: the factory also stores the "role" (effect/modifier/driver/layout). Or: addChild uses a type tag. 
+ +Simplest: add a virtual `role()` to MoonModule: +```cpp +enum class ModuleRole : uint8_t { Generic, Effect, Modifier, Driver, Layout }; +virtual ModuleRole role() const { return ModuleRole::Generic; } +``` + +EffectBase returns Effect, ModifierBase returns Modifier, etc. Then `addChild` switches on role. + +### 4. System metrics endpoint + +`GET /api/system` returns: +```json +{ + "fps": 15, + "freeHeap": 124316, + "maxBlock": 63488, + "uptime": 12345 +} +``` + +HttpServerModule tracks FPS by counting frames in loop() — but HttpServerModule uses loop20ms, not loop. Better: read from the main loop's frame counter. Or: add a simple counter to the Scheduler. + +Simplest: Scheduler already has `elapsed()`. Add `fps()` that tracks frames per second. The main loop in mm_main already counts frames — expose that. + +Actually, for live scenarios we just need the values. The Python runner calls `GET /api/system` after each step, waits for settle time, then reads. The FPS and heap come from the platform. + +### 5. Python live scenario runner + +`scripts/scenario/run_live_scenario.py`: +- Connects to a device via HTTP (host:port) +- Reads scenario JSON (same format as in-process) +- Executes steps: + - `add_module` → POST /api/modules + - `set_control` → POST /api/control + - After each step with `"measure": true`: + - Wait settle time (1-2 seconds) + - GET /api/system → record FPS, heap + - Check bounds +- Reports results +- Baseline support: `--compare-baseline`, `--update-baseline` + +### 6.
MoonDeck Live tab + +- Device discovery: scan subnet, probe `/api/state` +- Device selector (checkboxes) +- Run scenario against selected device +- Show results + +## Files + +``` +src/core/MoonModule.h # MODIFY: add ModuleRole, addChild, removeChild +src/core/ModuleFactory.h # NEW: type registry, create by name +src/core/HttpServerModule.h # MODIFY: POST /api/modules, DELETE, GET /api/system +src/core/Scheduler.h # MODIFY: add fps tracking +src/light/EffectBase.h # MODIFY: role() returns Effect +src/light/ModifierBase.h # MODIFY: role() returns Modifier +src/light/DriverGroup.h # MODIFY: addChild/removeChild, role() +src/light/LayoutGroup.h # MODIFY: addChild/removeChild, role() +src/light/Layer.h # MODIFY: addChild/removeChild, role() +src/main.cpp # MODIFY: register module types with factory +scripts/scenario/run_live_scenario.py # NEW: Python HTTP scenario runner +scripts/moondeck_config.json # MODIFY: add Live tab entries +scripts/moondeck_ui/index.html # MODIFY: Live tab content +scripts/moondeck_ui/app.js # MODIFY: device discovery UI +test/scenarios/control-change.json # NEW: scenario with set_control steps +docs/moonmodules/core/HttpServerModule.md # MODIFY: new endpoints +docs/testing.md # MODIFY: live scenario section +``` + +## Implementation Steps + +### Step 1: ModuleRole + addChild/removeChild + +Add virtual `role()` and `addChild()`/`removeChild()` to MoonModule base. Override in containers (Layer, DriverGroup, LayoutGroup) and base classes (EffectBase, ModifierBase, DriverBase, LayoutBase). Lifecycle-aware: addChild calls setup/onBuildControls/onAllocateMemory on new child if parent is already running. + +### Step 2: ModuleFactory + +Simple static registry. `registerType(name, createFn)`. `create(name)` returns heap-allocated module. Registration in main.cpp for all known types. 
+ +### Step 3: HTTP endpoints + +- `GET /api/system` — FPS (from Scheduler), freeHeap, maxAllocBlock, uptime +- `POST /api/modules` — parse JSON, create via factory, find parent, addChild, rebuild +- `DELETE /api/modules/{name}` — find module, parent->removeChild, teardown, delete + +### Step 4: Scheduler FPS tracking + +Add frame counter and FPS to Scheduler, updated in `tick()`. + +### Step 5: Python live scenario runner + +Adapted from v1's `scenario.py`. HTTP client using urllib. Per-step measurements: wait, GET /api/system, record, check bounds. Baseline JSON file. + +### Step 6: MoonDeck integration + +Live tab: device discovery (subnet scan + /api/state probe), scenario execution against selected device. + +### Step 7: New scenario + docs + +`control-change.json` — scenario that changes controls and measures impact. Update testing.md and HttpServerModule.md. + +## Verification + +1. Desktop build + tests pass +2. In-process scenarios still pass +3. Start mmv3, run live scenario against localhost:8080 — steps execute, measurements collected +4. POST /api/modules creates a new effect visible in UI +5. DELETE removes it +6. GET /api/system returns valid FPS/heap +7. ESP32: run live scenario against device IP +8. Platform boundary check passes diff --git a/docs/history/plans/Plan-20260520 - SystemModule + NetworkModule (Items 9+10).md b/docs/history/plans/Plan-20260520 - SystemModule + NetworkModule (Items 9+10).md new file mode 100644 index 0000000..55f3e88 --- /dev/null +++ b/docs/history/plans/Plan-20260520 - SystemModule + NetworkModule (Items 9+10).md @@ -0,0 +1,153 @@ +# Plan: SystemModule + NetworkModule (Items 9+10) + +## Context + +Add system diagnostics and network connectivity as MoonModules. SystemModule shows heap/fps/uptime/deviceName. NetworkModule manages Ethernet → WiFi STA → WiFi AP cascade with automatic fallback. Both appear as cards in the web UI. 
+ +Requires new control types (ReadOnly, Select, Progress) and platform functions (getMacAddress, WiFi, Ethernet, mDNS). + +## Phase 1: New Control Types + +Add three control types to support SystemModule and NetworkModule. + +**Control.h** — add to ControlType enum: +- `ReadOnly` — display-only text (ptr → char buffer, max = bufSize) +- `Select` — dropdown (ptr → uint8_t index, options stored via aux field) +- `Progress` — bar with value/total (ptr → uint32_t value, aux = total) + +Add `uint32_t aux = 0` to ControlDescriptor (Progress total, Select options pointer). + +Add methods: `addReadOnly()`, `addSelect()`, `addProgress()`. + +**HttpServerModule.h** — serialize new types in writeControls: +- ReadOnly: `{"name":"fps","type":"display","value":"42"}` +- Select: `{"name":"addressing","type":"select","value":0,"options":["DHCP","Static"]}` +- Progress: `{"name":"freeHeap","type":"progress","value":180000,"total":320000}` + +**handleSetControl** — after setting any value, also clear+rebuild controls on the target module (for dynamic onBuildControls). ReadOnly and Progress are skipped (read-only). 
+ +**app.js** — render new types: +- `display`: read-only span +- `select`: `<select>` element with options +- `progress`: `<progress>` element with percentage label + +**Files**: `src/core/Control.h`, `src/core/HttpServerModule.h`, `src/ui/app.js` + +## Phase 2: SystemModule + +**src/core/SystemModule.h** — new MoonModule: +- `deviceName` (Text, default MM-XXXX from MAC) +- Dynamic (loop1s): uptime, fps, tickTimeUs (ReadOnly), freeHeap, freeInternal (Progress) +- Static: chip, idfVersion (ReadOnly) +- Needs `setScheduler()` for fps/tickTimeUs access + +**Platform additions** (`src/platform/platform.h`): +- `getMacAddress(uint8_t[6])` — ESP32: `esp_efuse_mac_get_default()`, desktop: stable fake +- `totalHeap()` — ESP32: `heap_caps_get_total_size()`, desktop: 0 +- `totalInternalHeap()` — same for internal +- `chipModel()` — ESP32: `esp_chip_info()`, desktop: "desktop" +- `sdkVersion()` — ESP32: `esp_get_idf_version()`, desktop: compiler version + +**Registration**: first module in scheduler (before everything else). + +**Factory**: `ModuleFactory::registerType<SystemModule>("SystemModule")` + +**Files**: `src/core/SystemModule.h` (new), `src/platform/platform.h`, `src/platform/esp32/platform_esp32.cpp`, `src/platform/desktop/platform_desktop.cpp`, `src/main.cpp` + +## Phase 3: Platform Network Abstraction + +Add network functions to `src/platform/platform.h`: + +```text +bool ethInit(); +bool ethConnected(); +void ethGetIP(char* buf, size_t len); + +bool wifiStaInit(const char* ssid, const char* password); +bool wifiStaConnected(); +void wifiStaGetIP(char* buf, size_t len); +void wifiStaStop(); + +bool wifiApInit(const char* ssid, const char* ip); +bool wifiApConnected(); +void wifiApStop(); + +bool mdnsInit(const char* deviceName); +void mdnsStop(); +``` + +ESP32: implement using ESP-IDF APIs. Move Ethernet init logic from `esp32/main/main.cpp` into `platform::ethInit()` (non-blocking, no `xEventGroupWaitBits`). + +Desktop: all return false / no-op. 
+ +**Files**: `src/platform/platform.h`, `src/platform/esp32/platform_esp32.cpp`, `src/platform/desktop/platform_desktop.cpp` + +## Phase 4: NetworkModule + +**src/core/NetworkModule.h** — new MoonModule: +- Controls: ssid, password (Text), addressing (Select: DHCP/Static), dynamic IP fields, dns, status (ReadOnly) +- Priority cascade in setup(): ethInit → wifiStaInit → wifiApInit(deviceName, "4.3.2.1") +- loop1s(): monitor connections, cascade up/down, AP shutdown delay (10s) +- Reads deviceName from SystemModule (via setSystemModule pointer) +- After network transitions: `scheduler_->rebuild()` to re-evaluate light buffer allocation + +**Dynamic controls**: onBuildControls checks `addressing_` — Static shows ip/gateway/subnet/dns, DHCP hides them. + +**ESP32 only**: guarded by `#ifdef ESP_PLATFORM` in mm_main. Compiles on desktop (platform stubs) but not instantiated. + +**Registration order**: SystemModule, NetworkModule, LayoutGroup, Layer, DriverGroup, HttpServerModule. + +**Files**: `src/core/NetworkModule.h` (new), `src/main.cpp` + +## Phase 5: Clean up esp32/main.cpp + +- Remove `eth_init()`, `eth_event_handler()`, `ethEventGroup` from `esp32/main/main.cpp` +- Remove blocking wait +- `app_main()` becomes: NVS init → `mm_main()` +- Ethernet is now handled by NetworkModule via `platform::ethInit()` + +**Files**: `esp32/main/main.cpp` + +## Phase 6: Tests + Docs + +**Tests**: +- `test/test_moonmodule.cpp` — ReadOnly, Select, Progress control types +- `test/test_system_module.cpp` (new) — MAC-to-deviceName conversion +- Existing scenarios must still pass + +**Docs**: +- `docs/moonmodules/core/Control.md` — document new types +- `docs/moonmodules/core/SystemModule.md` — mark implemented +- `docs/moonmodules/core/NetworkModule.md` — mark implemented +- `docs/testing.md` — add test entries +- `docs/plan.md` — remove items 9+10 + +## Verification + +1. Desktop build + all tests pass +2. Desktop: System card shows uptime/fps/heap/deviceName in UI +3. 
ESP32 build passes +4. ESP32 with Ethernet: connects, System+Network cards visible in UI +5. ESP32 without Ethernet: falls back to WiFi STA or AP +6. WiFi credential injection via REST API works +7. Platform boundary check passes +8. Pre-commit checklist (8 steps) + +## Files Summary + +```text +src/core/Control.h # new types: ReadOnly, Select, Progress +src/core/SystemModule.h # NEW +src/core/NetworkModule.h # NEW +src/core/HttpServerModule.h # serialize new types, dynamic onBuildControls +src/platform/platform.h # getMacAddress, totalHeap, network functions +src/platform/esp32/platform_esp32.cpp # implement all new platform functions +src/platform/desktop/platform_desktop.cpp # stubs +src/main.cpp # register + create SystemModule, NetworkModule +src/ui/app.js # render display/select/progress types +esp32/main/main.cpp # strip Ethernet init +test/test_moonmodule.cpp # new control type tests +test/test_system_module.cpp # NEW: MAC-to-name test +docs/moonmodules/core/Control.md +docs/testing.md +``` diff --git a/docs/history/plans/Plan-20260521 - Control-list-driven JSON persistence (item 11).md b/docs/history/plans/Plan-20260521 - Control-list-driven JSON persistence (item 11).md new file mode 100644 index 0000000..31aefb6 --- /dev/null +++ b/docs/history/plans/Plan-20260521 - Control-list-driven JSON persistence (item 11).md @@ -0,0 +1,267 @@ +# Plan-10 — Control-list-driven JSON persistence (item 11) + +## Context + +projectMM v3 has no persistence today. Settings (deviceName, ssid/password, effect parameters, mDNS state, ArtNet target IP, grid size) reset on every reboot. The foundation commit `7f9afa3` shipped the partition layout (4MB classic + 16MB S3) and the platform fs API (`fsMount`, `fsRead`, `fsWriteAtomic`, `fsList`, `fsSetRoot`, etc.) plus `MoonModule::dirty_` + `markDirty()` and HttpServerModule's `markDirty()` hooks. Nothing reads/writes config files yet — that's this plan. + +Plan-09 attempted this and was abandoned (see `docs/history/plan-09.md`). 
The failure modes were: nested JSON parser (~250 lines), 4-phase Scheduler reorder + re-load pass, recursive `rebuildControls` during boot, per-module `loadInto(this)` boilerplate, and SystemModule needing a `deviceName_[0] == 0` guard. ~1700 lines of code for the JSON path alone. + +This plan stays minimal by: +1. Keeping JSON files **flat** (children encoded with `<index>.` key prefix, not nested objects) +2. Treating conditional controls as **always-bound with a `hidden` flag**, not "add or skip" +3. One Scheduler phase swap (no re-load, no rebuild-during-boot) +4. Reusing the existing flat JSON parser from HttpServerModule + +Intended outcome: device boots → reads `/.config/<TypeName>.json` per top-level module → overlays values onto bound control variables → modules run their `setup()` with persisted state in their member vars. Modules themselves remain unaware that persistence exists. + +## Decisions already locked + +- **Storage:** one flat JSON file per top-level MoonModule under `/.config/`. Children encoded with `<idx>.` key prefix. Reuse existing flat JSON parser. +- **Conditional visibility:** ControlList gains `bool hidden` per descriptor. Modules build their full control set unconditionally; conditional logic flips `hidden`. UI honors the flag. +- **Hidden API:** `ControlList::setHidden(uint8_t index, bool hidden)`. Called right after `addX(...)`. No change to `addX` signatures. +- **JSON helpers:** extract `parseJsonString`/`parseJsonInt`/`parseJsonBool` from HttpServerModule (private static) into a new minimal `src/core/JsonUtil.h` (~50 lines, those three functions ONLY — plan-09 grew this to 256 lines, don't repeat). +- **Lifecycle:** swap Scheduler::setup phase order from `setup→onBuildControls→onAllocateMemory` to `onBuildControls→load→setup→onAllocateMemory`. Single load pass. No re-load. No rebuildControls during boot. +- **Save trigger:** existing `markDirty()` hooks in HttpServerModule. 
FilesystemModule::loop1s() debounces 2s, walks tree, serializes any subtree with a dirty descendant, atomic write-and-rename, clears dirty flags. +- **First boot:** built-in defaults; files appear lazily after first save. +- **Test scope:** one doctest covering set→save→fresh-instance→load→assert using `platform::fsSetRoot()` for isolation. No persistence-roundtrip live scenario (live runner has no reboot op — documented in CLAUDE.md). +- **No POD constraint:** modules can have any member types. Persistence touches only what was declared via `controls_.addX()`. +- **Constants live in code, not config:** initial defaults remain in member initializers (`uint8_t scale = 4`). Load OVERLAYS those; missing keys keep the default. + +## Storage layout (final) + +``` +/.config/ + SystemModule.json → {"deviceName":"MM-3A7F","enabled":true} + NetworkModule.json → {"ssid":"home","password":"...","addressing":1, + "mDNS":true,"ip":"...","gateway":"...", + "subnet":"...","dns":"...","enabled":true} + Layer.json → {"channelsPerLight":3,"enabled":true, + "0.type":"NoiseEffect","0.scale":12,"0.bpm":60,"0.enabled":true, + "1.type":"MirrorModifier","1.mirrorX":false, + "1.mirrorY":true,"1.mirrorZ":false,"1.enabled":true} + DriverGroup.json → {"enabled":true, + "0.type":"ArtNetSendDriver","0.ip":"192.168.1.70","0.universe_start":0, + "0.fps":50,"0.enabled":true, + "1.type":"PreviewDriver","1.fps":20,"1.enabled":true} + LayoutGroup.json → {"enabled":true, + "0.type":"GridLayout","0.width":128,"0.height":64,"0.depth":1, + "0.enabled":true} +``` + +Filename uses `typeName()` directly. Children identified by position (`0.`, `1.`, etc.) — the `type` field is informational and used to detect tree-shape mismatches (skip-load if the live tree's child[N] is a different type than the persisted blob's child[N]). + +`ReadOnly` and `Progress` controls are not persisted (they're derived values). 
+ +## Lifecycle + +`Scheduler::setup()` runs four phases: + +``` +phase 1: onBuildControls() — every module binds its FULL control set +phase 2: loadAllHook() — FilesystemModule reads files, overlays bound variables +phase 3: setup() — modules' own init runs with persisted values in member vars +phase 4: onAllocateMemory() — buffers sized to final values +``` + +Scheduler exposes `setLoadAllHook(LoadAllFn fn)` taking a function pointer, so it stays independent of FilesystemModule's type (no circular include). FilesystemModule wires it in `setScheduler()`. + +NetworkModule's setup() reads `ssid_`/`password_` for the cascade — by then they're already overlaid. SystemModule's `deviceName_` is set from MAC in setup(); since setup() runs AFTER load, we need a guard: only derive from MAC if `deviceName_[0] == 0`. This is the SAME guard plan-09 added; it's correct here because the lifecycle is correct. (Plan-09's problem was the secondary re-load pass overwriting things.) + +## Conditional visibility — example + +NetworkModule today: +```cpp +void onBuildControls() override { + controls_.addReadOnly("status", statusStr_, sizeof(statusStr_)); + controls_.addText("ssid", ssid_, sizeof(ssid_)); + controls_.addText("password", password_, sizeof(password_)); + controls_.addSelect("addressing", addressing_, addressingOptions_, 2); + controls_.addBool("mDNS", mdnsEnabled_); + if (addressing_ == 1) { + controls_.addText("ip", staticIp_, sizeof(staticIp_)); + controls_.addText("gateway", staticGateway_, sizeof(staticGateway_)); + controls_.addText("subnet", staticSubnet_, sizeof(staticSubnet_)); + controls_.addText("dns", staticDns_, sizeof(staticDns_)); + } +} +``` + +After plan-10: +```cpp +void onBuildControls() override { + controls_.addReadOnly("status", statusStr_, sizeof(statusStr_)); + controls_.addText("ssid", ssid_, sizeof(ssid_)); + controls_.addText("password", password_, sizeof(password_)); + controls_.addSelect("addressing", addressing_, addressingOptions_, 2); + 
controls_.addBool("mDNS", mdnsEnabled_); + controls_.addText("ip", staticIp_, sizeof(staticIp_)); + controls_.setHidden(controls_.count() - 1, addressing_ != 1); + controls_.addText("gateway", staticGateway_, sizeof(staticGateway_)); + controls_.setHidden(controls_.count() - 1, addressing_ != 1); + controls_.addText("subnet", staticSubnet_, sizeof(staticSubnet_)); + controls_.setHidden(controls_.count() - 1, addressing_ != 1); + controls_.addText("dns", staticDns_, sizeof(staticDns_)); + controls_.setHidden(controls_.count() - 1, addressing_ != 1); +} +``` + +Persistence load can find `ip` etc. because they're always bound. Toggling `addressing` triggers a Select-change in HttpServerModule which already calls `rebuildControls()` — that re-runs `onBuildControls`, flipping the hidden flags fresh. + +## File-by-file change list + +**New files:** +- `src/core/JsonUtil.h` — ~50 lines. Contains EXACTLY `parseJsonString`, `parseJsonInt`, `parseJsonBool` (moved verbatim from HttpServerModule's private statics into `mm::json` namespace). **STRICT: no other functions. plan-09 grew this to 256 lines and that was a warning sign.** +- `src/core/FilesystemModule.h` — ~200 lines. Header-only per CLAUDE.md style. Contains: + - `setScheduler()` — wires the load hook into Scheduler + - `setup()` — mounts the filesystem + - `loop1s()` — debounced save walk + - `loadAllHook_` (static C-function) — Scheduler calls this between phase 1 and phase 3 + - `loadSubtree()`, `applyNode()`, `applyValue()` — load path + - `saveSubtree()`, `writeNode()`, `writeValue()` — save path + - `subtreeDirty()`, `clearSubtreeDirty()` — dirty walking + - `instance_` singleton. No `noteDirty()` static API is added: the existing `target->markDirty()` is enough, because FilesystemModule polls dirty flags in `loop1s()`. +- `test/test_filesystem_persistence.cpp` — ~80 lines. One TEST_CASE: set deviceName → save → recreate Scheduler+modules → load → assert deviceName matches.
+ +**Modified files:** +- `src/core/Control.h` — add `bool hidden = false;` to `ControlDescriptor` struct; add `void setHidden(uint8_t i, bool h)` method to `ControlList`. +- `src/core/Scheduler.h` — swap phase order in `setup()`. Add `LoadAllFn` typedef + `setLoadAllHook()` + private `loadAllHook_` field. +- `src/core/HttpServerModule.h` — remove the three flat parseJsonX helpers from private statics (or leave them as thin delegates calling `mm::json::*`). Add `,"hidden":%s` field to `writeControls()` per-type branches. +- `src/core/NetworkModule.h` — convert conditional `if (addressing_ == 1)` block to "always add + setHidden". Remove the `rebuildLocalControlsAndPipeline` if no longer needed (the runtime Select-change path uses HttpServerModule's `rebuildControls()` which already does this). +- `src/core/SystemModule.h` — add `if (deviceName_[0] == 0)` guard around the MAC-derived default in setup(). +- `src/core/MoonModule.h` — no change. `rebuildControls()` + `clearControlsRecursive()` from pile A remain useful for the Select-change path; they're NOT called during boot. +- `src/main.cpp` — create FilesystemModule first (`factory.create("FilesystemModule")`), `setScheduler(&scheduler)`, `setName("Filesystem")`, register it as the first scheduler module. +- `src/ui/app.js` — in `renderCards()`, skip hidden controls when iterating (`if (ctrl.hidden) continue`). +- `test/CMakeLists.txt` — add `test_filesystem_persistence.cpp`. + +**Documentation:** +- `docs/moonmodules/core/FilesystemModule.md` — new spec doc, ~80 lines. Storage layout, lifecycle, save trigger, hidden flag, ESP32 partition, platform API. Match the existing module spec doc style. +- `docs/moonmodules/core/MoonModule.md` — note the `hidden` flag and `setHidden`. +- `docs/architecture.md` — short Persistence section between Controls and Rebuild Propagation: describes the 4-phase Scheduler setup, the load hook pattern, the hidden flag, debounced save.
+- `docs/testing.md` — entry for `test_filesystem_persistence.cpp`. +- `docs/plan.md` — remove item 11 once complete. + +## Save/load flow (pseudocode) + +```text +FilesystemModule::setup(): + platform::fsMount() + cleanupTmpFiles_("/.config") # one-shot recursive .tmp removal + platform::fsMkdir("/.config") + +FilesystemModule::loadAllHook_(Scheduler* s): # called by Scheduler in phase 2 + if (!instance_) return + for each top-level module m in s: + instance_->loadSubtree(m) + +loadSubtree(m): + char path[64] + snprintf(path, "/.config/%s.json", m->typeName()) + char buf[2048] + if platform::fsRead(path, buf, sizeof(buf)) > 0: + applyNode(m, buf, prefix="") + +applyNode(m, json, prefix): + char key[48] + for each control c in m->controls(): + if c.type in (ReadOnly, Progress): continue + snprintf(key, "%s%s", prefix, c.name) + applyValue(c, json, key) # parseJsonInt/Bool/String based on c.type + snprintf(key, "%senabled", prefix) + if hasKey(json, key): + m->setEnabled(parseJsonBool(json, key)) + for i in m->childCount(): + snprintf(childPrefix, "%s%u.", prefix, i) + applyNode(m->child(i), json, childPrefix) + +FilesystemModule::loop1s(): + if !mounted_ or scheduler_ == nullptr: return + if no module has dirty(): return + if (millis() - lastDirtyMs_) < 2000: return # debounce + for each top-level m in scheduler_: + if subtreeDirty(m): saveSubtree(m); clearSubtreeDirty(m) + lastDirtyMs_ = 0 + +saveSubtree(m): + char buf[2048]; int pos = 0 + pos += snprintf("{") + pos += writeNode(m, buf+pos, ..., prefix="") + pos += snprintf("}") + char path[64]; snprintf(path, "/.config/%s.json", m->typeName()) + platform::fsWriteAtomic(path, buf, pos) +``` + +`markDirty` is set by HttpServerModule on every successful control mutation (already in place). FilesystemModule never sees the mutation directly — it just polls `dirty()` in loop1s. No `noteDirty` callback API needed. 
+ +## Critical files for implementation + +- [src/core/JsonUtil.h](src/core/JsonUtil.h) (new) +- [src/core/FilesystemModule.h](src/core/FilesystemModule.h) (new) +- [src/core/Control.h](src/core/Control.h) +- [src/core/Scheduler.h](src/core/Scheduler.h) +- [src/core/HttpServerModule.h](src/core/HttpServerModule.h) +- [src/core/NetworkModule.h](src/core/NetworkModule.h) +- [src/core/SystemModule.h](src/core/SystemModule.h) +- [src/main.cpp](src/main.cpp) +- [src/ui/app.js](src/ui/app.js) +- [test/test_filesystem_persistence.cpp](test/test_filesystem_persistence.cpp) (new) + +## Existing utilities to reuse (do NOT duplicate) + +- Flat JSON parsers in HttpServerModule.h (private statics today) → move to JsonUtil.h, then HttpServerModule + FilesystemModule both use them +- `MoonModule::dirty_` / `markDirty()` / `clearDirty()` / `dirty()` — already on every module from foundation commit 7f9afa3 +- `MoonModule::typeName()` — used for filename construction +- `MoonModule::rebuildControls()` / `clearControlsRecursive()` — used by HttpServerModule's Select-change path (no change there) +- `platform::fsMount/fsRead/fsWriteAtomic/fsList/fsMkdir/fsSetRoot` — from foundation commit +- `Buffer::clear()` and `Buffer::data()` — unchanged +- HttpServerModule's `writeControls()` JSON-emit pattern — mirror it for the save path's `writeValue()` per-ControlType branches + +## Sequencing inside the PR + +1. Move 3 parseJsonX helpers from HttpServerModule.h to `src/core/JsonUtil.h`. Update HttpServerModule to use `mm::json::*` (or thin delegates). Build + tests green. +2. Add `bool hidden` to `ControlDescriptor`. Add `ControlList::setHidden`. Add `,"hidden":%s` to writeControls output. Update app.js to skip hidden. Verify desktop UI still works. +3. Convert NetworkModule's conditional block to always-add + setHidden. Verify UI shows static-IP fields with hidden flag flipping correctly. +4. Add `Scheduler::setLoadAllHook` + `LoadAllFn` typedef. Swap phase order. 
No hook wired yet — but the new order should still work because all modules' setup() is robust to being called with default-or-overlaid values. +5. Add `SystemModule` `deviceName_[0] == 0` guard. +6. Add `FilesystemModule.h` with the full save/load implementation. Register in `main.cpp` BEFORE SystemModule. Verify the load hook gets called and the file paths line up. +7. Add `test/test_filesystem_persistence.cpp`. Run with `platform::fsSetRoot()` isolation. +8. Add docs: spec, architecture section, testing entry. +9. Full pre-commit checklist (10 steps). + +## Pre-commit checklist (CLAUDE.md mandatory order) + +| # | Check | Command | +|---|-------|---------| +| 1 | Desktop build | `cmake --build /Users/ewoud/Developer/GitHub/ewowi/projectMM/build` (zero warnings) | +| 2 | Unit tests | `cd build && ctest --output-on-failure` | +| 3 | Scenario tests | `./build/test/mm_scenarios` (SIGABRT exit pre-existing on HEAD — accept) | +| 4 | Platform boundary | `python3 scripts/check/check_platform_boundary.py` — verify no platform leakage in FilesystemModule.h | +| 5 | Spec check | `python3 scripts/check/check_specs.py` — confirms FilesystemModule.md describes the implemented API | +| 6 | ESP32 build | `python3 scripts/build/build_esp32.py` — clean. Verify partition + LittleFS still work. | +| 7 | Reviewer agent | Opus reviewer over staged diff. Flag: no heap alloc in `loop1s()` save path (only stack buffers); platform boundary clean; no duplication of JSON helpers; JsonUtil.h stays at ~50 lines (not growing into a JSON library). | +| 8 | KPI collection | `python3 scripts/check/collect_kpi.py --commit` | +| 9 | Live scenarios | Run on ESP32 hardware: existing 7 scenarios pass. Manual: set deviceName via REST → reboot → verify deviceName persisted. | +| 10 | Documentation | spec + architecture + testing updated; item 11 removed from `docs/plan.md`. | + +## Verification end-to-end + +After implementation, on real ESP32 hardware: + +1. 
`esptool.py erase_flash` (one-time cleanup — should not be needed since the partition layout didn't change from foundation commit, but a fresh start removes any leftover state) +2. `idf.py build flash monitor` — boots cleanly, log shows `FilesystemModule: mounted`, `/.config/` empty, default deviceName `MM-XXXX` +3. Open `http://<ip>/`, change deviceName to `MM-TEST` +4. Wait ≥3 seconds; serial monitor shows save log line `FilesystemModule: saved /.config/SystemModule.json` +5. Power-cycle the board +6. UI top bar shows `MM-TEST`; `/api/state` confirms deviceName = `MM-TEST` +7. Set Network.ssid to a real WiFi network, set addressing = Static, set ip/gateway/subnet, reboot +8. After reboot: Network controls show the static-IP fields visible (because addressing == Static was persisted); device connects with static IP + +If any step fails, do not commit; investigate. + +## Out of scope (explicit follow-ups) + +- **PSRAM-backed config cache** for fast preset switching (when PSRAM is detected) +- **Structural persistence** (add/remove children) — current scope only persists control values + enabled flag +- **Live scenario runner reboot support** — needed for an automated persistence-roundtrip live test +- **Presets** — `/.config/presets/` for named bundles of control values +- **`platform::ethPresent()` / `wifiPresent()`** — deferred with WiFi runtime-disable backlog +- **Plan 11.5** (free-then-allocate pipeline rebuild) — fully separate, does not block this plan diff --git a/docs/history/plans/Plan-20260521 - FilesystemModule + flash partition scheme (attempted, abandoned).md b/docs/history/plans/Plan-20260521 - FilesystemModule + flash partition scheme (attempted, abandoned).md new file mode 100644 index 0000000..32da312 --- /dev/null +++ b/docs/history/plans/Plan-20260521 - FilesystemModule + flash partition scheme (attempted, abandoned).md @@ -0,0 +1,88 @@ +# Plan-09 — FilesystemModule + flash partition scheme (attempted, abandoned) + +## Outcome + +Attempted 
JSON-based persistence (`FilesystemModule.h`, 436 LOC + `JsonUtil.h`, 256 LOC + 4 doctest cases for persistence + 9 doctest cases for JSON util) plus several defensive patches against memory pressure during grid resize. **Persistence and resize patches abandoned; partition layout + platform fs API + several incidental improvements kept (committed as "plan-09a foundations").** Total stripped: ~1700 LOC. Total kept: ~700 LOC of genuine improvements. + +This file documents what we tried, why it didn't pay for itself, and what was kept. + +## What was kept (committed) + +- **Partition CSVs** (`esp32/partitions/esp32dev.csv`, `esp32s3_n16r8.csv`) copied from projectMM v1 +- **Custom partition wired into sdkconfig.defaults** + `CONFIG_ESPTOOLPY_FLASHSIZE_4MB` +- **joltwallet/esp_littlefs managed component** in `idf_component.yml` — adds ~30 KB; unused for now, will be consumed by plan-11 +- **Platform fs API** in `platform.h` + desktop/ESP32 implementations: `fsMount`, `fsUnmount`, `fsMkdir`, `fsExists`, `fsRemove`, `fsRead`, `fsWriteAtomic`, `fsList`, `fsSetRoot`. Plus real `filesystemUsed/filesystemTotal` backed by `esp_littlefs_info()`. Foundation for whatever persistence story comes next. 
+- **MoonModule additions**: + - `typeName_` as `const char*` (4 bytes vs the 24-byte buffer originally proposed) pointing into the factory's string literal — stable factory key distinct from per-instance `name()` + - `dirty_` flag + `markDirty()` / `clearDirty()` / `dirty()` accessors — a clean hook for any future persistence consumer + - `rebuildControls()` non-virtual helper + `clearControlsRecursive()` — the recursive-clear fixes a real latent bug where conditional onBuildControls would double children's controls + - Documented onBuildControls idempotency contract +- **ModuleFactory** wires `setTypeName` alongside `setName` in `create()` +- **HttpServerModule** Select range check (rejects out-of-bounds values with 400) + `markDirty()` calls at the two mutation points in `handleSetControl` +- **Scheduler::teardown** two-pass (tear down all → delete all) so cross-module teardown logic can observe sibling state. Surfaced by attempted FilesystemModule but the bug existed regardless. +- **PreviewDriver** reads physical dimensions live from Layer each frame instead of caching startup values — fixes a pre-existing bug where grid resize broke the preview at all sizes +- **DriverBase::setLayer** + protected `Layer*` member — clean way for drivers that need geometry (Preview) to access it +- **DriverGroup** passes `Layer*` to children in `passBufferToDrivers` +- **UI**: `localStorage["mm.selectedModule"]` persists nav selection across browser refresh +- **CMakeLists DEPENDS fix** so version.h regenerates when `generate_version.py` changes +- **NetworkModule** mDNS retry-on-failure fix + local `rebuildLocalControlsAndPipeline` rename to avoid colliding with base helper +- **Test scenarios** `control-change.json` adds reset-state steps; new `grid-resize.json` +- **Two new doctest cases** in `test_moonmodule.cpp` covering `typeName` and `dirty` flag mechanics + +## What was thrown away + +### Persistence (the big one) + +- `src/core/FilesystemModule.h` — 436 LOC +- 
`src/core/JsonUtil.h` — 256 LOC of custom JSON parser with nested + scoped lookups +- `test/test_filesystem_persistence.cpp` — 261 LOC +- `test/test_json_util.cpp` — 117 LOC +- `docs/moonmodules/core/FilesystemModule.md` — 174 LOC +- Scheduler 4-phase setup + 4b re-load + `LoadAllFn` hook +- SystemModule `deviceName_[0] == 0` guard (only needed because of Scheduler reorder) +- HttpServerModule `noteDirty(target)` calls + FilesystemModule include +- main.cpp FilesystemModule factory registration + scheduler injection +- Architecture.md persistence section + +### Resize defensive patches + +- BlendMap.h null guards on src.data() / dst.data() +- DriverGroup.h null guard in `loop()` (`outputBuffer_.data()` check) +- DriverGroup.h "allocate failed → fall back to Layer's buffer" logic +- Layer.h `allocateBuffer` redesign (identity-at-physical fallback, "buffer empty" tier) + +## Why it didn't pay for itself + +### 1. JSON was the wrong primitive for module persistence + +The spec started with "human-readable, editable JSON" as an unexamined premise. **Neither human-readability nor manual editability is a real requirement.** Once that premise was challenged, the code cost (custom nested JSON parser, recursive serializer, scoped lookup helpers, ~800 LOC) became hard to defend. + +The honest job description is "save and restore module state at the right time". For POD-only module state (which is what MoonModule subclasses are), `memcpy(file, this + sizeof(MoonModule), classSize - sizeof(MoonModule))` is one line and produces a complete save. Plan-11 will pursue this. + +### 2. Persistence forced a Scheduler reorder that bred secondary bugs + +To overlay persisted values onto bound control variables, the Scheduler grew from 3 phases (setup → onBuildControls → onAllocateMemory) to 5 phases (onBuildControls → load → setup → rebuildControls → load again → onAllocateMemory). 
This: +- Required `onBuildControls` to be idempotent (good contract, but enforced by a foot-gun rather than a type) +- Bred a duplicate-children bug because `onBuildControls` recurses into children and `controls_.clear()` was top-level only +- Required SystemModule to guard `MAC → deviceName` derivation behind `if (deviceName_[0] == 0)` so the second `setup()` wouldn't overwrite persisted values +- Was the trigger for several "device shows nothing" hardware behaviors during testing + +The right approach (for blob persistence): load happens BEFORE any module's setup or onBuildControls, by directly memcpy'ing into member memory. No 5-phase dance, no idempotency contract, no guards. + +### 3. Resize defensive guards were fighting the symptom + +The underlying issue is that `Layer::onAllocateMemory` + `DriverGroup::onAllocateMemory` rebuild in-place (allocate new before freeing old), which fragments the heap. Free heap stayed ~60 KB but max contiguous block shrunk to ~15 KB — too small for new lwIP TCBs, so HTTP refused connections. We added 5 patches across BlendMap, DriverGroup, Layer to handle each failure mode the fragmentation produced. The patches accumulated; each one was correct in isolation; collectively they obscured the design problem. + +Plan-11.5 will pursue free-then-allocate: a two-phase rebuild that frees all light-pipeline buffers BEFORE attempting to allocate the new sizes. `canAllocate` sees true post-free heap, degrade decisions become deterministic, and the various stride-mismatch / zombie-state failure modes disappear by construction. + +## Lessons + +1. **Question the format premise.** "Persistence is JSON" was assumed in the spec without justification. Whenever a spec specifies a serialization format up front, ask: what would the minimum-bits form look like? For POD-only data the answer is usually "memcpy". + +2. 
**Be suspicious of helper proliferation.** When we found ourselves writing `rebuildControls`, `clearControlsRecursive`, `LoadAllFn`, `setLoadAllHook`, `noteDirty`, `loadAll`, `loadTopLevel`, `applyNode`, `applyControls`, `serializeNode`, `serializeControls`, `buildTopLevelPath`, `cleanupTmpFiles_`, `cleanupTmpCb_`, `cleanupTmpLeafCb_` — that was the system telling us the design was too elaborate for the job. + +3. **Defensive guards under memory pressure mask design bugs.** Each guard says "I don't trust this invariant" — which is a signal to fix the invariant, not the deref. If `outputBuffer_.data() == null` is reachable when `hasLUT() == true`, the design has a hole. Patch the design, not the call site. + +4. **Test isolation reveals real test-design issues.** Live scenarios that mutated persistent state (mirror toggles, grid size) contaminated each other across runs — the test failures appeared random until we realized previous runs were leaving state in `.config/`. Useful diagnostic for any future persistence layer: tests that need state must reset it explicitly. + +5. **Ship the foundation, redo the load.** When the build fails this badly, the right move was what we did: identify what's genuinely useful (partition table, platform fs API, MoonModule improvements, PreviewDriver fix, scheduler teardown fix) and commit that subset, then start fresh on the actual persistence design. diff --git a/docs/history/plans/Plan-20260522 - Nest child module cards inside their parent card's box.md b/docs/history/plans/Plan-20260522 - Nest child module cards inside their parent card's box.md new file mode 100644 index 0000000..cf58d5b --- /dev/null +++ b/docs/history/plans/Plan-20260522 - Nest child module cards inside their parent card's box.md @@ -0,0 +1,37 @@ +# Plan-13 — Nest child module cards inside their parent card's box + +## Context + +In the v3 web UI, every MoonModule renders as a card. 
Before this change, `renderModuleTree` (`src/ui/app.js`) appended **every** card — parent and child alike — as a flat sibling into the single `#main` container. Children only *looked* nested because of a `margin-left` on `.card[data-depth="1"/"2"]`. The parent card's border did **not** enclose its children. + +This was surfaced while reconciling the two `ui.md` specs after the repo rename: the flat-indent part shipped in plan-12, but the *containment* part was never built. The draft `docs/moonmodules_draft/core/ui.md` had a stale gap-analysis row conflating the two. The product owner wants the parent card to visibly **contain** its children so the module tree shape is structural, not just an indentation hint. + +The promoted spec `docs/moonmodules/core/ui.md` § Module card was updated first to describe the target layout: within a parent card the order is **title row → parent's own controls → `.card-children` block → `+ add child` footer**. + +## Decisions locked + +- **Children inside the parent box** — child cards live in a new `.card-children` wrapper that is a DOM descendant of the parent card; the parent's border encloses them. (Chosen over a bracket/spine-only treatment or keeping the flat-sibling layout.) +- **Controls above children** — the parent's own controls render above the `.card-children` block; `+ add child` renders below it, at the bottom of the parent box. No collapsible children block (rejected the `localStorage`-per-parent toggle as unneeded complexity). +- **`.card-children` gated on `acceptsChildren(mod)`** (Layer/DriverGroup/LayoutGroup), not on `mod.children.length` — so an empty parent still has a mount point and keeps `+ add child` below an (empty) children block. `.card-children:empty` collapses it visually. 
+- **Drag-and-drop gate** — `dragover` now accepts a drop only when source and target share the same `.card-children` container (true siblings under one parent), replacing the old `data-depth` equality check which would wrongly match effects under different Layers. + +## Implementation steps + +Two files changed: `src/ui/app.js` and `src/ui/style.css`. No backend change. `src/ui/ui_embedded.h` is regenerated at build time (`CMakeLists.txt:29-34`). + +1. **`createCard`** — returns `{ card, childrenEl }` instead of just `card`. When `acceptsChildren(mod)`, creates a `.card-children` div (with `data-depth = depth+1`) and appends it after the controls, then the `.card-footer` after that. `createCard` has only one caller (`renderModuleTree`), so the return-type change is contained. +2. **`renderModuleTree`** — destructures `{ card, childrenEl }`, appends `card` to `parentEl`, and recurses children into `childrenEl` (guarded by `childrenEl &&`) instead of into the flat `parentEl`. +3. **`attachDragHandlers`** — `dragover` gate changed to `src.parentElement === card.parentElement && card.parentElement.classList.contains("card-children")`. The `drop` handler was untouched — it already resolves the parent via `findParent(mod.name)` and computes `targetIdx` by name, position-independent. +4. **`style.css`** — added `.card-children` (margin-top, margin-left, left accent border, padding-left) and `.card-children:empty { display: none }`. The per-depth `.card[data-depth=...]` rules became background-only (removed `margin-left` and the per-card `border-left` — the wrapper now owns the indent and border). The responsive `@media (max-width:820px)` block's two per-depth margin overrides collapsed into one `.card-children` rule (nesting compounds naturally). + +## Verification + +- **Build** — `python3 scripts/build/build_desktop.py` from a clean `build/` (the old build cache held stale `projectMM-v3` paths from the directory rename and was removed). 
Zero warnings; `ui_embed` regenerated `ui_embedded.h` from the edited assets. +- **Rendered DOM** — drove headless Chrome via CDP, selected the `Layer` root (`localStorage['mm_selectedRoot']`), asserted on the live DOM: root card is `Layer`; its child order is exactly `["card-title", "card-children", "card-footer"]`; the `.card-children` wrapper holds 2 child cards (`Noise`, `Mirror`) as direct descendants; `+ add child` sits below the children block. All assertions passed. +- **Tests** — `ctest` 1/1 passed, `./build/test/mm_scenarios` 8/8 passed. UI-only change, no C++ touched, so test results unaffected as expected. + +## Notes + +- The `build/` directory was deleted and regenerated because the `projectMM-v3` → `projectMM` directory rename left an absolute path in `CMakeCache.txt`. This is unrelated to the feature. +- Per CLAUDE.md minimalism: changes confined to `src/ui/`, no new files, no inheritance — one struct-shaped return value and one new CSS class. +- Pre-commit gates (ESP32 build, platform boundary, KPI, etc.) not run — this is the product owner's gate to open. Git left untouched. diff --git a/docs/history/plans/Plan-20260522 - Replace a module with another type.md b/docs/history/plans/Plan-20260522 - Replace a module with another type.md new file mode 100644 index 0000000..2999433 --- /dev/null +++ b/docs/history/plans/Plan-20260522 - Replace a module with another type.md @@ -0,0 +1,65 @@ +# Plan-14 — Replace a module with another type + +## Context + +The web UI could add and delete child modules and reorder them, but not **replace** one — swap a child's type at the same position while keeping its siblings, order, and the parent's selection. The draft `docs/moonmodules_draft/core/ui.md` listed this three times as **Defer-1.x** ("Replace-type button (✎) … needs an atomic backend operation") — the last UI gap-analysis item needing a backend endpoint. 
+ +Research found the engine primitive already existed: `MoonModule::replaceChildAt(i, fresh)` swaps a child in place and returns the old one, and `FilesystemModule::applyNode()` already used it during persistence load on a type mismatch. This feature exposes that primitive as an explicit HTTP operation plus a UI button — no new tree-mutation logic. + +## Decisions locked + +- **HTTP route: `POST /api/modules/<name>/replace`**, body `{"type":"<TypeName>"}`. Mirrors the existing `POST /api/modules/<name>/move` sub-route — same strict-suffix parsing. Not PUT: the body is a swap instruction (`{type}` only), not a full resource representation, so POST-as-action is the honest verb and it keeps the route family uniform (only GET/POST/DELETE exist). +- **Clean swap, fresh defaults** — the replacement is created via `ModuleFactory::create()` and gets its own factory-default control values. No carry-over of matching controls. Matches how `add` works; predictable. +- **Same-role swap (UI)** — the replace picker filters to types whose `role` equals the replaced module's role (effect ↔ effect). The backend does not enforce role, consistent with `add` (the UI owns role filtering). +- **Position, name, selection preserved** — `replaceChildAt` swaps in place, so sibling order and index are kept. Replace only applies to children (roots rejected, like move), so the selected root is unaffected. + +## Implementation + +### Backend — `src/core/HttpServerModule.h` + +- Added an `isReplaceRoute` check beside `isMoveRoute` — strict suffix `"/replace"` (8 chars), POST, body present; extracts the module name and calls `handleReplaceModule`. +- New `handleReplaceModule(conn, name, body)`, modeled on `handleMoveModule` + `handleAddModule`: + - 404 if module not found; 400 if it is a root (no parent); 400 if `type` missing. + - Find the child's index in the parent. + - `ModuleFactory::create(typeName)` — 400 "unknown type" if it fails, **before** touching the tree (never leave a hole). 
+ - `parent->replaceChildAt(index, fresh)` → old module. + - Lifecycle on the fresh module: `onBuildControls()` → `setup()` → `onAllocateMemory()` — same phase order as the add path. + - `old->teardown()` + `Scheduler::deleteTree(old)` — the same teardown+recursive-delete pair `FilesystemModule::applyNode` uses. + - `scheduler_->rebuild()` so Layer LUT / DriverGroup buffer wiring re-forms. + - `parent->markDirty()` + `FilesystemModule::noteDirty()` — positional encoding rewrites `<index>.type` automatically. + +### UI — `src/ui/app.js` + +- `replaceModule(name, newType)` — POSTs to `/api/modules/<name>/replace`, then `refetchState()`. +- The type picker was parameterized: `openTypePicker` and the new `openReplacePicker` both delegate to a shared `openPicker(anchorEl, opts)` where `opts` carries the role filter, the confirm-button label (`create` / `replace`), and the commit action. No copy-paste of the picker. +- A **✎ button** added to `createActionButtons`, between ↓ and ×, on the same reorderable cards. Its click anchors the picker to `replaceBtn.closest(".card")` so the picker drops below the card content rather than inside the 26px button row. + +### CSS — `src/ui/style.css` + +- No change. The ✎ button reuses `.card-btn`; four 26px buttons + gaps (~116px) fit the title row comfortably. + +### Specs + +- `docs/moonmodules/core/ui.md` — documented the ✎ action, the `POST /api/modules/<n>/replace` endpoint, the dual-mode type picker, and updated the card diagram + feature summary. +- `docs/moonmodules_draft/core/ui.md` — removed the three now-implemented "Module replace" / "Replace-type button" rows and the cost-table entry. + +### Tests — `test/test_replacechild.cpp` (new) + +- `replaceChildAt`: swap at the same position with siblings intact; old child detached + replacement parented; out-of-range and null replacement rejected. +- Replace lifecycle: replacement built → set up → allocated, then old torn down — the order `handleReplaceModule` runs. 
+- Added to `test/CMakeLists.txt`; `docs/testing.md` gained a "Module tree mutation" section (also covering the previously-undocumented `test_movechild.cpp`). + +## Verification + +- Desktop build clean, zero warnings; `ui_embed` regenerated `ui_embedded.h`. +- `ctest` 1/1 passed (5 new replace cases, 17 assertions); `mm_scenarios` 8/8 passed. +- Live HTTP: replaced an effect under Layer → `{"ok":true}`, new type at the same index, siblings untouched. Error paths confirmed: unknown type → 400, root → 400, missing type → 400, `/replacex` → 404 — tree intact after each. +- Persistence: after a replace, `Layer.json` holds `"1.type":"FireEffect"` at the same positional index with the new type's default control values. +- Headless-Chrome DOM check: action row renders `↑ ↓ ✎ ×`; clicking ✎ opens the picker with confirm label "replace" and the role filter restricted to the target's role (`["effect"]`). + +## Notes + +- The engine already did this swap internally (`FilesystemModule::applyNode`); this feature is the explicit user-driven version, reusing `replaceChildAt` + `Scheduler::deleteTree` — no new tree logic. +- Per CLAUDE.md: changes confined to `src/core/HttpServerModule.h`, `src/ui/`, one new test file, and specs. `MoonModule.h` reused, not modified. +- Scenario-runner coverage deferred: the runner supports `add_module`/`set_control` only; a `replace_module` step is a follow-up. Replace is covered by the module test + live HTTP verification for now. +- Implemented on branch `next-iteration`. Pre-commit gates (ESP32 build, platform boundary, KPI) not run — the product owner's gate. Git untouched. 
diff --git a/docs/history/plans/Plan-20260522 - Side-nav (hamburger + footer), MoonLight logo + favicon, repo URL rename.md b/docs/history/plans/Plan-20260522 - Side-nav (hamburger + footer), MoonLight logo + favicon, repo URL rename.md new file mode 100644 index 0000000..8be79c0 --- /dev/null +++ b/docs/history/plans/Plan-20260522 - Side-nav (hamburger + footer), MoonLight logo + favicon, repo URL rename.md @@ -0,0 +1,65 @@ +# Plan-12 — Side-nav (hamburger + footer), MoonLight logo + favicon, repo URL rename + +## Context + +The v3 web UI ships with a flat module grid: `index.html` already has a `<nav id="nav">` placeholder inside `.content` > `.main-area`, `style.css` has `#nav { display: none; }`, and `app.js` has an empty `renderNav()`. The draft spec (`docs/moonmodules_draft/core/ui.md`) lists side nav / hamburger / footer as **Defer-1.x** and favicon as **Adopt-1.0** — the product owner chose to implement all of them now. + +A MoonLight logo (`docs/assets/moonlight-logo.png`, 320×320, 23.5 KB) goes top-left in the header and as the browser favicon. + +Separately: the v1 repo was renamed on GitHub (`ewowi/projectMM` → `ewowi/projectMM-v1`); ~22 files in this repo link to the old URL and need updating. + +## Decisions locked + +- **Logo delivery:** downscale to 64×64 (~2–4 KB) with `sips`, serve as a real asset (new `/moonlight-logo.png` route + `image/png`), not base64-inlined. 64px covers the ~28px header use at retina and a 32px favicon. +- **Side nav on wide screens:** static left column; the hamburger collapses/expands it. Narrow (<820px): slide-in over a semi-transparent overlay. +- **One root visible at a time.** The side nav selects a root module; `renderCards()` renders only the selected root's subtree, not all roots. (This supersedes the draft spec's "show all roots" note — the product owner wants the v1 single-root pattern.) Selection persists in `localStorage['mm_selectedRoot']`. 
+- **Footer:** copyright line `© <year> MoonLight` + four social icon links (inline SVG, no extra assets): GitHub `https://github.com/ewowi/projectMM`, Discord `https://discord.gg/TC8NSUSCdV`, Reddit `https://reddit.com/r/moonmodules`, YouTube `https://www.youtube.com/@MoonModulesLighting`. Discord/Reddit/YouTube URLs taken from the v1 frontend. +- **No root drag-reorder** — root order stays fixed in `main.cpp`. +- `sips` (built into macOS) does the downscale. + +## Implementation steps + +### Part A — Side nav, logo, favicon + +1. `sips -z 64 64 docs/assets/moonlight-logo.png --out src/ui/moonlight-logo.png`. +2. `src/ui/embed_ui.cmake` — add a 4th hex array `logoPng[]` + `logoPngLen` (the `hex_to_c_array` helper handles arbitrary bytes). +3. `src/core/HttpServerModule.h` — route `GET /moonlight-logo.png` → `serveFile(..., "image/png")`; add the `logoPng` case to the embedded-array branch. Binary-safe (serveFile writes by `dataLen`). +4. `src/ui/index.html` — `<link rel="icon" type="image/png" href="/moonlight-logo.png">` in `<head>`; `<img id="brand-logo">` at the start of the status bar; `<button id="nav-toggle">☰</button>` first in the status bar. +5. `src/ui/app.js` — hamburger click toggles a body class; overlay-click + Esc close on narrow screens. `renderNav()` populates `#nav` with one entry per root module (calls `selectModule()`), plus a `<footer>` with copyright + social links. `selectModule()` re-renders cards; `renderCards()` renders **only the selected root's subtree**. +6. `src/ui/style.css` — `#nav` becomes a flex column (was `display:none`); footer pinned to bottom; `@media (max-width: 820px)` extended for the slide-in + overlay. + +### Part A2 — Backend efficiency check (one-root rendering) + +7. Investigate whether the backend can update / push only the visible root's data rather than the whole tree. Today `HttpServerModule` pushes the full module tree on every WS state push (~1 Hz) and the UI patches it all in place. 
With one-root-visible, the cards for non-selected roots don't exist in the DOM — so the UI already ignores their data, but the **backend still serializes and sends all of it**. Check: + - Is there a cheap way for the client to tell the server which root it is viewing (e.g. a WS client→server message `{t:"view",root:"Layer"}` or a query param), so the server serializes only that subtree? + - Does the JSON-state-buffer cost or the per-tick serialization cost matter enough to justify it on ESP32? (HttpServer is currently ~850 µs/tick — measure the state-push portion.) + - If the saving is real and the change is small, scope it; if it adds protocol complexity for a sub-millisecond gain, record the finding and defer. + This step is an **investigation** — its outcome (do it / defer with reason) is reported to the product owner before any backend change. + + **Finding (defer):** the state push runs in `loop1s()` — once per second, not per render tick — and the JSON payload is ~700 bytes today (~5 KB worst case for a 20-module system). Serializing one root instead of seven saves a few hundred µs and ~500 bytes once per second, against a ~50,000 µs tick budget — negligible. Sending only the visible root would require a client→server WS "view" message, per-connection view-state tracking (up to 4 clients), and reconnect/switch race handling — bidirectional state protocol complexity that does not pay for itself (CLAUDE.md minimalism). The UI already does the cheap half: non-visible roots have no DOM, so `updateValues()`'s `querySelector` patches no-op for them. Kept the full-tree push; revisit only if the tree outgrows the JSON buffer (the spec's documented fallback is then streaming JSON to the socket, a better fix than per-root filtering). + +### Part B — Repo URL rename + +8. Replace the old v1 repo URL `github.com/ewowi/projectMM` → `github.com/ewowi/projectMM-v1` across ~22 files (doc "prior art" links + README). 
Word-boundary aware so `projectMM-v2` / `projectMM-v3` are untouched. Verify with grep. + +### Part C — Docs + +9. `docs/moonmodules/core/ui.md` — update the Layout ASCII diagram, add a "Side navigation" section (one-root-visible behavior, footer, hamburger, responsive), add logo/favicon to the Status bar section and Feature summary. +10. `docs/moonmodules_draft/core/ui.md` — remove the now-shipped rows (Sidebar nav, Hamburger menu, Footer in side nav, favicon line); keep genuinely-deferred items. Reconcile the "Patterns to consciously NOT carry over" note about single-root-visible — that pattern is now adopted. + +### Verify + +11. `cmake --build build` (regenerates `ui_embedded.h`), run `./build/projectMM`, browser-check: logo in header, favicon in tab, hamburger toggles nav, one root visible per nav selection, footer links + copyright, <820px slide-in works. + +## Critical files + +- `src/ui/moonlight-logo.png` (new — generated by sips) +- `src/ui/embed_ui.cmake`, `src/core/HttpServerModule.h` — asset embed + serve +- `src/ui/index.html`, `src/ui/app.js`, `src/ui/style.css` — header, hamburger, nav, footer +- `docs/moonmodules/core/ui.md`, `docs/moonmodules_draft/core/ui.md` — spec move +- ~22 files for the URL rename + +## Risks + +- The embed pipeline is hex-based and binary-agnostic, so the PNG embeds fine — but `serveFile` must write the body by length, not as a C string (it does; uses `dataLen`). +- The URL rename must not catch `projectMM-v2` / `projectMM-v3` — the replacement targets the bare `projectMM` token only. 
diff --git a/docs/history/plans/Plan-20260522 - Stream -api-state JSON (fix fixed-buffer overflow).md b/docs/history/plans/Plan-20260522 - Stream -api-state JSON (fix fixed-buffer overflow).md new file mode 100644 index 0000000..78d882d --- /dev/null +++ b/docs/history/plans/Plan-20260522 - Stream -api-state JSON (fix fixed-buffer overflow).md @@ -0,0 +1,41 @@ +# Plan-15 — Stream /api/state JSON (fix fixed-buffer overflow) + +## Context + +Adding several effects to a Layer broke the whole web UI: it showed *"Error: The string did not match the expected pattern."* and rendered no module cards, even after a refresh. + +Root cause: `HttpServerModule` built the entire `/api/state` JSON into a single fixed `char jsonBuf_[4096]`. With a larger module tree the JSON exceeded 4 KB; `snprintf`-based appends silently dropped individual fragments past the limit, leaving **malformed JSON** (`…value":0},}]},},},}]}]}]}` — dangling commas, broken nesting). The browser's `JSON.parse` failed and the UI never rendered. The WebSocket state push had the identical bug — a `char json[4096]` stack buffer. + +This is exactly the failure the plan-12 spec note predicted: *"revisit only if the tree outgrows the JSON buffer (the documented fallback is then streaming JSON to the socket)."* This plan implements that fallback. + +## Decision + +Stream the state JSON with **no fixed-size ceiling**, rather than just enlarging the buffer (which only moves the cliff and costs ESP32 RAM). A `JsonSink` abstraction serves both consumers: + +- **Socket mode** — a small (1 KB) staging buffer flushes to the `TcpConnection` as it fills; the whole response never lives in RAM at once. Used by `GET /api/state`. +- **Buffer mode** — bytes collect in a heap buffer that doubles on demand. Used by the WebSocket push, whose frame header needs the total length up front so it can't stream incrementally. + +Either way a module tree of any size serializes correctly. 
+ +## Implementation — `src/core/HttpServerModule.h` + +- New `JsonSink` class (before `HttpServerModule`): `append()` / `appendf()` write JSON; a `TcpConnection*` selects socket vs buffer mode. Socket mode auto-flushes the 1 KB stage; buffer mode grows a heap allocation (`platform::alloc`, doubling from 2 KB), freed in the destructor. +- `serveState` — writes the HTTP header directly (no `Content-Length`; `Connection: close` ends the body at EOF), then streams the tree through a socket-mode `JsonSink`. +- `buildStateJson`, `writeModuleJson`, `writeControls` — converted from `(char* buf, size_t bufSize, int& pos)` to a single `JsonSink&`. `appendf` replaces every `snprintf` + `pos`-bookkeeping pair, so the converted code is also shorter. The old "peek `buf[pos-1]` to decide a comma" trick became a `bool first` flag (streaming has no buffer to peek). +- `pushStateToWebSockets` — builds into a buffer-mode `JsonSink`, sends `sink.data()` / `sink.size()` via the unchanged `sendWsTextFrame`. +- `<cstdarg>` added for `appendf`'s varargs. The old `jsonBuf_` / `JSON_BUF_SIZE` stay — `/api/types` and `/api/system` still use them (smaller responses, not the overflow path). + +## Verification + +- Desktop build clean, zero warnings. +- Live: with the persisted large tree, `GET /api/state` returned **7000 bytes of valid JSON** (was truncated at 4095). Adding 10 more effects via the API pushed it to **~7 KB / 24 modules** — still valid, no truncation. +- Headless browser: the UI rendered all cards with no "pattern" error (the original symptom gone). +- WebSocket: `/ws` handshake returns `101 Switching Protocols` and pushes frames; the state push uses the same verified `buildStateJson`. +- `ctest` 1/1, `mm_scenarios` 8/8. + +## Notes + +- The fix removes the size ceiling entirely — there is no new larger limit to hit. 
+- ESP32 RAM: socket mode uses a 1 KB stage (down from the 4 KB static `jsonBuf_` for this path); buffer mode allocates transiently from PSRAM-preferred heap and frees immediately. +- Spec updated: `docs/moonmodules/core/ui.md` — `/api/state` REST entry, the WebSocket push description, and the per-root-filtering note now describe the streaming sink. +- Implemented on `next-iteration`. Pre-commit gates not run — the product owner's gate. diff --git a/docs/history/plans/Plan-20260522 - UI rewrite to ui-spec.md baseline (item 12).md b/docs/history/plans/Plan-20260522 - UI rewrite to ui-spec.md baseline (item 12).md new file mode 100644 index 0000000..8f1f417 --- /dev/null +++ b/docs/history/plans/Plan-20260522 - UI rewrite to ui-spec.md baseline (item 12).md @@ -0,0 +1,265 @@ +# Plan-11 — UI rewrite to ui-spec.md baseline (item 12) + +## Context + +The v3 web UI today is a thin first cut: `src/ui/index.html` (24 lines), `app.js` (576 lines), `style.css` (156 lines). It works but doesn't reflect what `docs/moonmodules_draft/core/ui-spec.md` lays out — the spec catalogues the v1 patterns proven at scale and the gap analysis between v1 and current v3. + +Plan-11 rewrites the UI to that spec baseline. Once status bar + card layout + 9 control types + type picker + no-rebuild contract are in, any new MoonModule renders generically with zero UI-code cost — the spec's core promise. This is the prerequisite for an "effect/module switching from UI" user feature: the switching mechanism *is* the type picker plus reorder/delete buttons on top of a spec-compliant card layout. 
+ +**Scope:** The 8 items in ui-spec.md § Plan-12 scope (at plan time `docs/moonmodules_draft/core/ui-spec.md`; the spec has since shipped to [docs/moonmodules/core/ui.md](../../moonmodules/core/ui.md)) — status bar, card layout, 9 control types, type picker, reset-to-default, light/dark theme, WS lifecycle, 3D preview polish — plus four items promoted from § Deferred to 1.x for this iteration: fps/ms toggle per card, reboot button with crashed-state styling, system stats in header (uptime · heap), drag handles for child reorder. + +**Intended outcome:** Plan-11 owns its own engine additions where the UI needs them. After plan-11 lands, the v3 UI matches the spec, the spec promotes from `_draft/` to `moonmodules/`, and this plan archives as `docs/history/plan-11.md` (next sequential; plan-10 untouched). + +## Decisions already locked + +- **Engine additions are owned by plan-11.** Three small endpoints + supporting code: `GET /api/types`, `POST /api/modules/<n>/move`, `POST /api/reboot`, plus `MoonModule::moveChildTo`, `ModuleFactory` role capture at registration, `SystemModule::bootReason` control. ~150 LOC total backend. +- **Up/down icon buttons AND drag handles ship together.** Up/down for touch users, drag for desktop. Both call the same `POST /api/modules/<n>/move` endpoint with `{to: N}` (absolute target index). Up = `to: currentIndex-1`, down = `to: currentIndex+1`, drag = `to: dropTargetIndex`. One endpoint, one round-trip per move regardless of distance. +- **`POST /api/modules/<n>/move` triggers `scheduler_->rebuild()`** after a successful move. This is unnecessary for effect-only moves but required for modifier and layout moves (LUT depends on modifier order; physical→logical mapping depends on layout). Same pattern as the existing add/delete handlers — simple, correct, no need to special-case by `role()`. 
+- **No-rebuild contract preserved.** Existing `dragTs` 1s cooldown (current `app.js` L319) and `if (ctrl.hidden) continue` (current `app.js` L154 — plan-10 feature) stay. The rewrite extends what's there, doesn't restart from zero. +- **localStorage key migration.** Current `mm.selectedModule` → spec's `mm_selectedRoot`. Read both on init, prefer new; one-release fallback. New keys `mm_theme`, `mm_timing_mode` per spec. +- **System stats in header** uses existing `SystemModule.uptime` control from `/api/state` WS push. No new endpoint needed for that — the data is already there. +- **Reboot button needs a backend endpoint.** Adds `platform::reboot()` (ESP32: `esp_restart()`, desktop: `exit(0)`) + `POST /api/reboot` handler. Crashed-state badge driven by a new `SystemModule.bootReason` ReadOnly control populated from `esp_reset_reason()`. +- **Plan archives as `docs/history/plan-11.md`** (plan-10 untouched in history). + +## Engine additions + +Required so the UI scope items have endpoints to call against. All additive — no existing behavior changes. + +### `src/core/MoonModule.h` — `moveChildTo(child, newIndex)` + +Move child to an absolute position 0..childCount-1. Shifts intervening siblings (memmove-style). Returns `false` if child not found or newIndex out of range. Sits alongside existing `addChild`/`removeChild`/`replaceChildAt`. ~18 LOC. + +### `src/core/ModuleFactory.h` — capture role at registration + +Extend `TypeEntry` with `ModuleRole role`. Template `registerType<T>()` discovers role via a probe instance: `T probe; ModuleRole r = probe.role();` then forwards to the non-template overload. Add `static ModuleRole typeRole(uint8_t i)` accessor. ~10 LOC. + +### `src/core/HttpServerModule.h` — three new endpoints + +- `GET /api/types` → `{"types":[{"name":"NoiseEffect","role":"effect"}, …]}`. Role string lowercased from `ModuleRole` enum. UI uses it for the picker's context filter (parent's `role()` → accepted child roles, derived in JS). ~25 LOC. 
+- `POST /api/modules/<n>/move {to: N}`. Route uses strict-suffix match — path must end with `/move` exactly, not `/movex`. Resolves module by name, finds its parent, calls `parent->moveChildTo(mod, to)`, marks dirty, notes filesystem dirty, calls `scheduler_->rebuild()` so any LUT depending on modifier/layout order rebuilds. ~30 LOC. +- `POST /api/reboot`. Calls `platform::reboot()` and returns `{"ok":true}` (the response races the actual restart on ESP32; that's fine — the UI sees a WS disconnect and reconnects when the device comes back up). ~10 LOC. + +### `src/platform/platform.h` + impls — `reboot()` + +Add `void reboot();` to the API. ESP32 impl: `esp_restart()`. Desktop impl: `std::exit(0)` (a no-op or exit; matches "smoke-tested but not load-bearing" expectations on desktop). ~6 LOC across three files. + +### `src/core/SystemModule.h` — `bootReason` ReadOnly control + +Add a ~32-byte `bootReasonStr_` member. In `setup()`, query `esp_reset_reason()`, map enum to "POWERON" / "SW" / "PANIC" / "WDT" / etc., snprintf into the buffer. In `onBuildControls`, bind it as a ReadOnly control. On desktop the buffer reads "OK" (no reset reason concept). The UI uses this to set the reboot button's `data-crashed` attribute when the value indicates an unclean prior boot (PANIC/WDT/BROWNOUT). ~20 LOC. + +## UI rewrite + +### `src/ui/index.html` — full restructure (was 24 lines, target ~50) + +- Fixed top **status bar** (`<header>` becomes 44px fixed): brand logo + wordmark, device name (from `System.deviceName`), system stats span (`uptime · NN KB heap`), spacer, WS dot, reconnect button, **reboot button** (with crashed-state class hook), **theme toggle** button. +- Sticky **3D preview canvas** wrapper below status bar. +- Main column: single column `max-width: 500px; margin: 0 auto`, card list. Root modules rendered with depth=0; children indented with depth+1, etc. +- `<body data-theme="dark">` default. 
+ +### `src/ui/app.js` — extend existing 576 lines + +**Preserve as-is:** `dragTs` cooldown (L319), `if (ctrl.hidden) continue` (L154), the 7 working control type renderers (uint8 slider, uint16, bool, text, display, select, progress). + +**WebSocket lifecycle (spec item 7):** +- Rewrite `connectWs()` (L13-43) with exponential backoff: `wsRetryMs` 500 → 1000 → 2000 → 4000 → 5000, reset on `onopen`. +- Add `setInterval(() => ws.readyState===1 && ws.send("ping"), 25000)` keepalive on connect; clearInterval on close. +- Module-level `let wsPaused = false`; gate `onmessage` body on `!wsPaused`. +- `document.addEventListener("visibilitychange", () => wsPaused = (document.visibilityState === "hidden"))`. +- `window.addEventListener("pageshow", e => { if (e.persisted) { wsPaused = false; if (ws.readyState !== 1) connectWs(); } })` for Safari bfcache. + +**Status bar wiring (spec item 1 + 4 promoted items):** +- Device name from `state.modules[].controls[]` where name === "deviceName". +- System stats from `SystemModule.uptime` + free heap (computed via `freeHeap` field on /api/system or `dynamicBytes` from /api/state). Pull from the existing /api/state WS push — no new endpoint. Format: uptime as `Xd Yh Zm Ws`, heap as KB. +- Theme toggle button (`☀/🌙`): reads/writes `localStorage['mm_theme']`, sets `body.dataset.theme`. +- Reconnect button (already wired): force `ws.close()` then `connectWs()`. +- **Reboot button**: confirm dialog `confirm('Reboot device?')`, then `POST /api/reboot`. Add red border (`data-crashed="true"`) when `SystemModule.bootReason` indicates an unclean prior boot. + +**Card rendering (spec item 2 + per-card fps/ms toggle):** +- `createCard(mod, depth)` accepts depth. Sets `card.dataset.depth = depth`. `renderCards()` recurses children with `depth+1`. +- Title line `[name] [stats] [actions]`. 
**Actions** appear for children whose `role()` is reorderable (Effect, Modifier): + - `↑` up button → `POST /api/modules/<name>/move {delta:-1}` + - `↓` down button → `POST /api/modules/<name>/move {delta:+1}` (both disabled at extremes) + - `✕` delete button → `DELETE /api/modules/<name>` + - Drag handle `☰` (desktop) — see drag section below +- **Stats span** is clickable; cycles fps↔ms display via `localStorage['mm_timing_mode']`. Shows `loopTimeUs` from `/api/state` formatted per mode. Single global toggle affects all cards. + +**Control rendering (spec item 3 + 5):** +- Extend `createControl()` (L165-293) with three new branches: + - `button` — `<button>` calls `sendControl(name, 1)` on click, no echo. + - `password` — `<input type="password">` + hold-to-peek button (`onmousedown` shows, `onmouseup`/`onmouseleave` hides), 500ms debounce, placeholder shows `•` repeated to value length. + - `time` — read-only formatted via `fmtTime(seconds)` helper → `Xd Yh Zm Ws`. Updated via WS push. +- Add matching update branches in `updateModuleControls()` (L312-371). +- **Reset-to-default button (↺)**: in `createControl()`, when `ctrl.default !== undefined` (the engine adds this field — see the `Control.h` and `HttpServerModule.h` entries under Critical files below), append a small button. Class `dim` vs `active` based on `ctrl.value === ctrl.default`. Click → `sendControl(name, ctrl.default)`. `updateResetButtonState(mid, key, ctrl)` called from `updateModuleControls()` to refresh state. + +**Type picker (spec item 4):** +- `roleAcceptsChild(parentRole, childRole)` map (~10 LOC): `Layer → [effect, modifier]`, `DriverGroup → [driver]`, `LayoutGroup → [layout]`, others → `[]`. +- `openTypePicker(parentMod, anchorEl)`: + - Fetches `/api/types` (cache for session). + - Filters by `roleAcceptsChild(parentMod.role, t.role)`. + - Renders inline list (not modal) below anchor: search input, filtered list, Create/Cancel buttons. 
+ - Keyboard nav: ↓ enters list from search, ↑↓ moves selection, Enter → `POST /api/modules {type, parent_id: parentMod.name}` then re-fetch state, Esc closes. + - Search filters by substring on type name. +- `+ add child` button in each card's footer (for parents that accept children) → `openTypePicker(mod, button)`. +- `+ add module` button somewhere at the top (top-level addition, parent_id null/missing). + +**Drag-to-reorder (promoted from Deferred):** +- `☰` drag handle in reorderable child cards (alongside existing up/down). +- On `dragstart`: store source card id in `dataTransfer`, add `.dragging` class. +- On `dragover` on a sibling card: `preventDefault()` to allow drop, add `.drag-over` class. +- On `drop`: compute delta from indices (source index vs drop target index), call `POST /api/modules/<name>/move {delta}` enough times to reach target (or extend the endpoint to accept absolute index; **decision: keep `delta:-1|+1` and call multiple times** — simpler endpoint, drag is short-range anyway. If we move 3 down, call delta:+1 three times in sequence with awaits between). +- `dragleave` / `dragend`: clean up classes. + +**3D preview polish (spec item 8):** +- Wrap canvas in `.preview-wrap { position: sticky; top: 44px; z-index: 5; }`. +- Touch handlers (`touchstart`/`touchmove`) mirroring mouse drag for mobile orbit. +- **Sparse vertex buffer**: in `renderPreviewFrame()` (L459+), pre-count non-black voxels; skip RGB=0 in upload loop. Halves GPU work for typical effects. +- **Cache `lastFrame`** (the buf) so a `redrawFromCache()` can be called from orbit handlers between server frames — orbit feels smooth even at low FPS. +- Scroll listener on main column → set `--preview-shrink` 0→1 over 0→300px scroll, recompute canvas height via `requestAnimationFrame` throttling. Preview shrinks to 50% of natural height when fully scrolled. +- GLSL vertex: `gl_PointSize = uPtSize / gl_Position.w` (depth-corrected). 
+- GLSL fragment: tighten disc to `d > 0.25 → discard`, soft brightness falloff via `smoothstep(0.10, 0.25, d)`. + +**localStorage migration:** +- On init, read `localStorage['mm_selectedRoot']` first, fall back to `localStorage['mm.selectedModule']`. Write only to the new key. One-release fallback. +- Add `mm_theme` (default `"dark"`) and `mm_timing_mode` (default `"fps"`) keys. + +### `src/ui/style.css` — restructure (was 156 lines, target ~350) + +**Layer 1 — variables.** Define `:root` CSS variables for the palette: +``` +--bg-0, --bg-1, --fg, --fg-muted, --accent, --accent-soft, +--card-bg-0, --card-bg-1, --card-bg-2 (depth-based backgrounds), +--border, --green (connected/ok), --red (error/crashed), --yellow (warn) +``` +Existing dark colors refactor to use them. + +**Layer 2 — `[data-theme="light"]` overrides.** ~10-12 variable flips. Per spec, ~30 lines total. + +**Layer 3 — structural rules.** +- Fixed status bar (44px, position:fixed top, flex row, gap 8px). +- Sticky `.preview-wrap` (top:44px, z-index:5). +- Main column max-width 500px, centered, padding-top to clear sticky preview. +- Card depth backgrounds via `.card[data-depth="0/1/2"]` + left-border accent on indented children. +- 600px → 820px breakpoint per spec. + +**Layer 4 — component styles.** +- `.card`, `.card-title`, `.card-stats` (cursor:pointer for fps/ms toggle), `.card-actions`, `.card-btn` (square 26×26 button), `.card-btn-del` (red variant). +- `.drag-handle` (cursor:grab). +- `.reboot-btn`, `.reboot-btn[data-crashed]` (red border). +- `.type-picker` (inline list styling). +- `.reset-btn` dim/active states. +- `.peek-btn` for password. + +## Test additions + +Three small additions in `test/`: + +- `test_movechild.cpp` — verify `MoonModule::moveChild` swaps siblings, returns false on out-of-range, doesn't disturb non-child slots. ~40 LOC. 
+- `test_module_factory.cpp` — verify the role probe captures correctly via `registerType<T>("…")` and `typeRole(i)` returns expected enum for the 10+ registered types in `main.cpp`. ~30 LOC. +- `test_system_module.cpp` — already exists; extend with a bootReason-present check (desktop value should be a non-empty string). ~5 LOC added. + +`CMakeLists.txt` updated to include the two new test files. + +## Documentation + +- **`docs/moonmodules/core/SystemModule.md`** — add `bootReason` to the controls list, note the UI's crashed-state behavior. +- **`docs/moonmodules/core/HttpServerModule.md`** — add the three new endpoints to the API table, with shapes. +- **`docs/moonmodules/core/MoonModule.md`** — add `moveChild` to the children API list (alongside `addChild`/`removeChild`/`replaceChildAt`). +- **`docs/testing.md`** — add entries for `test_movechild.cpp` and `test_module_factory.cpp`. +- **`ui-spec.md` final cleanup** — once the UI matches, the Quick guide's deferred items get updated (the 4 promoted items move out of Deferred-1.x and into "implemented"). Then `git mv docs/moonmodules_draft/core/ui-spec.md docs/moonmodules/core/ui-spec.md`. +- **`docs/plan.md`** — remove the `## 12.` section per the file's "Completed items are removed" rule. +- **`docs/history/plan-11.md`** — 1:1 copy of this plan file (per CLAUDE.md's "Save plan to history" rule). 
+ +## Critical files + +**Engine:** +- [src/core/MoonModule.h](src/core/MoonModule.h) — add `moveChild` +- [src/core/ModuleFactory.h](src/core/ModuleFactory.h) — role at registration +- [src/core/Control.h](src/core/Control.h) — add `default` field + `setDefault(i, val)` helper +- [src/core/HttpServerModule.h](src/core/HttpServerModule.h) — 3 endpoints, emit `default` field +- [src/core/SystemModule.h](src/core/SystemModule.h) — `bootReason` control +- [src/platform/platform.h](src/platform/platform.h) — declare `reboot()` +- [src/platform/desktop/platform_desktop.cpp](src/platform/desktop/platform_desktop.cpp) — `reboot()` stub +- [src/platform/esp32/platform_esp32.cpp](src/platform/esp32/platform_esp32.cpp) — `reboot()` via `esp_restart()` + +**UI:** +- [src/ui/index.html](src/ui/index.html) — full restructure +- [src/ui/app.js](src/ui/app.js) — extend existing 576 lines +- [src/ui/style.css](src/ui/style.css) — restructure with CSS variables + light theme + +**Tests:** +- [test/test_movechild.cpp](test/test_movechild.cpp) (new) +- [test/test_module_factory.cpp](test/test_module_factory.cpp) (new) +- [test/test_system_module.cpp](test/test_system_module.cpp) — extend +- [test/CMakeLists.txt](test/CMakeLists.txt) — register + +**Docs:** +- [docs/moonmodules/core/SystemModule.md](docs/moonmodules/core/SystemModule.md) +- [docs/moonmodules/core/HttpServerModule.md](docs/moonmodules/core/HttpServerModule.md) +- [docs/moonmodules/core/MoonModule.md](docs/moonmodules/core/MoonModule.md) +- [docs/testing.md](docs/testing.md) +- `git mv docs/moonmodules_draft/core/ui-spec.md docs/moonmodules/core/ui-spec.md` +- [docs/plan.md](docs/plan.md) — remove step 12 +- [docs/history/plan-11.md](docs/history/plan-11.md) — new + +## Existing utilities to reuse (do NOT duplicate) + +- `controls_` array + `addUint8/addBool/addText/addSelect/addReadOnly/addProgress` on every MoonModule +- `MoonModule::role()` returning `ModuleRole::{Generic, Effect, Modifier, Driver, Layout}` — picker 
filter derives from this in JS +- `MoonModule::children_` array + `addChild`/`removeChild`/`replaceChildAt` (plan-10) — `moveChild` joins these +- `MoonModule::loopTimeUs` + `dynamicBytes()` — already in `/api/state`, drive the fps/ms toggle +- `MoonModule::enabled()` / `setEnabled()` — already wired by HttpServerModule for the per-card checkbox; no new code needed for the enabled toggle UX +- `FilesystemModule` (plan-10) — persistence "just works" for new controls (bootReason isn't persisted because it's `ReadOnly`) +- `dragTs` cooldown + `if (ctrl.hidden) continue` in `app.js` — preserve, don't rewrite +- `ControlDescriptor.hidden` flag (plan-10) — already supported end-to-end + +## Risks and mitigations + +1. **`ui_embedded.h` regen.** UI files served from disk on desktop, but baked into `src/ui/ui_embedded.h` at ESP32 build. After UI edits, regen via `build_esp32.py` (CMake should regen automatically on file timestamps). Verify via ESP32 smoke test before declaring done. +2. **localStorage migration.** Renaming the selected-module key silently drops old values once. Mitigation: read both old and new on init, prefer new. Acceptable one-release migration. +3. **Persistence (plan-10) interaction.** Adding `default` field to `ControlDescriptor` is append-only; doesn't change persistence binary serialization. The new ReadOnly `bootReason` is correctly excluded from persistence (ReadOnly controls are derived, not state). Verify `test_filesystem_persistence.cpp` still passes. +4. **No-rebuild contract.** Card rendering restructure must keep WS state pushes patching values in place via `[data-mid][data-key]` selectors — never call `renderCards()` from `updateValues()`. dragTs cooldown at L319 must still work after restructuring. Drag operations and add/delete DO trigger a re-fetch + re-render of the affected parent only. +5. **Reboot endpoint response race.** `POST /api/reboot` returns 200 then the device restarts; the client may not see the response. 
Acceptable — the UI's existing reconnect-on-WS-close logic handles the disconnect cleanly. On desktop, `exit(0)` makes the server vanish; localhost smoke test should see clean WS close. +6. **WS reconnect storm.** Exponential backoff without jitter could cause N clients to slam the device. Acceptable for now (one developer + browser). +7. **bootReason on first boot.** Fresh ESP32 with no prior state reports POWERON_RESET, which is normal — UI must NOT show crashed-state for that. Map only PANIC, INT_WDT, TASK_WDT, BROWNOUT to "crashed". +8. **Drag-to-reorder iteration cost.** Multiple `/move {delta}` calls in sequence for a multi-position drop. Acceptable for short-range drags; for long-range, the up/down buttons or repeated drags are fine. Avoid extending the endpoint to absolute-index for now. + +## Verification + +Per CLAUDE.md pre-commit checklist (10 steps). Specific to this plan: + +1. `cmake --build build` — zero warnings (UI changes don't affect build but engine changes do) +2. `ctest --output-on-failure` — existing tests pass + 2 new (`test_movechild`, `test_module_factory`) +3. `./build/test/mm_scenarios` — exit 0 +4. `python3 scripts/check/check_platform_boundary.py` — PASS (new `platform::reboot` correctly placed) +5. `python3 scripts/check/check_specs.py` — `10+ modules ok` (HttpServer/SystemModule/MoonModule specs updated) +6. `python3 scripts/build/build_esp32.py` — clean; `ui_embedded.h` regenerated +7. Reviewer agent (Opus) over the staged diff +8. KPI one-liner with PC + ESP32 tick/FPS per CLAUDE.md step 8 +9. 
Hardware smoke test at `http://192.168.1.210/`: + - UI loads, status bar shows device name + green WS dot + system stats (uptime · NN KB heap) + - Theme toggle switches dark↔light, persists across reload + - Tab away 30 seconds, return: WS dot stays green (keepalive working) + - Click stats line on any card: cycles fps↔ms display; persists across reload + - Scroll main column: preview shrinks to 50% of its natural height over 300px of scroll; mouse-orbit during low-FPS stays smooth (frame cache) + - On Layer card, click `+ add child` → picker shows only effects + modifiers (NoiseEffect, RainbowEffect, MirrorModifier); search "noi" filters to Noise; Enter creates; new card appears + - Click ↑ / ↓ buttons on a child → order changes visibly in preview and in `/api/state` + - Drag a child to a new position → same effect as ↑/↓ + - Click ✕ on a child → confirm dialog → child disappears + - Click ↺ on a control whose value differs from its default → value snaps back to the default, dragTs cooldown applies (no fight with WS push) + - Click reboot button → confirm dialog → device reboots, WS reconnects, UI returns + - If a panic/WDT happens on the device, reboot button shows red border on the next boot +10. Documentation: ui-spec.md matches code, promoted out of `_draft/`; SystemModule.md / HttpServerModule.md / MoonModule.md updated; testing.md updated; plan.md step 12 removed; this plan archived as `docs/history/plan-11.md`. 
+ +## Out of scope (explicit follow-ups, deferred per ui-spec.md) + +- Side nav with drag-reorder of root modules (root order is fixed in main.cpp; the four roots stay) +- Health panel (`<details>` + `GET /api/test`) +- Log panel (`<details>` + WS `{t:"log",m:"…"}`) +- Update-available badge + OTA panel (requires `/api/firmware`) +- Module replace (`✎`) button (requires `POST /api/modules/replace`) +- Core affinity badge (C0/C1) — only meaningful when core pinning lands +- Help links per type (TYPE_TO_DOC mapping) +- Category emoji badge (deferrable — role() suffices) +- Multi-layer UI (plan.md backlog) +- Presets UI +- Canvas/node-graph view diff --git a/docs/history/plans/Plan-20260523 - Top-level shape change to `Layouts`, `Layers`, `Drivers`.md b/docs/history/plans/Plan-20260523 - Top-level shape change to `Layouts`, `Layers`, `Drivers`.md new file mode 100644 index 0000000..afe43f2 --- /dev/null +++ b/docs/history/plans/Plan-20260523 - Top-level shape change to `Layouts`, `Layers`, `Drivers`.md @@ -0,0 +1,250 @@ +# Plan: Top-level shape change to `Layouts`, `Layers`, `Drivers` + +## Goal + +Rename and re-shape the three light-domain top-level containers from singletons-of-things to plural containers-of-things, so the side-nav reads honestly: + +```text +Layouts ← was LayoutGroup + ├─ GridLayout + └─ (room for more) +Layers ← NEW (today there's only one Layer at root) + └─ Layer + ├─ NoiseEffect + ├─ MirrorModifier + └─ (effects + modifiers) +Drivers ← was DriverGroup + ├─ ArtNetSendDriver + └─ PreviewDriver +``` + +Each container is a regular `MoonModule` with a `Generic` role. The shape change is the deliverable; the **blend/composition of multiple Layers** and the **per-Layer start/end carving** are tracked as follow-ups but **the `start/end` controls land in this commit** so the surface is stable when composition arrives. 
+ +## Scope decisions confirmed + +- **Q1 — composition (a):** Drivers will eventually compose N Layer buffers into a single output (alpha-blend or additive). Already documented in [architecture-light.md:123,137](docs/architecture-light.md) and [DriverGroup.md:20](docs/moonmodules/light/DriverGroup.md). **Follow-up; not in this commit.** With one Layer the compose step is a copy. +- **Q2 — Layouts shared (d) + per-Layer ranges (f):** All Layers share the same `Layouts` instance (today's model — [architecture-light.md:53](docs/architecture-light.md#L53)). Each Layer carries `startX/Y/Z` and `endX/Y/Z` controls that select a region of the shared layout. Defaults: whole layout. With one Layer the controls are no-ops; with N Layers + composition the carving becomes active. +- **Q3 — (g) rename and keep as containers:** all three top-level containers are concrete `MoonModule` subclasses with `Generic` role. Reject (h) (flat top level) and (i) (templated `RoleContainer<T>`). Each container *does* hold real state (LayoutGroup stitches indices, DriverGroup owns the output buffer; future `Layers` will own the composed buffer). +- **Q4 — ship shape only:** rename, introduce `Layers`, add `start/end` to `Layer`. Multi-Layer composition and the live carving of layout regions are a separate commit. + +## Mapping: old → new + +| Today | New | What it is | +|---|---|---| +| `class LayoutGroup` in [src/light/layouts/LayoutGroup.h](src/light/layouts/LayoutGroup.h) | `class Layouts` (same file, class renamed) | Holds N `LayoutBase` children, stitches indices via `forEachCoord`. **Behaviour unchanged.** | +| `class DriverGroup` in [src/light/drivers/DriverGroup.h](src/light/drivers/DriverGroup.h) | `class Drivers` (same file, class renamed) | Holds N `DriverBase` children, owns the LUT-blended output buffer, hands the buffer pointer to each driver. **Behaviour unchanged.** Still reads from a single `Layer*` (composition is the follow-up). 
| +| `Layer` at the root | `class Layers` (new file [src/light/Layers.h](src/light/Layers.h)) **wraps** N `Layer` children | New container. `loop()` runs each child Layer in order. With one child Layer it's a thin pass-through (same behaviour as today). | +| `Layer::startX/Y/Z`, `endX/Y/Z` | new controls on `Layer` | Default to `(0,0,0)`–`(physW-1, physH-1, physD-1)` (i.e. whole layout). Today no-op; persisted and visible in the UI for the composition follow-up. | + +The factory string keys also rename: +- `"LayoutGroup"` → `"Layouts"` +- `"DriverGroup"` → `"Drivers"` +- new key `"Layers"` (the container) +- existing `"Layer"` unchanged (the child class) + +This breaks any persisted `/.config/*.json` from earlier sessions that reference the old `"LayoutGroup"` / `"DriverGroup"` type names — see "Migration" below. + +## Files to change + +### Renames (1 class rename per file; preserve git history via in-place edit) + +- **[src/light/layouts/LayoutGroup.h](src/light/layouts/LayoutGroup.h)** — rename `class LayoutGroup` → `class Layouts`. Filename **stays** `LayoutGroup.h` so this is a class rename in place; everywhere that includes `light/layouts/LayoutGroup.h` keeps working. Update the `#include` comment in [src/light/Layer.h](src/light/Layer.h) and add an alias if needed. + - *Alternative:* rename file too (`LayoutGroup.h` → `Layouts.h`) via `git mv`. Cleaner long-term but breaks every `#include`. **Plan picks file rename via `git mv`** because we just did a folder restructure last commit; one more rename is consistent with the cleanup. +- **[src/light/drivers/DriverGroup.h](src/light/drivers/DriverGroup.h)** → `src/light/drivers/Drivers.h`, `class DriverGroup` → `class Drivers`. Same treatment. + +### New file + +- **[src/light/Layers.h](src/light/Layers.h)** — new container class. Roughly: + ```cpp + #pragma once + #include "core/MoonModule.h" + #include "light/Layer.h" + + namespace mm { + + // Top-level container for one or more Layers. 
Each child Layer reads its + // buffer from a shared Layouts instance and writes its own buffer; Drivers + // composes them on the output side (composition follow-up). + // + // With one child Layer today this is a thin pass-through: loop() runs the + // child Layer's loop() in order. The container itself owns no buffer. + class Layers : public MoonModule { + public: + void setLayouts(Layouts* l) { + layouts_ = l; + // Propagate to all child Layers so they can size their buffers. + for (uint8_t i = 0; i < childCount(); i++) { + if (auto* lyr = dynamic_cast<Layer*>(child(i))) { + lyr->setLayoutGroup(layouts_); // method name unchanged + } + } + } + + Layouts* layouts() const { return layouts_; } + + void loop() override { + // Scheduler gates Layers itself by respectsEnabled() default. + for (uint8_t i = 0; i < childCount(); i++) { + if (!child(i)->enabled()) continue; + uint32_t start = platform::micros(); + child(i)->loop(); + child(i)->addAccumUs(platform::micros() - start); + } + } + + // Active Layer for Drivers' single-Layer plumbing (placeholder until + // composition lands). Returns the first child Layer, or nullptr. + Layer* activeLayer() const { + for (uint8_t i = 0; i < childCount(); i++) { + if (auto* lyr = dynamic_cast<Layer*>(child(i))) return lyr; + } + return nullptr; + } + + private: + Layouts* layouts_ = nullptr; + }; + + } // namespace mm + ``` + - **Hot-path note:** `dynamic_cast` is in the cold path only (`setLayouts` runs at startup, `activeLayer` at composition setup). Per-frame `loop()` uses `child(i)->loop()` — no cast. No RTTI cost in the render path. + - **Alternative without dynamic_cast:** since every child of `Layers` is by construction a `Layer`, `static_cast` is safe. Use that — matches the existing pattern in `Layouts::forEachCoord` (`static_cast<LayoutBase*>(child(i))`). 
**Plan adopts `static_cast`.** + +### Layer changes + +- **[src/light/Layer.h](src/light/Layer.h)** — + - Add `lengthType startX_ = 0, startY_ = 0, startZ_ = 0` and `lengthType endX_ = -1, endY_ = -1, endZ_ = -1` members. `-1` means "use full layout extent." + - `onBuildControls()` adds these as `controls_.addInt16("startX", startX_, 0, physW)` etc. (uses int16 control if available; if only uint8 exists today, add uint16 controls — see "Controls" below). + - In `rebuildLUT()` / `onAllocateMemory()`, when computing `width_/height_/depth_` from the layout, **honour the start/end fields** if they're non-default. With one Layer and defaults, the result is identical to today. + - Update `setLayoutGroup(LayoutGroup*)` to accept `Layouts*` (just a type rename — same pointer semantics). Keep the method name `setLayoutGroup` for one cycle, or rename to `setLayouts`. **Plan picks `setLayouts`** since we're renaming everything else anyway, and the inconsistency would be confusing. + +### main.cpp wiring + +Old: + +```cpp +auto* layoutGroup = create("LayoutGroup"); +auto* grid = create("GridLayout"); layoutGroup->addChild(grid); +auto* layer = create("Layer"); layer->setLayoutGroup(layoutGroup); +layer->addChild(create("NoiseEffect")); +layer->addChild(create("MirrorModifier")); +auto* driverGroup = create("DriverGroup"); driverGroup->setLayer(layer); +driverGroup->addChild(create("ArtNetSendDriver")); +driverGroup->addChild(create("PreviewDriver")); + +scheduler.addModule(layoutGroup); +scheduler.addModule(layer); +scheduler.addModule(driverGroup); +``` + +New: + +```cpp +auto* layouts = create("Layouts"); +auto* grid = create("GridLayout"); layouts->addChild(grid); + +auto* layersContainer = create("Layers"); +static_cast<Layers*>(layersContainer)->setLayouts(static_cast<Layouts*>(layouts)); +auto* layer = create("Layer"); +layersContainer->addChild(layer); +static_cast<Layer*>(layer)->setLayouts(static_cast<Layouts*>(layouts)); // happens via setLayouts too 
+layer->addChild(create("NoiseEffect")); +layer->addChild(create("MirrorModifier")); + +auto* drivers = create("Drivers"); +static_cast<Drivers*>(drivers)->setLayer(static_cast<Layer*>(layer)); // placeholder; composition follow-up will read from Layers +drivers->addChild(create("ArtNetSendDriver")); +drivers->addChild(create("PreviewDriver")); + +scheduler.addModule(layouts); +scheduler.addModule(layersContainer); +scheduler.addModule(drivers); +``` + +### Factory + display name + +- [src/main.cpp](src/main.cpp) `registerModuleTypes()`: + - `registerType<Layouts>("Layouts", "light/Layouts.md")` + - `registerType<Layer>("Layer", "light/Layer.md")` — unchanged + - `registerType<class mm::Layers>("Layers", "light/Layers.md")` — new + - `registerType<Drivers>("Drivers", "light/drivers/Drivers.md")` +- `ModuleFactory::displayNameFor` strips role-noun suffixes (`Effect`/`Modifier`/etc.). The new names `Layouts`, `Layers`, `Drivers` don't end with any of those, so they pass through unchanged — UI shows them as written. ✓ + +### UI side (none, mostly) + +The UI is module-driven — it renders whatever the tree says. `acceptsChildren` in [src/ui/app.js](src/ui/app.js) currently allows Effect+Modifier in Layer, Driver in DriverGroup, Layout in LayoutGroup. Update: +- `acceptsChildren` mapping: `"Layouts"` accepts Layout role; `"Layers"` accepts a single role (`Layer`) — but `Layer` isn't a role, it's a *concrete type*. So either (a) introduce a `Layer` role distinct from Generic, or (b) keep `Layers` accepting type-name `Layer` as a special case, or (c) have `Layers` accept Generic children. **Plan picks (a) — add `ModuleRole::Layer`** to the enum. It's a small change, makes the role chip emit `🚇` (or another emoji — the UI's `ROLE_EMOJI` map gains one entry), and the type picker filters correctly. + +Wait — that adds noise. Let me reconsider: + + - **(b) is the lightest:** `acceptsChildren` for `"Layers"` is hardcoded to `[Layer]` (by typeName, not role). 
The UI already special-cases this kind of containment via `acceptsChildren`. The role chip on each `Layer` card stays Generic (⚙️). Slightly cluttered emoji-wise but no role-enum change. + - **(a) is cleaner long-term:** add `ModuleRole::Layer` to the enum. The UI ROLE_EMOJI map gets a new entry (need to pick an emoji — 🪟 / 🎞️ / 🧱 are candidates, will ask the product owner). [check_specs.py](scripts/check/check_specs.py) might depend on the role list; verify. + + **Plan picks (a)** because we're already changing the shape and adding a role is cheaper than a special-case in the UI. **One emoji to pick during implementation.** + +### Spec updates + +- **[docs/moonmodules/light/Layer.md](docs/moonmodules/light/Layer.md)** — update intro to "renders into a buffer sized by either the full Layouts extent or a carved region (start/end controls)." Document the new `setLayouts` method. +- **[docs/moonmodules/light/Layouts.md](docs/moonmodules/light/Layouts.md)** — rename from `LayoutGroup.md`; class is `Layouts`. Body mostly unchanged (still describes the index-stitching). +- **[docs/moonmodules/light/Layers.md](docs/moonmodules/light/Layers.md)** — NEW. Describes the container: holds N Layers, runs each in order in `loop()`, future home of the composed-buffer logic. Single-line forward-reference to the composition follow-up. +- **[docs/moonmodules/light/drivers/Drivers.md](docs/moonmodules/light/drivers/Drivers.md)** — rename from `DriverGroup.md`; class is `Drivers`. +- **[docs/architecture-light.md](docs/architecture-light.md)** — update the pipeline diagram and any prose that names `LayoutGroup`/`DriverGroup`/singular `Layer`. The "UI integration (light domain)" tree shape gets `Layouts → Layers → Drivers` at the top level. +- **[docs/moonmodules/light/EffectBase.md](docs/moonmodules/light/EffectBase.md)** — passing reference: parent is still `Layer`, no change. 
+- **[docs/plan.md](docs/plan.md)** — add a `Multi-Layer composition (pending)` entry covering (a) compose, (b) per-Layer start/end carving activation. +- **[README.md](README.md)** — scan for module type names; update if any examples use `LayoutGroup`/`DriverGroup`. + +### Tests + +- **[test/test_grid_layout.cpp](test/test_grid_layout.cpp)** — references `LayoutGroup`; rename to `Layouts`. +- **Other tests using `LayoutGroup`/`DriverGroup`** — same. Likely test_extrude, test_mirror, test_preview_driver, scenarios. `grep -rln "LayoutGroup\|DriverGroup"` will find all. +- **New test: `test_layers_container.cpp`** — + - One Layers container with one Layer + one effect (RainbowEffect): produces same byte-for-byte buffer as the old single-Layer model. + - One Layers container with two Layers (each with one effect): both child loops run, both buffers are populated. Composition not tested (follow-up). +- **Scenarios** — [test/scenarios/*.json](test/scenarios) reference `LayoutGroup`/`DriverGroup` by type-name strings. Update each. Behaviour byte-identical with one Layer. + +### Migration (persisted config) + +[src/core/FilesystemModule.h](src/core/FilesystemModule.h) writes per-module JSON keyed by **typeName** (e.g. `/.config/LayoutGroup.json`). After rename: +- Either delete the old `.config/*.json` files at boot (easy but loses control values), or +- Add a one-time migration map in `FilesystemModule::load` (`LayoutGroup → Layouts`, etc.). + +**Plan picks: delete-and-warn.** On first boot after this commit, if `.config/LayoutGroup.json` exists, log a warning and delete it. Same for `DriverGroup.json`. The user's control values for these containers were near-zero (no per-instance controls today besides `enabled`), so loss is minimal. Saves implementing a migration framework for one commit. + +## Implementation order + +1. **Add `ModuleRole::Layer`** to [src/core/MoonModule.h](src/core/MoonModule.h). Update `roleName()`. 
Verify [scripts/check/check_specs.py](scripts/check/check_specs.py) doesn't have a hardcoded role list. Build to check for warnings. +2. **Rename `LayoutGroup` → `Layouts`** (class + file via `git mv` + factory key). Update all `#include`s, all `static_cast<LayoutGroup*>`, all references in tests + scenarios + spec. Build + run all tests; expect green (no behaviour change). +3. **Rename `DriverGroup` → `Drivers`** (same treatment). +4. **Add `class Layers`** in [src/light/Layers.h](src/light/Layers.h). Add `setLayouts()` to `Layer`. main.cpp creates `Layers` containing one `Layer`. Run all tests; live-verify with a desktop run that the pipeline still produces frames. +5. **Add `start/end` controls to `Layer`** — uint16 (or int16 if available) with sensible bounds. Default = whole layout. `rebuildLUT()` honours them when not at default. Update [test_layer*.cpp](test) and add a test asserting "Layer with default start/end matches old Layer behaviour byte-for-byte." +6. **UI emoji pick** for `ModuleRole::Layer` — ask the product owner. Add to `ROLE_EMOJI` map in [src/ui/app.js](src/ui/app.js). +7. **Update specs** ([Layer.md](docs/moonmodules/light/Layer.md), new [Layouts.md](docs/moonmodules/light/Layouts.md), new [Layers.md](docs/moonmodules/light/Layers.md), new [Drivers.md](docs/moonmodules/light/drivers/Drivers.md), [architecture-light.md](docs/architecture-light.md), [plan.md](docs/plan.md), [README.md](README.md) if needed). Run [check_specs.py](scripts/check/check_specs.py). +8. **Migration**: FilesystemModule deletes `.config/LayoutGroup.json` and `.config/DriverGroup.json` if present, logs a warning. +9. **All pre-commit gates 1–6** (build, ctest, scenarios, platform boundary, specs, ESP32). Reviewer agent (gate 7) after. + +## Verification checklist + +- [ ] `cmake --build build` — zero warnings, builds clean. +- [ ] `ctest` — all unit tests pass, including the new `test_layers_container.cpp` cases. 
+- [ ] `./build/test/mm_scenarios` — all scenarios pass (after their `LayoutGroup`/`DriverGroup` type-name updates). +- [ ] [check_platform_boundary.py](scripts/check/check_platform_boundary.py) — PASS. +- [ ] [check_specs.py](scripts/check/check_specs.py) — all specs ok. +- [ ] [build_esp32.py](scripts/build/build_esp32.py) — clean ESP32 build. +- [ ] Live desktop run: `/api/types` shows `Layouts`, `Layers`, `Drivers` (no longer `LayoutGroup`, `DriverGroup`). `/api/state` shows the new tree shape. Effects render correctly through the new wiring. Tick time within run-to-run jitter of the previous commit. +- [ ] UI side-nav reads `Layouts`, `Layers`, `Drivers`. Cards under `Layers` contain one `Layer` with effects+modifiers inside. Drag-reorder still works within each container. +- [ ] One snapshot ESP32 run verifies no regression — same scenario, same FPS within jitter. +- [ ] Reviewer agent (Opus) — PASS. + +## Open variations / decisions during implementation + +- **Emoji for `ModuleRole::Layer`** — product owner picks. Suggestions: 🪟 (layered glass), 🎞️ (film strip = sequential layers), 🧱 (brick = stacked). +- **Control type for `start/end`** — if uint8 only, range is 0–255 (fine for current grids up to 128). If int16/uint16 is available, use that. Check existing `Control` types — there's already a `Uint16` (used by `httpServer->port`). +- **Layout file rename or class-only rename?** Plan picks `git mv` for `Layouts.h` and `Drivers.h`. Reject if it makes the diff harder to review — fall back to class-rename-in-place. + +## Notes for the implementer + +- This is a **shape change with explicit no-behaviour-change goal** (composition is the follow-up). Every test should pass with byte-identical output to the previous commit, modulo the type-name strings in JSON config and scenarios. +- The new `Layers` container is **not** the right place to put extrude logic, blend logic, or buffer ownership today. Those stay on `Layer` (and on `Drivers`'s output buffer). 
Resist the temptation to "while we're here, also…" — that's the projectMM-priority bloat trap. +- `dynamic_cast` is disabled on ESP32 (RTTI off). Use `static_cast<Layer*>(child(i))` everywhere — same pattern as `Layouts::forEachCoord` does for `LayoutBase`. +- Per CLAUDE.md: this needs to be planned (this file), implemented in a feature branch (we're on `next-iteration`), tested, then product-owner-approved before commit. Pre-commit gates 1–6 are not optional. The reviewer agent must PASS. +- The plan should be saved as `docs/history/plan-NN.md` after implementation per CLAUDE.md's per-feature workflow. Numbering picks up from the latest in `docs/history/` (not the archived ones). diff --git a/docs/history/plans/Plan-20260524 - Release 1.0 distribution: web installer + GitHub Releases.md b/docs/history/plans/Plan-20260524 - Release 1.0 distribution: web installer + GitHub Releases.md new file mode 100644 index 0000000..9a47368 --- /dev/null +++ b/docs/history/plans/Plan-20260524 - Release 1.0 distribution: web installer + GitHub Releases.md @@ -0,0 +1,421 @@ +# Plan-17 — Release 1.0 distribution: web installer + GitHub Releases + +> **Post-implementation note.** Two divergences from the original draft: +> 1. Of the desktop matrix this plan called for, only the macOS arm64 binary ships in 1.0. The Windows x64 build failed in CI on the first source file because `src/platform/desktop/platform_desktop.cpp` uses POSIX socket headers (`sys/socket.h`, `sendmsg`, `fcntl`, …) that have no MSVC equivalent. The `build-windows` job and the `dist/projectMM-*.zip` upload are removed from `release.yml` until the Windows platform-layer port lands; see `docs/plan.md` "Windows desktop port". +> 2. The original draft's `esp_idf_version: v5.4` in `release.yml` fails to compile `platform_esp32.cpp` — the v5.x EMAC config has `emac_rmii_clock_gpio_t clock_gpio` (strong enum), v6 has `int clock_gpio`. 
Per the plan's risk-1 fallback, CI is pinned to the same v6.1-dev line the local project uses (`esp_idf_version: v6.1-dev` — the rolling Docker tag on `espressif/idf`). The plan's v5.4 references below are historical. +> +> Everything else in this plan ships as described: 4 ESP32 board variants, macOS arm64 desktop, install page on Pages, RC tag dry-run flow. + +## Context + +projectMM v3 ships today as "clone the repo and run MoonDeck." That works for developers but blocks the end user the README promises: "plug in your ESP32, open a browser, see lights." This plan delivers the missing pieces — pre-built binaries on GitHub Releases for 4 ESP32 board variants + macOS + Windows, an ESP Web Tools installer page on GitHub Pages, and a tag-triggered CI pipeline that produces and publishes everything. + +The shape is anchored on projectMM-v1's release flow (matrix CI → GitHub Releases) and on WLED's installer pattern (ESP Web Tools + per-variant manifests). What v3 picks up vs v1: ESP Web Tools (v1 didn't have it), board-selector dropdown (v1 didn't have multiple ESP32 binaries beyond dev + S3). What v3 defers vs v1 to 2.0: OTA, nightly channel, Linux desktop. + +Closed scope (decided before this plan, not revisited here): + +- **4 ESP32 board variants** keyed by chip + feature flags: `esp32` (classic, WiFi only), `esp32-eth` (classic, Ethernet only — WiFi compiled out, smaller image), `esp32-eth-wifi` (classic, Ethernet + WiFi both available), `esp32s3-n16r8` (ESP32-S3 N16R8, WiFi only). Eth variants bake in Olimex ESP32-Gateway pin defaults (LAN8720 @ MDIO 0, PHY RST GPIO 5). Boards with the same PHY but different pins (WT32-ETH01: reset on GPIO 16) need a local rebuild for 1.0; runtime PHY/pin selection is a 2.0 item. P4 → 2.0. +- **Distribution = GitHub Releases + ESP Web Tools page on GitHub Pages.** 4 manifests, manual board dropdown. No Improv WiFi (WiFi creds via the device's SoftAP fallback). 
+- **CI = single `release.yml`** triggered by `git tag v*` and `workflow_dispatch`. While iterating pre-1.0, also triggers on push to `main` / `next-iteration` so build breakage is caught before tagging; the release + Pages-deploy jobs stay gated on a tag ref. Remove the branches trigger once the pipeline is proven. No PR-CI, no nightly. +- **Desktop binaries = macOS arm64 + Windows x64.** No Linux, no macOS x64. +- **No OTA in 1.0.** Users re-flash via Web Tools when a new release lands. +- **Tag matches `library.json` version.** CI fails fast on drift — maintainer bumps version, commits, tags as one act. +- **MoonDeck developer flow unchanged.** `git clone` + `uv run scripts/moondeck.py` stays the dev bootstrap; the slow part is the prerequisite chain (uv, ESP-IDF), not the clone. + +## Architecture + +```text +git tag v1.0.0 + └─> .github/workflows/release.yml + ├─ verify-version (tag == library.json["version"]?) + ├─ build-esp32 (matrix: esp32, esp32-eth, esp32-eth-wifi, esp32s3-n16r8) + │ └─ scripts/build/build_esp32.py --board <key> + ├─ build-macos (macos-14, cmake Release, tar.gz) + ├─ build-windows (windows-latest, cmake/MSVC Release, zip) + └─ release + ├─ assemble per-board firmware bundles + ├─ generate manifest-<board>.json + ├─ gh release upload + └─ deploy docs/install/ → GitHub Pages + +docs/install/index.html + ├─ board dropdown → manifest-<board>.json + └─ <esp-web-install-button> flashes selected board +``` + +## Implementation steps + +Estimated total **11–13 h**. Bulk of risk lives in steps 1, 5, 10 — start there, iterate on RC tags until step 10 passes cleanly, then bump to `1.0.0`. + +### Step 1 — `build_esp32.py --board` (2–3 h) + +Wire a `--board` flag that selects sdkconfig fragments, sets the chip target, and implies the WiFi-on/off cascade. Board names are `chip[-feature[-feature]]` — recognisable from Espressif's own `IDF_TARGET` (chip part) and feature-flag suffixes everyone reads at a glance. 
+ +**Board → sdkconfig + feature table:** + +| `--board` | IDF target | `SDKCONFIG_DEFAULTS` (semicolon-joined) | WiFi compiled in? | +|---|---|---|---| +| `esp32` | `esp32` | `sdkconfig.defaults` | yes | +| `esp32-eth` | `esp32` | `sdkconfig.defaults;sdkconfig.defaults.eth` | **no** (EXCLUDE_COMPONENTS + `MM_ETH_ONLY=1`) | +| `esp32-eth-wifi` | `esp32` | `sdkconfig.defaults;sdkconfig.defaults.eth` | yes | +| `esp32s3-n16r8` | `esp32s3` | `sdkconfig.defaults;sdkconfig.defaults.esp32s3-n16r8` | yes | + +**Why split the Eth lines out of `sdkconfig.defaults`:** + +The base file used to carry 7 Olimex-specific Eth lines (`CONFIG_ETH_USE_ESP32_EMAC=y` … `CONFIG_ETH_DMA_TX_BUFFER_NUM=10`). That's wrong for the `esp32` (WiFi-only) board — RMII GPIOs are tied up at link time, the boot log complains. Move all 7 Eth lines from the base file into a feature-named fragment `sdkconfig.defaults.eth`, leaving the base file genuinely WiFi-only. After the move, `esp32` needs no extra fragment file — it uses `sdkconfig.defaults` alone. + +**Naming note:** the Eth fragment is named for the feature (`.eth`), not the vendor — Olimex pins happen to be the default, but the fragment is the right place for a future PHY-runtime-config to read defaults from. The previous board-vendor-named `sdkconfig.defaults.olimex_gw` is renamed via `git mv` to `sdkconfig.defaults.eth`. The S3 fragment is renamed `sdkconfig.defaults.esp32s3_n16r8` → `sdkconfig.defaults.esp32s3-n16r8` (hyphen rather than underscore, matches the board key). Each S3 SKU keeps its own fragment because flash size, partition table, and PSRAM mode differ per SKU — flashing an `n16r8` binary onto a different module misaligns the partition table. + +**Files to edit:** + +- [scripts/build/build_esp32.py](../../scripts/build/build_esp32.py) — replace the `--profile` argument logic with a `BOARDS` dict + `--board` argument. `--profile` becomes a deprecated alias (`eth-only` → `esp32-eth`, `default` → `esp32`) for one release. 
Replace `profile_cmake_args()` with `board_cmake_args(board)`. +- [scripts/build/build_esp32_ethonly.py](../../scripts/build/build_esp32_ethonly.py) — forward `--board esp32-eth` instead of `--profile eth-only`. Kept for any external scripting that already calls the filename. +- [esp32/sdkconfig.defaults](../../esp32/sdkconfig.defaults) — remove the 7 Eth-block lines. File becomes board-neutral WiFi-default. +- [esp32/sdkconfig.defaults.eth](../../esp32/sdkconfig.defaults.eth) — renamed from `.olimex_gw`. Self-sufficient (carries every Eth setting the working Olimex build needs); comment names Olimex as the default pin map and points at the 2.0 PHY-runtime-config plan. +- [esp32/sdkconfig.defaults.esp32s3-n16r8](../../esp32/sdkconfig.defaults.esp32s3-n16r8) — renamed from `.esp32s3_n16r8` (hyphen instead of underscore — matches the board key). No content change. +- Profile-change marker: rename `esp32/build/.mm_profile` → `.mm_board`. Migrate on first run (if the legacy file exists, read it once, treat as the equivalent board, then write the new marker). +- [scripts/moondeck.py](../../scripts/moondeck.py) — add `extra_args` forwarding (3 lines) so a config entry can pass static flags to its script. +- [scripts/moondeck_config.json](../../scripts/moondeck_config.json) + [scripts/MoonDeck.md](../../scripts/MoonDeck.md) — replace the "Build" / "Build (Ethernet-only)" pair with four board buttons, each baking a `--board` arg via `extra_args`. +- [docs/moonmodules/core/NetworkModule.md](../moonmodules/core/NetworkModule.md) — update the Ethernet-only section to reference `--board esp32-eth`. + +### Step 2 — Version-drift guard (0.5 h) + +CI must verify `git tag == library.json["version"]` and fail before building. No CI write-back; maintainer-driven version bumps. 
+ +**Files to create:** + +- [scripts/build/verify_version.py](../../scripts/build/verify_version.py) — short script that reads `GITHUB_REF_NAME` (without leading `v`) and `library.json` `"version"`, fails the workflow if they differ. + +**Action at release time:** + +Maintainer bumps `library.json` from `0.1.0` to `1.0.0`, commits, tags `v1.0.0`, pushes tag. + +### Step 3 — Desktop packaging (1.5 h) + +Static-link what we can; accept dynamic libc++ on macOS (Apple doesn't ship a static libc++.a). Use MSVC `/MT` on Windows to avoid the vcredist dependency. + +**Files to create / edit:** + +- [scripts/build/package_desktop.py](../../scripts/build/package_desktop.py) — new. Reads version from `library.json`, detects host platform, runs the right CMake invocation, packages. + - macOS arm64: `cmake -B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_OSX_ARCHITECTURES=arm64 && cmake --build build --config Release`. Tarball as `dist/projectMM-macos-arm64-vX.Y.Z.tar.gz` with the binary + a short `README.txt`. + - Windows x64: `cmake -B build -G "Visual Studio 17 2022" -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded && cmake --build build --config Release`. Zip as `dist/projectMM-windows-x64-vX.Y.Z.zip`. +- [CMakeLists.txt](../../CMakeLists.txt) — gate the warning flags by compiler: + ```cmake + if(MSVC) + add_compile_options(/W4 /WX) + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") + else() + add_compile_options(-Wall -Wextra -Werror) + endif() + ``` + +**Honest note on MSVC `/WX`:** the code is unlikely to be clean under `/W4` warning-as-error on first try (signed/unsigned conversions, `[[maybe_unused]]` discipline, `snprintf` warnings differ). **Fallback if a clean build is days away:** disable `/WX` on Windows for 1.0 and file a follow-up. The ESP32 firmware is the primary product; the desktop binary is a convenience. 
+ +### Step 4 — Manifest generator (1 h) + +ESP Web Tools manifest format (one file per board, referenced by the install page). Offsets are **chip-family-specific**: + +- ESP32 (classic): bootloader at `0x1000` (4096). +- ESP32-S3: bootloader at `0x0` (0). The ROM expects it there — wrong offset bricks visibly. + +Don't hardcode the offset table — read from `esp32/build/flasher_args.json` produced by the build (it already contains the correct offsets per chip). The CI build job copies `flasher_args.json` alongside the bins; the manifest generator parses it. + +**Files to create:** + +- [scripts/build/generate_manifest.py](../../scripts/build/generate_manifest.py) — takes `--board <key> --version <ver> --release-url <url> --flasher-args <path> --out <path>`, writes the manifest JSON with parts ordered by offset. + +Schema: + +```json +{ + "name": "projectMM", + "version": "1.0.0", + "home_assistant_domain": "projectMM", + "new_install_prompt_erase": true, + "builds": [ + { + "chipFamily": "ESP32", + "parts": [ + { "path": "<release-url>/firmware-esp32-eth-v1.0.0-bootloader.bin", "offset": 4096 }, + { "path": "<release-url>/firmware-esp32-eth-v1.0.0-partition-table.bin", "offset": 32768 }, + { "path": "<release-url>/firmware-esp32-eth-v1.0.0-ota-data.bin", "offset": 57344 }, + { "path": "<release-url>/firmware-esp32-eth-v1.0.0.bin", "offset": 65536 } + ] + } + ] +} +``` + +### Step 5 — CI release workflow (4 h) + +**File: [.github/workflows/release.yml](../../.github/workflows/release.yml)** — new. + +Job graph: `verify-version` → (`build-esp32` matrix × 4, `build-macos`, `build-windows`) → `release`. The final job collects artifacts from all six build jobs (4 ESP32 + 2 desktop), generates manifests, uploads to the release, and deploys Pages. + +Key shape: + +```yaml +name: Release +on: + push: + tags: ['v*'] + workflow_dispatch: + inputs: + tag: { description: 'Tag (must already exist, e.g. 
v1.0.0)', required: true } + +jobs: + verify-version: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: python scripts/build/verify_version.py + + build-esp32: + needs: verify-version + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + board: [esp32, esp32-eth, esp32-eth-wifi, esp32s3-n16r8] + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v4 + with: + path: | + ~/.espressif + ~/esp/esp-idf + key: esp-idf-v5.4-${{ runner.os }} + - uses: espressif/esp-idf-ci-action@v1 + with: + esp_idf_version: v5.4 + target: ${{ startsWith(matrix.board, 'esp32s3') && 'esp32s3' || 'esp32' }} + path: 'esp32' + command: python ../scripts/build/build_esp32.py --board ${{ matrix.board }} + - name: Stage artifacts + run: | + mkdir -p dist + V=$(jq -r .version library.json) + B=esp32/build + cp $B/projectMM.bin dist/firmware-${{ matrix.board }}-v$V.bin + cp $B/bootloader/bootloader.bin dist/firmware-${{ matrix.board }}-v$V-bootloader.bin + cp $B/partition_table/partition-table.bin dist/firmware-${{ matrix.board }}-v$V-partition-table.bin + cp $B/ota_data_initial.bin dist/firmware-${{ matrix.board }}-v$V-ota-data.bin + cp $B/flasher_args.json dist/flasher-${{ matrix.board }}.json + - uses: actions/upload-artifact@v4 + with: { name: esp32-${{ matrix.board }}, path: dist/ } + + build-macos: + needs: verify-version + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + - run: python scripts/build/package_desktop.py + - uses: actions/upload-artifact@v4 + with: { name: desktop-macos, path: dist/ } + + build-windows: + needs: verify-version + runs-on: windows-latest + steps: + - uses: actions/checkout@v4 + - run: python scripts/build/package_desktop.py + - uses: actions/upload-artifact@v4 + with: { name: desktop-windows, path: dist/ } + + release: + needs: [build-esp32, build-macos, build-windows] + runs-on: ubuntu-latest + permissions: { contents: write, pages: write, id-token: write } + steps: + - uses: actions/checkout@v4 + - uses: 
actions/download-artifact@v4 + with: { path: artifacts } + - name: Flatten artifacts + run: mkdir -p dist && find artifacts -type f -exec mv {} dist/ \; + - name: Generate manifests + run: | + V=$(jq -r .version library.json) + BASE=https://github.com/${{ github.repository }}/releases/download/v$V + for B in esp32 esp32-eth esp32-eth-wifi esp32s3-n16r8; do + python scripts/build/generate_manifest.py \ + --board $B --version $V --release-url $BASE \ + --flasher-args dist/flasher-$B.json --out dist/manifest-$B.json + done + - name: Stage GitHub Pages + run: | + mkdir -p pages/install + cp -r docs/install/* pages/install/ + cp dist/manifest-*.json pages/install/ + - uses: softprops/action-gh-release@v2 + with: + files: | + dist/firmware-*.bin + dist/manifest-*.json + dist/projectMM-*.tar.gz + dist/projectMM-*.zip + fail_on_unmatched_files: true + - uses: actions/upload-pages-artifact@v3 + with: { path: pages } + - uses: actions/deploy-pages@v4 +``` + +ESP-IDF caching at `~/.espressif` + `~/esp/esp-idf` is ~2 GB, well under the 10 GB repo cache cap. First run ~10 min, subsequent restores ~30 s. + +### Step 6 — Installer page (1.5 h) + +WLED-style minimal page. URL after Pages deployment: `https://ewowi.github.io/projectMM/install/`. + +**Files to create:** + +- [docs/install/index.html](../install/index.html) — board dropdown + `<esp-web-install-button>`. On dropdown change, swap the `manifest` attribute on the button. Use the unpkg-hosted ESP Web Tools v10. +- [docs/install/README.md](../install/README.md) — one-paragraph note explaining the manifests are *generated per-release* by `release.yml`, not committed to git. Cloners won't see them locally. 
+ +Page structure: + +```html +<label for="board">Board:</label> +<select id="board"> + <option value="esp32">ESP32 — WiFi only</option> + <option value="esp32-eth">ESP32 — Ethernet only (Olimex pins)</option> + <option value="esp32-eth-wifi">ESP32 — Ethernet + WiFi (Olimex pins)</option> + <option value="esp32s3-n16r8">ESP32-S3 DevKitC-1 (N16R8) — WiFi only</option> +</select> +<esp-web-install-button id="installer" manifest="manifest-esp32.json"></esp-web-install-button> +``` + +After-flash UX text: "The device boots a SoftAP named `projectMM-xxxx`. Join it, open `http://4.3.2.1`, enter your WiFi credentials." + +### Step 7 — Enable GitHub Pages (manual, 0.25 h) + +One-time repo setting: **Settings → Pages → Source: GitHub Actions**. The `deploy-pages` action in `release.yml` publishes. No code change. + +### Step 8 — README + building.md (0.75 h) + +- [README.md](../../README.md) — replace the "From a release" subsection with two crisp paragraphs: ESP32 flash via the installer URL, desktop binaries via the Releases page. Drop the Teensy / RPi / Linux desktop bullets — those aren't shipped in 1.0. They go back in when the binaries exist. +- [docs/building.md](../building.md) — replace the "Build profiles" subsection with a "Boards" table mirroring step 1's table. Drop the obsolete `--profile` doc (or fold into the `esp32-eth` row note). + +### Step 9 — plan.md → 2.0 stub (0.25 h) + +- [docs/plan.md](../plan.md) — replace the "Release 1.0" milestone section with a forward-looking "Release 2.0" section: + - ESP32-P4 board variant. + - OTA / FirmwareUpdateModule (passive-observer pattern from v1). + - Linux desktop binary. + - Nightly CI / pre-release channel. + - Improv WiFi for one-step flash-then-credentials. +- Strike item 13 (README quick-start) and the Release 1.0 milestone — both subsumed by this plan. + +### Step 10 — End-to-end dry run (1 h) + +Three test surfaces stacked from cheapest to most production-like. 
Each catches +problems the next one would also catch, but later and at higher cost. + +**Test surface 1 — Local C+ recipe.** Documented in +[`docs/install/README.md`](../install/README.md) § "End-to-end with CI-built +firmware". Runs against the latest branch CI artifacts. Catches manifest +schema errors, page-render bugs, real Web Serial flash against a real binary +per board. Zero CI minutes, zero release-page noise. Run this *before* any RC +tag — it's a 5-minute loop. Doesn't exercise GitHub Pages deploy or the +gh-release action. + +**Test surface 2 — RC tag (`vX.Y.Z-rcN`).** The full release pipeline minus +the Pages publish. Releases land as **pre-releases** (marked with the +GitHub "Pre-release" badge, sorted below stable releases, not picked up by +"latest" tooling). The Pages deploy step is skipped on RC tags so end users +visiting the installer URL keep seeing the previous stable release. Catches +everything surface 1 misses except the live Pages flip. Iterate `rcN → rcN+1` +as needed — RC releases are cheap to delete. + +**Test surface 3 — Stable tag (`vX.Y.Z`).** The real release. Pages flips to +publish the installer page from this tag's manifests. Only run when surfaces +1 and 2 are clean. + +**Procedure for the first 1.0:** + +1. **Surface 1.** Push the branch, wait for branch CI green, run the local + C+ recipe with the new artifacts. Flash each of the four boards locally. + Fix any issues in source and repeat. +2. **Surface 2.** When the local recipe passes: + - Bump `library.json` to `1.0.0-rc1`. Commit. Tag `v1.0.0-rc1`. Push tag. + - Watch Actions: `verify-version` + 4 ESP32 jobs + 2 desktop jobs + release + job all green. (Pages staging + deploy steps are correctly *skipped*.) + - Visit the release page: 22 files (4×4 ESP32 bins + 4 manifests + macOS + tarball + Windows zip). The release is flagged "Pre-release". 
+ - Manually point the local install page at the rc1 release URLs (edit the + manifest's `release-url` and regenerate), flash each board, confirm the + UI loads. + - Issues found? Delete the RC: `gh release delete v1.0.0-rc1 --yes && git + push --delete origin v1.0.0-rc1`. Fix on the branch, bump `library.json` + to `1.0.0-rc2`, tag, push. Repeat until clean. +3. **Surface 3.** When the RC is fully green: + - Bump `library.json` from `1.0.0-rcN` to `1.0.0`. Commit. Tag `v1.0.0`. Push. + - Watch Actions: everything green, **including** Pages staging + deploy + (no longer skipped because the tag has no `-rc`). + - Visit `https://ewowi.github.io/projectMM/install/`. Page loads, dropdown + has 4 options. The flash path uses the stable v1.0.0 release URLs. + - Optional cleanup: delete the leftover RC tags + pre-releases. + +If any step fails on a real board: don't ship. The installer page exists +precisely so end users don't need to read serial logs — its first-flash +experience has to be reliable. + +## Per-release criteria + +These run as the per-release additions to CLAUDE.md's Event 3 (Release tag) gates, on top of the always-on items (PR-merge gates passed, hardware test, no known critical bugs). + +1. **Principles audit.** Sweep `docs/` (excluding `docs/plan.md` and `docs/history/`) and `src/` for present-tense violations and forward-looking language — "roadmap", "will be", "in the future", "planned", "todo", "currently lacks" outside the allowed locations. The reviewer agent can run this; a one-line `grep -rn "TODO\|will be\|going to\|in the future" docs/ src/` gives a starting list. Acceptable hits get one-line justifications; the rest get rewritten present-tense or moved to `docs/plan.md` / `docs/history/`. +2. 
**All Principles in CLAUDE.md** verified end-to-end: common patterns first (no bespoke conventions sneaking in), minimalism (nothing got added without earning its place), data over objects, concrete first, domain-neutral core, present tense. +3. **Cross-board flash test.** All four ESP32 board variants flashed from the installer page on actual hardware (or two variants × the boards available — see dry-run § Step 10). +4. **Branch CI cleanup decision.** If the pre-1.0 `push.branches` trigger in `release.yml` has earned removal (the build path is proven across N tags), strike the `branches:` block before tagging 1.0. Otherwise carry it forward with a comment refresh. + +## Critical files to be modified or created + +**New:** + +- [.github/workflows/release.yml](../../.github/workflows/release.yml) +- [scripts/build/package_desktop.py](../../scripts/build/package_desktop.py) +- [scripts/build/generate_manifest.py](../../scripts/build/generate_manifest.py) +- [scripts/build/verify_version.py](../../scripts/build/verify_version.py) +- [docs/install/index.html](../install/index.html) +- [docs/install/README.md](../install/README.md) + +**Edited:** + +- [scripts/build/build_esp32.py](../../scripts/build/build_esp32.py) — add `--board`, deprecate `--profile`. +- [scripts/build/build_esp32_ethonly.py](../../scripts/build/build_esp32_ethonly.py) — forwards to `--board esp32-eth`. +- [esp32/sdkconfig.defaults](../../esp32/sdkconfig.defaults) — drop the 7 Eth lines. +- [esp32/sdkconfig.defaults.eth](../../esp32/sdkconfig.defaults.eth) — renamed from `.olimex_gw`. Self-sufficient (carries the full Olimex pin set). +- [esp32/sdkconfig.defaults.esp32s3-n16r8](../../esp32/sdkconfig.defaults.esp32s3-n16r8) — renamed from `.esp32s3_n16r8`. No content change. +- [CMakeLists.txt](../../CMakeLists.txt) — MSVC-gated warning flags + static MSVC runtime. +- [library.json](../../library.json) — bump `0.1.0` → `1.0.0` at release time. 
+- [scripts/moondeck.py](../../scripts/moondeck.py) — `extra_args` forwarding. +- [scripts/moondeck_config.json](../../scripts/moondeck_config.json) + [scripts/MoonDeck.md](../../scripts/MoonDeck.md) — four board buttons. +- [docs/moonmodules/core/NetworkModule.md](../moonmodules/core/NetworkModule.md) — `--board esp32-eth` reference. +- [README.md](../../README.md) — Quick Start with installer URL. +- [docs/building.md](../building.md) — boards table. +- [docs/plan.md](../plan.md) — Release 1.0 → 2.0 stub. + +**Manual:** + +- Repo Settings → Pages → Source: GitHub Actions. + +## Verification + +- **Local board builds:** `python scripts/build/build_esp32.py --board esp32`, `--board esp32-eth`, `--board esp32-eth-wifi`, and `--board esp32s3-n16r8` all complete with zero warnings. Each writes `esp32/build/projectMM.bin` + `flasher_args.json` for the right chip. +- **Local desktop:** `python scripts/build/package_desktop.py` on macOS produces a tarball under `dist/` that runs on a fresh Mac. +- **Local installer (surface 1):** the C+ recipe in [`docs/install/README.md`](../install/README.md) — pull branch CI artifacts with `gh run download`, serve locally, flash each of the four boards over USB through the install page. +- **RC dry run (surface 2):** push `v1.0.0-rcN` tag, all workflow jobs green, pre-release page populated with 22 files, Pages staging + deploy correctly skipped, manual flash per board succeeds against the rcN release URLs. +- **Real release (surface 3):** tag `v1.0.0`, Pages flips, the live `https://ewowi.github.io/projectMM/install/` flashes each board cleanly. + +## Risks and unknowns + +1. **MSVC `/WX` cleanliness.** Likely a few warning fixes needed before Windows binary builds. Acceptable fallback: drop `/WX` for 1.0, file follow-up. +2. **ESP-IDF version pin (v5.4 vs the project's v6.x-dev SHA).** Project uses some v6-era APIs (`esp_eth_phy_new_generic`, mDNS component manager). 
If v5.4 doesn't compile, fall back to manual ESP-IDF checkout at the exact SHA via a CI step before `esp-idf-ci-action`. +3. **`<esp-web-install-button>` manifest swap on dropdown change.** Some versions cache the parsed manifest. Mitigation: if `setAttribute` doesn't pick up the new value live, recreate the element on dropdown change. +4. **GitHub Pages CORS for binaries on `objects.githubusercontent.com`.** Should be `Access-Control-Allow-Origin: *` already — WLED, ESPHome, dozens of projects use this exact pattern. Verify in the RC dry-run by manually rewriting the local install page's manifest to the rcN release URLs and flashing through it; if it fails, host binaries on the gh-pages branch instead of release assets. +5. **macOS arm64 Gatekeeper warning.** Unsigned binary triggers "downloaded from internet, allow?" on first run. Document in release notes for 1.0; code-signing is 2.0+ work. +6. **`new_install_prompt_erase: true` wipes saved config.** Right default for a beta product (avoids stale-config bugs). Document; revisit when the config schema is stable. + +## Notes + +- Per CLAUDE.md per-feature workflow, this plan is saved as `docs/history/plan-17.md` at the start of implementation. +- Per CLAUDE.md gate-3 rule, plan reconciliation lands on the branch before the merge commit. This branch already carries pending `decisions.md` + plan-archive moves from the previous merge — the implementer should let them ride in the same commit train as plan-17's work. 
diff --git a/docs/history/plans/Plan-20260525 - Release-channel picker + first-boot WiFi provisioning.md b/docs/history/plans/Plan-20260525 - Release-channel picker + first-boot WiFi provisioning.md new file mode 100644 index 0000000..d3e237e --- /dev/null +++ b/docs/history/plans/Plan-20260525 - Release-channel picker + first-boot WiFi provisioning.md @@ -0,0 +1,509 @@ +# Plan-18 — Release-channel picker + first-boot WiFi provisioning + +> **Post-implementation note.** Plan-18 shipped all three tracks (OTA, web installer, Improv) as described. The branch carried six unplanned follow-up plans that landed in the same merge: +> +> - **Plan-19 — MoonDeck ESP32 tab refresh.** Replaced the dead "Chip" dropdown with a Firmware-variant picker; collapsed four "Build esp32-X" buttons into one parameterised Build; moved Setup above the dropdowns; separated Flash from Build with the Port dropdown between them; added a "destructive" confirm flag; fixed the `?` help-anchor renderer to emit `<h3 id="…">` so deep links land on the right section. +> - **Plan-19.1 — Per-target build directories.** `build/esp32-<board>/` + `build/<host>/`. Each board has its own build dir; switching boards is free (no clean rebuild). Mirrors the deployment layout the release workflow already used (`dist/firmware-<board>-v<ver>.bin`). A follow-up commit added `-DSDKCONFIG=…/sdkconfig` to keep per-build-dir sdkconfigs isolated from each other (`esp32/sdkconfig` at the project root no longer exists). +> - **Plan-20 — Web installer end-user features.** "Your devices" card backed by `localStorage`, with Visit / Erase / Forget buttons. Erase reuses ESP Web Tools' `erase-first` install button. Diagnose intentionally moved to the device UI (same-origin nav-footer link) because Chrome's mixed-content blocker prevents an HTTPS Pages page from fetching `http://<device>/api/state`. +> - **Plan-21 — Improv as a child of Network module.** Attempted, reverted same session. 
The architectural shape is right but crosses load-bearing infrastructure: `Scheduler::tick()` only walks top-level modules for `loop20ms`/`loop1s`, so a child module's tick callbacks silently disappear. Carved out for a future plan that fixes the scheduler/MoonModule chain first. +> - **Plan-22 — Nightly builds.** `.github/workflows/nightly.yml` cron'd at 04:00 UTC tags `nightly-YYYY-MM-DD` if `main` HEAD has moved since the last nightly, prunes nightly releases older than 7 days. Reuses `release.yml` via tag push — zero duplication of build matrix or Pages logic. `verify_version.py` learned to skip the library.json check on `nightly-*` tags (they're snapshot labels, not semver). +> - **Plan-23 — Split `platform_esp32.cpp` by subsystem.** 1281 lines → 700 (core) + 3 sibling files (FS, OTA, Improv). Network stayed in the core file because Eth + WiFi + sockets + mDNS share file-scope state — splitting would need an internal header with `extern` declarations or a singleton refactor. Desktop's `platform_desktop.cpp` deliberately stayed in one file; its OTA/Improv/FS stubs are 6 lines each. Asymmetry is intentional. +> +> The branch also carried a real bug fix the user surfaced mid-implementation: `NetworkModule::setWifiCredentials` did `wifiStaInit` directly without first stopping the AP-mode driver, so the `IP_EVENT_STA_GOT_IP` handler never registered and the state machine sat in limbo. AP→STA tear-down now runs explicitly in `setWifiCredentials`. The bug had been masked because the only callers were the credential-entry-then-reboot UI flow (reboot hides it) — Improv's "set credentials on a running device" flow exposed it. + +## Context + +projectMM has three installer surfaces, two of which don't exist in v3 yet: + +1. **Web installer** at `https://ewowi.github.io/projectMM/install/` — first-flash, browser does the work via Web Serial. Bound to one release at deploy time today (plan-17's design). +2. 
**On-device OTA installer** — re-flash after the device is running. **Missing entirely in v3.** projectMM-v1 had it (`FirmwareUpdateModule` + `/api/firmware/url`). +3. **First-boot WiFi provisioning** — the flow that gets credentials onto a freshly-flashed device. Today it's "device boots SoftAP at `4.3.2.1`, user joins from a phone, opens the UI, types creds, reboots." Five friction steps, one of which (joining the AP from a phone) is genuinely confusing for non-technical users. **Missing in v3** — and v1 didn't have a polished version either; v1 used a deploy-time partition-baking script (`deploy/wifi.py` + `deploy/flashfs.py --wifi`) that's useful for racks of devices over USB but doesn't help an end user with one board. + +Plan-18 builds all three with a coherent identity: + +- **A shared release-channel picker JS module** powers both installers (Tracks 1+2): visitor (browser, or device-UI tab) picks **Stable** or **Pre-release (beta)** → release → board → click Install. Same code, two surfaces, one mental model. +- **Improv WiFi over USB-serial** (Track 3) handles first-boot provisioning. Browser drives the protocol immediately after a flash via ESP Web Tools; a Python CLI (`scripts/build/improv_provision.py`) drives it for headless / rack / CI use. Same protocol, two transports. + +**Step 0 of v1 of this plan empirically falsified plan-17 risk #4**: GitHub release-asset URLs (both `github.com/.../releases/download/` and the `release-assets.githubusercontent.com` redirect target) return **no `Access-Control-Allow-Origin` headers**. Cross-origin browser fetches are blocked. WLED works around this with a third-party CORS proxy (`proxy.corsfix.com`); ESPHome self-hosts every binary; projectMM-v1 sidestepped the problem entirely by not having a web installer. v3 chooses **self-host on Pages** (Option 1 from the CORS replan discussion): the release workflow stages the last 5 stable + 5 prerelease releases' binaries into Pages content. 
End result: the binaries are served from the same origin as the install page, so no CORS applies. + +The OTA installer **does not have a CORS problem** — the device's ESP-IDF HTTPS client (`esp_https_ota`) has no Same-Origin Policy; it just GETs the URL and writes bytes to the OTA partition. This asymmetry is why v1 has only the OTA flavour (easier). Plan-18 captures both: the *picker UX* is shared, the *binary fetch path* differs (browser-fetches-self-hosted vs device-fetches-GitHub). + +Closed scope (locked by product owner): + +- **Three tracks in one PR.** OTA → web installer → Improv, in that delivery order. All ship under plan-18. +- **Shared JS module at `src/ui/release-picker.js`**. Embedded into device builds via the existing `embed_ui.cmake` pipeline. Imported via `<script type="module">` in `docs/install/index.html` (same file, two consumers). +- **OTA compatibility filter**: device shows releases whose board is compatible with `MM_BOARD_NAME`. Bespoke rule, documented inline: strip `-eth*` suffix from both sides; matching identities are mutually compatible. So `esp32` / `esp32-eth` / `esp32-eth-wifi` are mutually compatible; `esp32s3-n16r8` is only itself. +- **Web installer CORS solution**: self-host last 5 stable + 5 prerelease releases on Pages. Release workflow stages binaries into the Pages artifact cumulatively (fetch-fresh-from-GitHub-Releases on each deploy; "Pattern D" from the Explore agent's research). +- **Smart default**: most-recent stable; fall through to most-recent prerelease if none exists. +- **RC visibility**: always shown in the channel dropdown, labelled "Pre-release (beta)". No URL gating. +- **Per-release board list**: derived from the release's `manifest-<board>.json` assets (strict regex parse). +- **Caching**: sessionStorage, 5-minute TTL. Dev escape hatch: `?nocache=1`. +- **Yanked releases**: delete the GitHub release; API stops returning it; cumulative-stage step trims it on next deploy. No special UI. 
+- **Two dropdowns: release + board.** Single flat release dropdown listing every release newest-first; RCs flagged with a `(beta)` suffix on the option text (e.g. `v1.0.0-rc1 (beta) — 12 hours ago`). Smart default selects the newest stable; falls through to the newest prerelease if none exists. **Reconciled mid-implementation**: the original plan called for a separate channel select + `<details>` "Pick specific release" expand, but the two-axis layout (stability vs version) was confusing; the single flat dropdown is simpler, the compatibility filter on the board step still does the protection work, and an RC remains visually distinct via the suffix + colour. +- **Relative-time display** ("2 days ago") next to each release. +- **`app.js` becomes a module** (`<script type="module">`) to import `release-picker.js` cleanly. Single-attribute HTML change. +- **File-upload OTA route (`POST /api/firmware`) is skipped.** Picker drives `/api/firmware/url` only. v1 had both; v3 doesn't need the file-picker affordance on day one. +- **RC-tag Pages skip is removed.** With cumulative content, the install page is the canonical "all our releases" surface; Stable-by-default already protects naive end users. Pages publishes on every tag including RC. + +Track 3 (Improv) closed scope: + +- **Library**: `improv/improv` (v1.2.5) as an ESP-IDF managed component, fetched from the ESP Component Registry. ~10 KB of bundled library + cert-free serial protocol; reuses the existing mbedTLS bundle plan-18 already added for OTA. (Original plan said `improv-wifi/sdk-cpp` — that name was the GitHub repo coordinate, not the Registry coordinate; the Registry uses `improv/improv` and that's what ships in `idf_component.yml`.) +- **Serial source**: UART0 only on every board. ESP32-S3-DevKitC-1's UART USB port works (UART0 routed to the on-board USB-UART bridge); the S3's native USB-Serial-JTAG port doesn't. AP-fallback remains the only path for users with USB-CDC-only connections. 
+- **Lifecycle**: always-on listener, no task suspension. Provision requests are rejected (with Improv's wrong-state error frame) when `platform::wifiStaConnected() == true`; scan + info requests stay available so a browser can identify a running device. +- **What it surfaces**: one read-only `provision_status` Control matching `FirmwareUpdateModule`'s shape. No buttons, no re-provision affordance — the protocol is the entry point. +- **Rack / CI mode**: `scripts/build/improv_provision.py` — pyserial CLI speaking the Improv protocol. Single-port mode today (`--port + --ssid + --password`); a future `--from-list <devicelist.json>` mode is a separate plan once v3 has a devicelist schema. +- **AP-fallback flow stays unchanged.** Improv adds a third credential-entry path alongside the AP fallback UI and the persistence-loaded values; all three converge on `NetworkModule::ssid_` / `password_`. + +Prior art (the design isn't bespoke): + +- **projectMM-v1 OTA**: `projectMM-v1/src/modules/system/FirmwareUpdateModule.h` (MoonModule with `update_status` + `update_pct` display controls), `projectMM-v1/src/core/OtaState.h` (file-scope statics), `projectMM-v1/src/core/AppRoutes.cpp:174-210` (`POST /api/firmware/url`), `projectMM-v1/src/frontend/app.js:1235-1410` (release-listing JS with sessionStorage cache + prerelease filter). Plan-18 ports the architecture, not the code — same shape, v3 idioms. +- **WLED installer at `install.wled.me`**: cross-origin release-asset fetch via `proxy.corsfix.com`. Plan-18 rejects this for the third-party dependency cost. +- **ESPHome at `web.esphome.io`**: self-hosts every binary in its Pages site. Plan-18's Track 2 follows this shape directly. 
+ +## Architecture + +```text + src/ui/release-picker.js (the shared module) + │ + ┌──────────┴────────────┐ + │ │ + Device UI (OTA) Web installer + src/ui/app.js docs/install/index.html + imports inline imports as <script type="module"> + │ │ + Install click: Install click: + POST { url } to setAttribute('manifest', url) + /api/firmware/url on <esp-web-install-button> + (device fetches (ESP Web Tools flashes via Web Serial) + binary via HTTPS; + no CORS) + │ │ + ▼ ▼ + api.github.com docs/install/releases/<tag>/*.bin + /repos/.../releases (Pages-hosted; release workflow + (CORS OK for API) stages new release + retains last + 5 stable + 5 prerelease) +``` + +## Implementation steps + +Three tracks, sequential within one PR. Track 1 must be hardware-verified before Track 2 touches the release workflow; Track 3 ships after Tracks 1+2. Total estimated **~13 h** (~10 h for Phase 0 + Tracks 1–2, plus ~3 h for Track 3). + +### Phase 0 — Shared groundwork (1.5 h) + +**Step 0.1 — Read v1 prior art (0.5 h).** Read `projectMM-v1/src/frontend/app.js:1235-1410`, `projectMM-v1/src/modules/system/FirmwareUpdateModule.h`, `projectMM-v1/src/core/AppRoutes.cpp:174-210` end-to-end. Confirm sessionStorage shape, the `per_page` choice, relative-time rendering. Read-only. + +**Step 0.2 — Add `hasOta` platform flag (0.25 h).** Add `constexpr bool hasOta = true;` to [src/platform/esp32/platform_config.h](src/platform/esp32/platform_config.h) and `= false;` to [src/platform/desktop/platform_config.h](src/platform/desktop/platform_config.h). Mirror the existing `hasWiFi` / `hasEthernet` pattern. + +**Step 0.3 — Extend `mm::platform` with OTA primitives (0.75 h).** Add to [src/platform/platform.h](src/platform/platform.h) in the network section: + +```cpp +// OTA — ESP32 only; desktop stubs return false from every entry point. +// Out-params drive Control buffers polled at 1 Hz by FirmwareUpdateModule. 
+bool ota_begin(size_t imageSize); +bool ota_write(const uint8_t* data, size_t len); +bool ota_end(); +bool http_fetch_to_ota(const char* url, + char* statusBuf, size_t statusBufLen, + uint8_t* pctOut); +``` + +`ota_begin/write/end` are kept available even though the file-upload route is skipped for this PR — they're a clean abstraction over `esp_ota_*` and may serve a future use (a debug-only local-file-upload affordance, for instance). + +### Track 1 — On-device OTA module (5.5 h, ships verified before Track 2) + +**Step 1.1 — Build `src/ui/release-picker.js` (2.5 h, the load-bearing piece).** Create the new file. Self-contained ES module exporting one symbol: + +```js +export const releasePicker = { + init({ container, ownBoardKey, onInstall }) +}; +``` + +- `container`: DOM element to mount into. +- `ownBoardKey`: device's `MM_BOARD_NAME` (compatibility filter on); `null` = web installer (no filter). +- `onInstall(board, manifestUrl, binaryUrl)`: callback fired when user clicks Install. The picker does NOT decide *how* to install — caller's concern. + +Sectioned the same way [src/ui/app.js](src/ui/app.js) is sectioned (hand-maintained, comment headers per section): + +1. `fetchReleases()` — GET `https://api.github.com/repos/ewowi/projectMM/releases?per_page=10`, sessionStorage cache keyed on URL, 5-minute TTL. Returns normalised `[{tag, name, publishedAt, isPrerelease, assets:[{name,url}]}]`. +2. `parseBoardsFromAssets(assets)` — for each `manifest-<board>.json` asset, find the matching `firmware-<board>-v<ver>.bin` asset, return `[{board, manifestUrl, binaryUrl}]`. +3. `isCompatible(ownBoard, candidateBoard)` — strip `-eth*` from both; equal identities = compatible; null `ownBoard` = always compatible. **Bespoke**, one-line comment explains the rule. +4. `relativeTime(iso)` — `Intl.RelativeTimeFormat` + small diff-to-unit helper, ~15 lines, no library. +5. 
`render()` — channel select, release select (filtered by channel + compatibility), board select (per-release compatible boards), Install button. Smart default per the locked decisions. +6. Wire Install → `opts.onInstall(...)`. + +**Common-patterns check**: ES-module shape, recognisable from Lit / vanilla web components. No bespoke framework choices. + +**Risk**: medium. Single piece of load-bearing code for both tracks; a bug = bug everywhere. Mitigate with strict no-external-state design and identical sectioning to `app.js`. + +**Step 1.2 — Wire the JS into the UI embedding pipeline (0.5 h).** Extend [src/ui/embed_ui.cmake](src/ui/embed_ui.cmake) to read `release-picker.js`, emit a `releasePickerJs[]` byte array + length. Add `release-picker.js` to the `DEPENDS` lists in both [CMakeLists.txt](CMakeLists.txt) (root) and [esp32/main/CMakeLists.txt](esp32/main/CMakeLists.txt). Add a `/release-picker.js` route to [src/core/HttpServerModule.cpp](src/core/HttpServerModule.cpp)'s GET ladder (one extra branch next to the existing `/app.js` route). + +**Step 1.3 — `FirmwareUpdateModule.h` (0.5 h).** Create [src/core/FirmwareUpdateModule.h](src/core/FirmwareUpdateModule.h), header-only, mirrors v1's pattern + v3's [SystemModule.h](src/core/SystemModule.h) idioms: + +- Two `controls_.addReadOnly` controls: `update_status` (char[64]), `update_pct` (uint8_t). +- File-scope statics `g_otaStatus[64]` / `g_otaPct` in an anonymous namespace inside the .h. Routes write these; `loop1s()` polls and copies into the bound buffers. +- `respectsEnabled() { return false; }` (diagnostics keep running). +- Registered in `main.cpp` alongside SystemModule. + +**Step 1.4 — Platform OTA implementation (1.0 h).** In [src/platform/esp32/platform_esp32.cpp](src/platform/esp32/platform_esp32.cpp): implement the four functions from step 0.3. + +- Include `<esp_ota_ops.h>`, `<esp_https_ota.h>`. 
+- `http_fetch_to_ota`: configure `esp_https_ota_config_t` with `esp_crt_bundle_attach` so api.github.com / objects.githubusercontent.com TLS works without a baked cert. Loop on `esp_https_ota_perform`, update `*pctOut` from `esp_https_ota_get_image_len_read` / total. **Blocking call** — the route in step 1.5 spawns a FreeRTOS task. + +In [src/platform/desktop/platform_desktop.cpp](src/platform/desktop/platform_desktop.cpp): stub the four functions returning `false` and writing "unsupported" into `statusBuf`. + +**Risk**: medium. `esp_https_ota_perform` task scheduling is the main hazard; getting the task pinned to a sensible core + stack size matters. Reference v1's `pal::http_fetch_to_ota` for the working numbers. + +**Step 1.5 — `POST /api/firmware/url` route (1.0 h).** Add to [src/core/HttpServerModule.cpp](src/core/HttpServerModule.cpp). Gated `if constexpr (mm::platform::hasOta)`; desktop returns 501. Handler: + +1. Parse JSON `{"url":"..."}` using the existing JSON-string extraction helpers (see `handleSetControl` patterns at line 428). +2. Validate URL shape (`http://` or `https://`). +3. `xTaskCreate` a one-shot task that calls `pal::http_fetch_to_ota`. Task updates `g_otaStatus` "downloading" → "flashing" → "rebooting", and `g_otaPct`. On success, `pal::reboot()`. +4. Return `202 Accepted` immediately. UI polls `update_status` via the existing WS state push. + +**Risk callout — complexity**: HttpServerModule.cpp already carries 24+ lizard warnings (from plan-17). Keep the new function ≤20 lines; pull body-parsing into the existing pattern; don't refactor the surrounding GET/POST ladder. If lizard flags the new code as adding to the worst-offender list, split OTA routes into a separate `OtaRoutes.{h,cpp}` (deferred decision, only if needed). + +**Skip**: file-upload route (`POST /api/firmware`) — picker drives URL-only. v1's file affordance is not part of this plan. 
+ +**Step 1.6 — UI wiring in app.js (0.5 h).** In [src/ui/index.html](src/ui/index.html), add `type="module"` to the existing `<script src="/app.js">` tag — single-attribute change. In [src/ui/app.js](src/ui/app.js): + +- Top of file: `import { releasePicker } from "/release-picker.js";` +- In `createCard()` (around line 461-467), when `mod.type === "FirmwareUpdate"`, append a mount point and call: + +```js +releasePicker.init({ + container: mount, + ownBoardKey: getSystemBoard(), // reads systemModule's `board` control + onInstall: (board, manifestUrl, binaryUrl) => + fetch("/api/firmware/url", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ url: binaryUrl }) + }) +}); +``` + +The device uses the raw `.bin` URL, not the manifest — `esp_https_ota` ingests the firmware image directly. `ownBoardKey` reads from systemModule's `board` readonly control (already serialised in `/api/state` per `SystemModule.h:85`). + +**Risk**: low. `<script>` → `<script type="module">` is a five-character change; modules' implicit strict mode shouldn't trip `app.js`. Modules `defer` by default; entry-point is async WS init at the bottom of app.js — no ordering surprises. Worth one deliberate page-load smoke test after the switch. + +**Step 1.7 — Track 1 hardware verification (0.5 h, gating).** Flash the current build (`v1.0.0-rc1` or newer) to one esp32 and one esp32s3 board. On each: open the UI → FirmwareUpdate card. Confirm: + +- Picker populates from api.github.com (within ~1 s, sessionStorage cache hit on reload). +- Releases dropdown shows only compatible builds: esp32 device doesn't see `esp32s3-n16r8` in the list; vice versa. +- Stable selected by default if any stable; else first prerelease (the current case — only `v1.0.0-rc1` exists). +- Install button advances `update_status` `idle` → `downloading` → `flashing` → `rebooting`; `update_pct` advances 0 → 100. 
+- Post-reboot, `board` control still reads the right value and the new firmware's version shows in `version`. + +**Product-owner-driven; not agent-verifiable per CLAUDE.md.** Track 2 work does not begin until this is green. + +### Track 2 — Web installer (3.0 h, ships after Track 1 hardware-verified) + +**Step 2.1 — Adapt `docs/install/index.html` to use the shared module (0.5 h).** Edit [docs/install/index.html](docs/install/index.html): + +1. Add `<script type="module" src="release-picker.js"></script>`. The file will be staged into Pages alongside `index.html` (step 2.3), so this is a same-origin import. +2. Replace the hardcoded `<select id="board">` and `renderButton()` JS (lines 103-185 minus the browser-warning check) with: + +```js +import { releasePicker } from "./release-picker.js"; +releasePicker.init({ + container: document.getElementById("picker-mount"), + ownBoardKey: null, // web installer flashes any board + onInstall: (board, manifestUrl) => { + const host = document.getElementById("button-host"); + host.innerHTML = ""; + const btn = document.createElement("esp-web-install-button"); + btn.setAttribute("manifest", manifestUrl); + host.appendChild(btn); + btn.click(); + } +}); +``` + +Keep the SRI-pinned ESP Web Tools `<script>` tag and the browser-warning card. + +**Step 2.2 — Per-release manifest URLs become relative (0.5 h).** [scripts/build/generate_manifest.py](scripts/build/generate_manifest.py) already accepts `--release-url`; passing `--release-url .` produces relative paths like `./firmware-esp32-v…bin`. This is the form needed for Pages-hosted manifests (same-origin). The current release-asset manifest stays absolute (used by the OTA picker which still passes binary URLs to the device). 
+ +Add a second invocation of the manifest generator in the workflow's "Generate ESP Web Tools manifests" step, writing a second set into `pages-manifests/`: + +```bash +for B in esp32 esp32-eth esp32-eth-wifi esp32s3-n16r8; do + python scripts/build/generate_manifest.py \ + --board "$B" --version "$V" \ + --release-url . \ + --flasher-args "dist/flasher-$B.json" \ + --out "pages-manifests/manifest-$B.json" +done +``` + +**Step 2.3 — Cumulative release content staging (1.5 h, the genuinely risky bit).** Insert a new step in [.github/workflows/release.yml](.github/workflows/release.yml) before the existing "Stage GitHub Pages site" step: + +```bash +# Last 5 stable + last 5 prerelease. +STABLE=$(gh release list --limit 50 --exclude-drafts \ + --json tagName,isPrerelease \ + | jq -r '.[] | select(.isPrerelease|not) | .tagName' | head -5) +PRE=$(gh release list --limit 50 --exclude-drafts \ + --json tagName,isPrerelease \ + | jq -r '.[] | select(.isPrerelease) | .tagName' | head -5) +KEEP="$STABLE $PRE" + +for TAG in $KEEP; do + mkdir -p "pages/install/releases/$TAG" + gh release download "$TAG" --dir "pages/install/releases/$TAG" \ + --pattern 'firmware-*.bin' \ + --pattern 'manifest-*.json' \ + --pattern 'projectMM-*.tar.gz' \ + || true # tolerate the current tag (assets not yet uploaded) +done + +# The current release's binaries aren't a downloadable asset yet (the +# `softprops/action-gh-release` step runs *after* Pages staging). Stage +# them from the local dist/ + pages-manifests/ instead. +mkdir -p "pages/install/releases/${GITHUB_REF_NAME}" +cp dist/firmware-*.bin "pages/install/releases/${GITHUB_REF_NAME}/" +cp pages-manifests/manifest-*.json "pages/install/releases/${GITHUB_REF_NAME}/" +cp dist/projectMM-*.tar.gz "pages/install/releases/${GITHUB_REF_NAME}/" || true +``` + +In the existing "Stage GitHub Pages site" step, add `cp src/ui/release-picker.js pages/install/` so the JS is alongside `index.html`. 
+ +**Remove the `if: !contains(github.ref, '-rc')` gates** on the three Pages-related steps (currently lines 197, 206, 229 of release.yml). Cumulative content needs every tag to refresh Pages; otherwise the published releases and the installer's view of them drift apart. + +**Risk callout — release-process safety**: this step changes what lands on the live install URL on every tag. Mitigations: + +- Test once via `workflow_dispatch` against an existing tag before merging. +- `--pattern` lists are explicit so a future release with surprise asset names doesn't pull in junk. +- After merge, confirm `releases/v1.0.0-rc1/manifest-esp32.json` is reachable at the live URL before considering Track 2 verified. + +**Step 2.4 — Drop the separate `pages.yml` idea (0 h).** Plan-18 v1 proposed a separate `pages.yml` for docs-only changes. With cumulative content, a docs-trigger workflow buys nothing — it would have to re-run the same `gh release download` dance. Decision: don't create `pages.yml`; record in `docs/history/decisions.md` during PR-merge reconciliation. + +**Step 2.5 — Track 2 verification (0.5 h, gating).** After the release workflow runs on a tag with the new shape: + +- Visit `https://ewowi.github.io/projectMM/install/`. Picker populates from api.github.com. +- Pick a release + esp32 board → Install. DevTools Network: `manifest-esp32.json` fetched from `ewowi.github.io/projectMM/install/releases/<tag>/...` (same-origin). Three `.bin` files fetched from the same origin. +- No CORS errors in console. Flash succeeds against an unflashed ESP32. +- Switch channel to Stable / Pre-release; releases dropdown refreshes per filter. + +**Product-owner-driven; not agent-verifiable.** + +### Track 3 — Improv WiFi over USB-serial (3.0 h, ships after Tracks 1+2) + +Browser-driven WiFi provisioning during/right after a first flash. Same browser session as the install, no SoftAP detour, no manual IP-hunting. 
ESP Web Tools speaks Improv natively, so Track 2's install page automatically offers the WiFi dialog once a firmware boots with Improv listening. + +A small Python CLI mirrors the same code path for headless/CI/rack use over USB. Replaces v1's `deploy/wifi.py` + `deploy/flashfs.py --wifi` partition-baking flow (which required halting the device and re-flashing the LittleFS partition just to inject credentials). + +**Step 3.1 — Add `improv/improv` as managed component (0.25 h).** Edit [esp32/main/idf_component.yml](../../esp32/main/idf_component.yml). Append: + +```yaml + improv/improv: + version: "^1.2.5" +``` + +(The GitHub source is at `improv-wifi/sdk-cpp` but the ESP Component Registry coordinate is `improv/improv` — the latter is what `idf.py` resolves.) + +(The two existing deps — `espressif/mdns` and `joltwallet/littlefs` — keep their format. Verify the exact registry name during implementation; if it's published as a different coordinate or only as a GitHub git source, switch to that form.) Run `idf.py reconfigure` locally before committing to confirm resolution. + +**Risk callout — library API shape unverified at planning time.** The rest of Track 3 assumes a callback-driven library (parser eats bytes, emits callbacks for scan/info/provision). If the library is poll-driven, step 3.2 grows by ~30 min. The cache key at [release.yml:87](../../.github/workflows/release.yml#L87) is the IDF *toolchain* cache, not managed-components — no bump needed; the new component is fetched on first build. + +**Step 3.2 — Platform-layer Improv listener (1.0 h).** Mirrors the OTA task pattern at [platform_esp32.cpp:870-891](../../src/platform/esp32/platform_esp32.cpp#L870-L891). + +In [src/platform/platform.h](../../src/platform/platform.h), add to the network section after `wifiStaStop()`: + +```cpp +// Improv WiFi provisioning over UART0. ESP32 only; desktop stub returns false. 
+// Always-on listener; the task installs a UART driver on UART0 and parses +// inbound Improv frames. Provision requests are rejected (with Improv's +// wrong-state error frame) while wifiStaConnected() is true; scan + info +// stay available. The callback is invoked from the Improv task with the +// new credentials; the module copies them and triggers wifiStaInit on the +// scheduler thread (avoids cross-task races). +using ImprovCredentialCallback = void(*)(const char* ssid, const char* password); +struct ImprovDeviceInfo { + const char* name; // borrowed; lifetime >= init call (statics are fine) + const char* chipFamily; // "ESP32" / "ESP32-S3" / ... + const char* firmwareVersion; // kVersion from build_info.h +}; +bool improvProvisioningInit(const ImprovDeviceInfo& info, + ImprovCredentialCallback cb, + char* statusBuf, size_t statusBufLen); +``` + +In [platform_esp32.cpp](../../src/platform/esp32/platform_esp32.cpp): anonymous-namespace state (callback ptr, status-buffer ptr+len, device info copy), an `improvTask` function that: +- Installs the UART driver on `UART_NUM_0` at 115200 (idempotent — the bootloader's pre-init is preserved for ESP_LOGI to keep writing). +- Loops on `uart_read_bytes(UART_NUM_0, buf, sizeof(buf), pdMS_TO_TICKS(100))`. Feeds bytes into the library's parser. +- On info request: reply built from the stored `ImprovDeviceInfo`. +- On scan request: synchronous WiFi scan via `esp_wifi_scan_start`; emit results. +- On provision: if `wifiStaConnected()`, status = `"error: already connected"` + Improv wrong-state error frame; else status = `"received credentials"` + invoke callback + 30s poll on `wifiStaConnected()` / `wifiStaGetIP()` for the final success/failure reply (with `http://<ip>/` URL in the success frame). +- `xTaskCreate(&improvTask, "improv", 4096, nullptr, 4, nullptr)`. 4 KB stack (no TLS); priority 4 (below OTA's 5, above idle). 
+ +In [platform_desktop.cpp](../../src/platform/desktop/platform_desktop.cpp): stub `improvProvisioningInit(...) { return false; }`. + +**Risk callout — UART0 driver coexistence with ESP_LOGI.** In IDF v6 the log subsystem uses `esp_rom_printf` direct-to-register, and `uart_driver_install` claims only the interrupt — they coexist. Empirically confirm in step 3.7 by watching the serial monitor: ESP_LOGI lines should keep appearing after `improvProvisioningInit`. If they vanish, the fix is `esp_log_set_vprintf` to route logs through `vprintf`, ~30 min addition. + +**Step 3.3 — Add `hasImprov` platform-config flag (0.1 h).** Mirror `hasOta` at [src/platform/esp32/platform_config.h:38](../../src/platform/esp32/platform_config.h#L38) (true on ESP32) and [src/platform/desktop/platform_config.h](../../src/platform/desktop/platform_config.h) (false). Call sites use `if constexpr (platform::hasImprov)` to compile out the listener-install path on desktop. + +**Step 3.4 — `ImprovProvisioningModule.h` (0.5 h).** Header-only module at [src/core/ImprovProvisioningModule.h](../../src/core/ImprovProvisioningModule.h), mirrors [FirmwareUpdateModule.h](../../src/core/FirmwareUpdateModule.h)'s shape. + +- One read-only Control: `provision_status` (char[64], default `"listening"`). +- `setSystemModule(SystemModule*)` + `setNetworkModule(NetworkModule*)` setters. +- `setup()`: gated `if constexpr (platform::hasImprov)`; builds `ImprovDeviceInfo` from `systemModule_->deviceName()` + `platform::chipModel()` + `mm::kVersion`; calls `platform::improvProvisioningInit(info, &onCredentialsThunk, statusStr_, sizeof(statusStr_))`. Updates `statusStr_` to `"listening"`. +- `onBuildControls()`: `controls_.addReadOnly("provision_status", statusStr_, sizeof(statusStr_))`. 
+- `loop1s()`: if `pendingCredentials_` flag set by the callback, copy `pendingSsid_` / `pendingPassword_` into the network module via a new `NetworkModule::setWifiCredentials(const char* ssid, const char* password)` public method, clear the flag. +- Static thunk + static `instance_` singleton bridge: the platform layer's C-style callback can't take a member pointer, so the module installs a free-function thunk that dispatches to `instance_->onCredentials(...)`. One-line comment at the introduction site documents the bespoke shape ("plain function pointer demanded by the C-style platform API; module is unique by construction"). + +Add `setWifiCredentials()` to [NetworkModule.h](../../src/core/NetworkModule.h): + +```cpp +void setWifiCredentials(const char* ssid, const char* password) { + if (!ssid) return; + std::strncpy(ssid_, ssid, sizeof(ssid_) - 1); + std::strncpy(password_, password ? password : "", sizeof(password_) - 1); + markDirty(); // FilesystemModule notices and persists + platform::wifiStaInit(ssid_, password_); + // Existing state machine in loop1s() handles the 10s timeout + AP fallback. +} +``` + +**Step 3.5 — Wire into `src/main.cpp` (0.25 h).** Three edits matching the FirmwareUpdateModule registration pattern from [main.cpp:60](../../src/main.cpp#L60), [113-117](../../src/main.cpp#L113-L117), [174](../../src/main.cpp#L174): + +1. `#include "core/ImprovProvisioningModule.h"` at the top. +2. `mm::ModuleFactory::registerType<mm::ImprovProvisioningModule>("ImprovProvisioningModule", "core/ImprovProvisioningModule.md");` in `registerModuleTypes()`. +3. Create + setters + `scheduler.addModule(...)` **after** `networkModule` (Improv depends on NetworkModule existing so its `setNetworkModule` setter has a valid pointer; the cold-boot order doesn't matter — both modules' `setup()` runs in the same scheduler phase). + +**Step 3.6 — Python rack CLI: `scripts/build/improv_provision.py` (0.5 h).** ~150-line pyserial script. 
Argparse: + +```bash +improv_provision.py --port /dev/tty.usbserial-X --ssid <SSID> --password <PW> [--timeout 30] +``` + +- Open serial at 115200, send the Improv "send Wi-Fi settings" frame (RPC command 0x01, payload = `[ssid_len, ssid_bytes, pw_len, pw_bytes]`, then checksum). +- Loop reading frames until "provisioning success" (with URL) or "provisioning fail" (with error code) or timeout. +- Print: `provisioned <device> on <SSID>; UI at <URL>`. Exit 0 on success, non-zero on any error. + +A future `--from-list scripts/devicelist.json` mode for true rack provisioning is **out of scope for plan-18** — v3 doesn't have a devicelist schema yet; single-port mode covers single-device and "shell-loop a hub" today. + +Add to [scripts/MoonDeck.md](../../scripts/MoonDeck.md) under a new "Provisioning" section. No MoonDeck button — the script is CLI-only by design. + +**Step 3.7 — `docs/moonmodules/core/ImprovProvisioningModule.md` + hardware verification (0.4 h).** + +Spec page sections (mirror [FirmwareUpdateModule.md](../moonmodules/core/FirmwareUpdateModule.md)'s shape, kept brief): +- One-paragraph "what Improv is" + link to <https://www.improv-wifi.com/>. +- Controls table (the one `provision_status` line — satisfies `check_specs.py`). +- **ESP32-S3 USB-port footnote**: explicit "connect to the silkscreen-labelled USB (UART) port, not USB (CDC/JTAG)" — the locked decision documented at the user-visible surface. +- "How to test": two paths — browser via <https://www.improv-wifi.com/> or ESP Web Tools' built-in Improv flow; CLI via `improv_provision.py`. + +Hardware verification (product-owner gate): + +1. Flash to an ESP32 and an ESP32-S3 DevKitC-1 (via the silkscreen UART USB port on the S3). +2. From Chrome desktop, open <https://www.improv-wifi.com/>, click Connect, pick the device's serial port.
Expect device name + chip + version to appear in the browser; SSID scan returns nearby networks; enter creds → device shows `received credentials` → `connecting` → `connected: <ssid>`; URL `http://<ip>/` clickable; opens the device UI. +3. Same flow via the Python CLI: `python3 scripts/build/improv_provision.py --port <port> --ssid <ssid> --password <pw>`. Expect exit 0 + the IP printed. +4. Confirm ESP_LOGI output still appears on the serial monitor throughout (the step 3.2 risk). +5. Wipe credentials (`POST /api/control` on `ssid` = empty + reboot) → confirm device boots AP-fallback as before. Re-run Improv via browser → confirm it provisions cleanly. The AP-fallback path stays intact. + +Track 3 total: **3.0 h**. Sequential within the track. Hardware verification (step 3.7) is the gate; plan-18 doesn't ship without it green. + +### Final housekeeping (0.5 h) + +**Step 3.1 — `docs/install/README.md` recipes (0.25 h).** Simplify Recipe A to `cp docs/install/index.html /tmp/preview && cd /tmp/preview && python -m http.server 8000`. Mark Recipe B as rarely-needed (stub `loadReleases()` from DevTools for un-tagged-release testing). Update Recipe C with the new "Pages publishes on every tag (RC included)" mental model. + +**Step 3.2 — `docs/plan.md` housekeeping (0.25 h).** Remove the "Installer with release-channel picker" stub (this plan supersedes it). Mention Phase 2 (nightly channel) + Phase 3 (UX polish) in plan-18.md's "Future phases" section so they're not lost. + +## Critical files + +**New:** + +- [src/ui/release-picker.js](src/ui/release-picker.js) — shared release-picker module. +- [src/core/FirmwareUpdateModule.h](src/core/FirmwareUpdateModule.h) — on-device OTA MoonModule (header-only). +- [src/core/ImprovProvisioningModule.h](src/core/ImprovProvisioningModule.h) — Improv listener MoonModule (header-only). **Track 3.** +- [scripts/build/improv_provision.py](scripts/build/improv_provision.py) — pyserial CLI for headless / rack provisioning. 
**Track 3.** +- [docs/moonmodules/core/ImprovProvisioningModule.md](docs/moonmodules/core/ImprovProvisioningModule.md) — spec page. **Track 3.** +- [docs/history/plan-18.md](docs/history/plan-18.md) — this plan's archive. + +**Edited:** + +- [src/platform/platform.h](src/platform/platform.h) — OTA primitives **+ `improvProvisioningInit` (Track 3)**. +- [src/platform/esp32/platform_config.h](src/platform/esp32/platform_config.h) + [src/platform/desktop/platform_config.h](src/platform/desktop/platform_config.h) — `hasOta` flag **+ `hasImprov` (Track 3)**. +- [src/platform/esp32/platform_esp32.cpp](src/platform/esp32/platform_esp32.cpp) — OTA implementation **+ Improv listener task (Track 3)**. +- [src/platform/desktop/platform_desktop.cpp](src/platform/desktop/platform_desktop.cpp) — OTA stubs **+ Improv stub (Track 3)**. +- [src/core/HttpServerModule.cpp](src/core/HttpServerModule.cpp) — `/api/firmware/url` route + `/release-picker.js` GET. +- [src/core/NetworkModule.h](src/core/NetworkModule.h) — new public `setWifiCredentials(ssid, password)` method (Track 3 — Improv writes through this). +- [src/ui/embed_ui.cmake](src/ui/embed_ui.cmake) — embed `release-picker.js`. +- [src/ui/index.html](src/ui/index.html) — `<script type="module">` for `/app.js`. +- [src/ui/app.js](src/ui/app.js) — import release-picker, wire to FirmwareUpdate card. +- [CMakeLists.txt](CMakeLists.txt) + [esp32/main/CMakeLists.txt](esp32/main/CMakeLists.txt) — embed deps. +- [esp32/main/idf_component.yml](esp32/main/idf_component.yml) — add `improv/improv` (Track 3). +- `main.cpp` — register FirmwareUpdateModule **+ ImprovProvisioningModule (Track 3)**. +- [docs/install/index.html](docs/install/index.html) — Track 2: use shared module. +- [.github/workflows/release.yml](.github/workflows/release.yml) — Track 2: cumulative content staging, remove RC-Pages gate. +- [scripts/build/generate_manifest.py](scripts/build/generate_manifest.py) — comment about the `--release-url .` use case. 
+- [docs/install/README.md](docs/install/README.md) — Track 2: simplify recipes. +- [scripts/MoonDeck.md](scripts/MoonDeck.md) — document `improv_provision.py` (Track 3). +- [docs/plan.md](docs/plan.md) — remove installer stub. + +## Reuse map + +| Source | Pattern to reuse | Why | +|---|---|---| +| [projectMM-v1 OTA module](../../projectMM-v1/src/modules/system/FirmwareUpdateModule.h) | MoonModule with two display controls + file-scope statics polled by `loop1s()` | Working pattern from v1. v3 ports the architecture using v3 idioms (`controls_.addReadOnly`, anon namespace statics). | +| [projectMM-v1 `populateGhList()`](../../projectMM-v1/src/frontend/app.js#L1362-L1410) | `fetch(api.github.com/.../releases) + sessionStorage cache + prerelease filter + per-asset install button` | Identical data shape; the picker UX is recognisable from v1. | +| [projectMM-v1 `pal::http_fetch_to_ota`](../../projectMM-v1/src/platform/esp32) | `esp_https_ota_config_t` + `esp_crt_bundle_attach` + perform loop | Working ESP-IDF idiom. Cherry-pick the working numbers (stack size, core pinning) into v3. | +| [src/ui/embed_ui.cmake](src/ui/embed_ui.cmake) | The whole UI-embedding pipeline | Extend to one more file. No new architecture. | +| [src/core/SystemModule.h:85](src/core/SystemModule.h#L85) | `controls_.addReadOnly("name", buf, sizeof(buf))` for live-updating diagnostic strings | Same shape for `update_status` and `update_pct`. | +| [src/core/HttpServerModule.cpp:428+](src/core/HttpServerModule.cpp#L428) | `mm::json::parseString(body, "key", buf, sizeof(buf))` pattern | Reuse for parsing the `{"url":"..."}` body. | +| [scripts/build/generate_manifest.py](scripts/build/generate_manifest.py) | The whole script | Stays unchanged. Track 2 just calls it twice — once with absolute URLs (release assets) and once with `--release-url .` (Pages copy). 
| +| [projectMM-v1 `deploy/wifi.py` + `flashfs.py --wifi`](../../projectMM-v1/deploy/wifi.py) | The "one set of credentials, applied to a rack of devices" use case | The use case is preserved. The mechanism changes: v1 baked credentials into a LittleFS partition image and re-flashed it over USB (device halted). Track 3 talks Improv to running devices via UART — same end state, no flash required, generalises to any-firmware-with-Improv. | +| `improv/improv` (ESP Component Registry; source: `improv-wifi/sdk-cpp` on GitHub) | The Improv protocol parser + callbacks | Standard upstream library. We don't reimplement the protocol; we install the listener task that feeds it bytes. | +| [src/platform/esp32/platform_esp32.cpp:870-891](src/platform/esp32/platform_esp32.cpp#L870-L891) (`http_fetch_to_ota` task) | The xTaskCreate + status-buffer pattern | Improv's listener task is identical shape: heap struct + `xTaskCreate` + status-buffer ownership. Reuses the pattern, not the code. | + +## Verification + +Plan-18 passes Event-1 commit gates: + +- Desktop build clean (`hasOta = false` + desktop OTA stubs return false; new C++ compiles). +- ctest + scenarios pass. +- Platform boundary passes (OTA primitives live in `src/platform/`, declared in `platform.h`). +- Spec check requires a new `docs/moonmodules/core/FirmwareUpdateModule.md` describing the two controls + the URL-fetch route — per CLAUDE.md "Module specs are end-user / API-integrator documentation." +- ESP32 build all 4 boards (the C++ changes affect ESP32 builds). +- KPI re-captured (new C++ in `src/`). + +### Test coverage + +Added under plan-18 (Option A — "cheap + extract parser"): + +- **`test/test_improv_frame.cpp`** — 13 cases / 216 assertions over the Improv framing layer. Parser feed-byte-at-a-time, bad-checksum detection, oversize-length rejection, resync on garbage, the "stray 'I' restarts the magic search" edge case, builder/parser round-trip across all four frame types, back-to-back frames. 
The parser was extracted into [src/core/ImprovFrame.h](../../src/core/ImprovFrame.h) precisely to make this test cheap (no MCU, no `improv/improv` host port). The ESP32 task at [platform_esp32.cpp::improvTask](../../src/platform/esp32/platform_esp32.cpp) now consumes the same parser, so the unit test covers the bytes-in path that runs on-device. +- **`test/test_network_module.cpp`** — 4 cases on `NetworkModule::setWifiCredentials`: SSID + password copy, dirty-flag toggling, null-SSID no-op, null-password tolerance, oversize-SSID truncation. The desktop `wifiStaInit` stub returns false safely so the test runs without bringing up a radio. This is the bridge Improv uses to hand credentials to the network state machine. +- **`scripts/build/improv_provision.py --self-test`** — host-side framing/payload round-trip. No serial port needed; re-runnable in CI. Catches a regression in the Python frame builder before any device is involved. + +Documented gaps (not covered by automated tests, called out so reviewers know what hardware verification is buying): + +- **Improv RPC dispatch on-device** — the bridge from a parsed frame to `improvHandleProvision` / `improvSendDeviceInfo` etc. lives in `improvDispatchFrame()` ([platform_esp32.cpp:1037](../../src/platform/esp32/platform_esp32.cpp#L1037)) and depends on the upstream `improv/improv` library (source: `improv-wifi/sdk-cpp`) plus real WiFi state. Covered only by Track 3.7 hardware verification. +- **`esp_https_ota` fetch + OTA flash + reboot** — the `urlOta` task in `http_fetch_to_ota`. Depends on the ESP-IDF TLS stack, OTA partition layout, and network. Covered only by Track 1.7 hardware verification. +- **`release-picker.js`** — the pure helpers (`isCompatible`, `parseBoardsFromAssets`, `relativeTime`) have been exercised ad-hoc from a DevTools console; there is no JS unit-test harness in v3 today (no jsdom, no node runner). Adding one is on the 2.0 roadmap (`docs/plan.md`). 
+- **Web installer CORS path + cumulative Pages content** — release-workflow change. Verified only by Track 2.5 hardware-flash test against the live `https://ewowi.github.io/projectMM/install/`. + +Event-2 PR-merge gates apply normally. Reviewer agent's main checks: did the OTA route follow the existing HttpServerModule route patterns? Is the JS module's `isCompatible()` bespoke rule clearly documented at the introduction site? + +Track 1, Track 2, and Track 3 hardware verification (steps 1.7, 2.5, 3.7) gate the plan; product owner runs all three. + +## Risks and unknowns + +1. **`esp_https_ota` TLS bundle**: api.github.com + release-assets.githubusercontent.com need `esp_crt_bundle_attach` (the standard ESP-IDF mechanism). Confirm during step 1.4 that the v6.1-dev IDF has it linked by default (it should; it's a baseline component). +2. **`xTaskCreate` stack size for the OTA task**: too small and the TLS handshake stack-overflows; too big and we waste RAM. v1's working number is the reference — cherry-pick. +3. **HttpServerModule.cpp lizard complexity**: plan-17 left it at 24+ warnings. The new route is one more entry in the if-ladder; should not push the file's worst-offender functions higher. If lizard flags a regression, the OTA routes split into `OtaRoutes.{h,cpp}` (deferred decision). +4. **JS module + page-load timing**: converting `app.js` to a module changes load timing (deferred by default). The WS init at the bottom of app.js is async anyway, but a smoke-test reload is mandatory after step 1.6. +5. **Cumulative content workflow drift**: the `gh release download` step in step 2.3 changes what lands on Pages on every tag. A bug here breaks the install URL for everyone. Mitigation: `workflow_dispatch` dry-run against an existing tag before tagging the next real release. +6. **`isCompatible()` correctness**: the bespoke rule (strip `-eth*` suffix, compare) is intentionally narrow. If we add a future board that doesn't fit this scheme (e.g. 
an ESP32-P4 variant), the rule needs updating. Documented inline so the next person sees it; tested implicitly by Track 1 verification. +7. **Improv library API shape unverified** (Track 3.1): callback-driven vs poll-driven changes step 3.2's task structure by ~30 min. The exact ESP Component Registry coordinate is also unverified at planning time. +8. **UART0 + ESP_LOGI coexistence** (Track 3.2): empirically OK in IDF v6, but verified only at the step 3.7 hardware test. Fallback (route logs through `vprintf`) is ~30 min if needed. +9. **ESP32-S3 USB-port confusion**: the DevKitC-1 has two USB ports; only the silkscreen-labelled UART one works with Improv. Documented in the spec page; user education, not a code fix. + +## Notes + +- This plan is saved as `docs/history/plan-18.md` once Track 1 implementation begins, per CLAUDE.md per-feature workflow. +- Track 1 = OTA picker. Hardware-verified before Track 2 work began. **(Done at planning time of Track 3 addition.)** +- Track 2 = web installer + release workflow changes. Implemented; ships in the same PR; the release workflow change is the riskiest single edit in Tracks 1+2. +- Track 3 = Improv WiFi + Python rack CLI. Added late in plan-18 to close the install-UX loop: the picker UI provisions firmware, OTA flashes new firmware, Improv provisions WiFi. Same PR. +- The cancelled v1 of plan-18 (which assumed cross-origin release-asset fetches worked) is the source of the CORS-gate context. +- Total plan-18 budget: Tracks 1+2 = ~10 h (done); Track 3 = ~3 h (new); total ~13 h. Sequential within tracks; product owner gates each track on hardware before moving to the next. 
diff --git a/docs/history/plans/Plan-20260620 - Improv-as-REST: push device-model config over serial.md b/docs/history/plans/Plan-20260620 - Improv-as-REST: push device-model config over serial.md new file mode 100644 index 0000000..4ad1ec2 --- /dev/null +++ b/docs/history/plans/Plan-20260620 - Improv-as-REST: push device-model config over serial.md @@ -0,0 +1,64 @@ +# Plan — Improv-as-REST: push device-model config over serial + +*Approved 2026-06-20. Saved per the CLAUDE.md "Plan before implementing" rule.* + +## Context + +**The problem.** When you flash a device from the deployed web installer (`https://moonmodules.org/projectMM/install/`) and pick a device model, the model's defaults (Grid 8×8, AudioSpectrum + RandomMap, the LED driver, brightness, …) are supposed to be applied to the device. Today they often aren't. + +**Why.** The installer page is served over **HTTPS**; the ESP32 only serves **HTTP**. Browsers block an HTTPS page from calling `http://<device>/api/...` (mixed-content). So the original **push** (installer POSTs directly to the device's REST API) silently fails on the deployed site. The current workaround is a **pull/handoff**: the installer hands you a `?deviceModel=` URL, and the *device's own page* fetches the catalog from Pages and applies it — but that only runs if the user opens that exact link, which is easy to miss. The `deviceModel` *name* already arrives fine because it's pushed over **Improv (serial)**, which bypasses the network entirely. + +**The fix (product-owner decision).** Generalise that: **"Improv = the REST API over serial."** During provisioning the installer holds the USB serial port, so push the *whole* configuration over serial as a sequence of REST-equivalent operations — no HTTP, no mixed-content, no pull, no user click. It works for WiFi *and* Ethernet devices (serial exists regardless of network) and removes the mixed-content special-casing. 
+ +**What "REST over serial" means concretely.** An `APPLY_OP` Improv frame is just the **serial envelope** (the same wire framing `SET_DEVICE_MODEL` already uses: magic/type/length/checksum) carrying a **REST operation as its payload** — literally `{"op":"add","type":...,"id":...,"parent":...}` or `{"op":"set","module":...,"control":...,"value":...}`, the **same JSON an HTTP `POST /api/modules` / `/api/control` body carries**. On the device the op routes to the **exact same apply-core** the HTTP handler calls, so a REST call over the network and an `APPLY_OP` over serial **execute identical code**. The new command byte `0xFC` exists only because `0xFE` is hardcoded to "payload = a device-model name string"; `0xFC` means "payload = a REST op." Wire model (decided): **primitive ops** — one op per frame (`add` / `set` / `clear-children`). The device needs **no catalog knowledge** (the installer owns catalog semantics, as it does today in JS). + +**The handoff/pull is removed entirely** (product-owner decision). Serial push covers the install; **MoonDeck** covers configuring an already-running device (it talks plain HTTP on the LAN — no mixed-content — so it keeps the direct REST API); **re-flash** covers the rest. So this change *deletes* the device-side catalog self-fetch and the whole `?deviceModel=` machinery — a large subtraction, and the device firmware no longer reaches out to Pages at all (a domain-neutrality + security win). + +## Approach (verified against the code, via the Explore pass) + +Three confirmed seams make this a wiring job, not new infrastructure: + +1. **Apply-core extraction (core).** `HttpServerModule::handleAddModule` (HttpServerModule.cpp:591) and `handleSetControl` (:447) are JSON-body-driven and only touch the `TcpConnection` to call `sendResponse`. 
Extract the apply core of each into transport-free methods that return an `ApplyResult` (the enum already exists, Control.h:377): + - `ApplyResult applyAddModule(const char* type, const char* id, const char* parentId)` — lines 619–671: id-uniqueness + single-instance-skip (idempotent), resolve parent, `ModuleFactory::create`, `setName`, `addChild`, `ensureUniqueName`, `onBuildControls`/`setup`/`onBuildState`, `buildState`, `noteDirty`. + - `ApplyResult applySetControl(const char* module, const char* control, const char* valueJson)` — lines 455–511: find module, the `enabled` fast-path, `applyControlValue`, `rebuildControls`, `onUpdate`, `noteDirty`, conditional `buildState`. (`applyControlValue` reads the value out of a JSON body by key, Control.cpp:203 — the serial path hands it a tiny `{"value":...}` string, same as HTTP.) + - `bool applyClearChildren(const char* parentName)` — the enumerate-then-delete the handoff's `clearModuleChildren` does, for `replaceChildren`. + The HTTP handlers become thin wrappers: parse body → call the apply-core → map `ApplyResult` to `sendResponse`. Net: no behaviour change for HTTP; the logic now has one home both transports share (the duplication win, per CLAUDE.md minimalism — not a line saving). + +2. **Serial transport = a new Improv vendor RPC carrying one op (platform + a tiny core seam).** Mirror the existing `SET_DEVICE_MODEL` (0xFE) / `SET_TX_POWER` (0xFD) vendor RPCs: add **`APPLY_OP` (0xFC)**. Payload = a compact op the device parses and routes: + - Frame fits the `kImprovMaxPayload = 128` budget (ImprovFrame.h:31). Most ops fit one frame. A rare long value (a big `pins` list) chunks across frames into a small reassembly buffer (the SET_DEVICE_MODEL fixed-buffer + atomic-ready pattern, generalised). 
+ - **Producer/consumer across the task boundary** (the established pattern): the Improv task (platform_esp32_improv.cpp) writes the received op into a module-owned buffer + sets an atomic `opReady`; `ImprovProvisioningModule::loop` (ImprovProvisioningModule.h:96) polls it (exactly like `pendingDeviceModelReady_`) and calls the apply-core on the **main loop** (so the factory/tree mutation isn't on the Improv task). The device acks each op with an empty `RpcResponse` (like SET_DEVICE_MODEL), so the installer can pace/await. + - Op encoding: a tiny JSON object (`{"op":"add","type":"...","id":"...","parent":"..."}` etc.), parsed with the flat `JsonUtil` helpers (`parseString`/`parseInt`) the rest of core already uses. (JSON, not a bespoke binary TLV: same shape as the REST body, host-testable, recognisable.) + +3. **Installer: push ops over serial, delete the HTTP/handoff paths (JS).** `tryHttpInjectBoard` (install-orchestrator.js:384) already walks a catalog entry (replaceChildren pre-pass, then per-module add + per-control set). **Repurpose that walk** to emit ops instead of HTTP: `sendConfigOverSerial(port, board)` walks the same entry and, per unit, sends an `APPLY_OP` frame via the existing `buildImprovFrame` + `port.writable.getWriter()` send (mirror `sendSetBoardFrame`, :176), awaiting each ack so order is preserved. The provision flow (`start()`) calls it **right after `SET_DEVICE_MODEL`, while it still owns the port**. Then **delete** the HTTP fan-out (`tryHttpInjectBoard`'s HTTP version, `canFetchHttp`) and the whole handoff (`pendingBoardPush`, the `?deviceModel=` link decoration, the auto-open + "Open device & apply defaults" button). + +## Files + +**Core (apply-core extraction + the op seam):** +1. 
**Edit** `src/core/HttpServerModule.h` + `.cpp` — add `applyAddModule` / `applySetControl` / `applyClearChildren` (transport-free, return `ApplyResult`/bool); refactor `handleAddModule` / `handleSetControl` / the delete-children path into thin wrappers calling them. Add `applyOp(const char* opJson)` that parses `op` and dispatches to the three. +2. **Edit** `src/core/ImprovProvisioningModule.h` — add the `pendingOp_` buffer + `pendingOpReady_` atomic (mirror `pendingDeviceModel_`), wire it to platform init, and in `loop()` poll-and-apply by calling the HttpServerModule apply-core (add an applier handle the same way it holds `scheduler_`/`systemModule_`). +3. **Edit** `src/platform/platform.h` + `platform_esp32_improv.cpp` — `improvProvisioningInit` gains `opOut`/`opOutLen`/`opReady` (+ reassembly state); add `IMPROV_CMD_APPLY_OP = 0xFC`, `improvHandleApplyOp` (validate, reassemble if chunked, copy to `opOut`, set `opReady`, ack), and a dispatch branch in `improvDispatchFrame`. Desktop stub (`platform_desktop.cpp`) gains the extra params (no-op). + +**Installer (push over serial; delete HTTP-push + handoff):** +4. **Edit** `docs/install/install-orchestrator.js` — add `IMPROV_CMD_APPLY_OP = 0xFC`, `encodeApplyOp(op)` + chunker, `sendApplyOpFrame(port, op)` (mirror `sendSetBoardFrame`), `sendConfigOverSerial(port, board)` (walk the entry → ops), call it in `start()` after `SET_DEVICE_MODEL`. **Delete** `tryHttpInjectBoard`'s HTTP version, `clearDeviceChildren`, `deviceFetch`, `canFetchHttp`, the `httpBoardOk`/`pendingBoardPush` plumbing through `onSuccess`. +5. **Edit** `docs/install/index.html` — success screen becomes simple: "Applied {board} defaults." (or "Kept existing config" when unticked). **Delete** the auto-open, the `done-apply` button + its CSS, the `?deviceModel=` link decoration / `withParam` / `pendingBoardPush` logic in `handleSuccess`. +6. 
**Edit** `src/ui/app.js` — **delete** `consumePendingDeviceModelParam`, `clearModuleChildren`, `DEVICE_MODELS_JSON_URL`, and the call site (~:158). The device no longer fetches the catalog or interprets `?deviceModel=`. The big firmware subtraction. + +**Tests + docs:** +7. **New** `test/unit/core/unit_HttpServerModule_apply.cpp` + register in `test/CMakeLists.txt` — host-test the extracted apply-core directly (no HTTP): `applyAddModule` adds + dedups + single-instance-skips; `applySetControl` writes + range-rejects; `applyClearChildren` empties a container; `applyOp` routes each op type; malformed op → graceful error. +8. **Edit** `docs/moonmodules/core/ImprovProvisioningModule.md` + `SystemModule.md` + `docs/install/README.md` — document the `APPLY_OP` (0xFC) wire op as "a REST operation in an Improv frame, applied by the same core as `/api/modules` + `/api/control`", and the "config pushed over serial during provisioning" flow. **Remove** the handoff / `?deviceModel=` docs. Update `docs/architecture.md` installer + live-reconfig sections with the "Improv = REST over serial" framing and the deletion of the device-side catalog-fetch. + +## Verification + +- **Host:** `cmake --build build` (0 warnings), `ctest` (incl. the new apply-core test), `uv run scripts/scenario/run_scenario.py`, `check_specs.py`, `check_platform_boundary.py`, `check_devices.py`. +- **ESP32 build** (`build_esp32.py --firmware esp32s3-n16r8` + `esp32` classic) — compiles the new vendor RPC under `-Werror`. +- **Serial APPLY_OP probe** — send a hand-built `APPLY_OP` frame to a connected S3, confirm the op applies (e.g. set Grid width 8) + the device acks. Pins the wire contract without the browser. +- **Real install (the actual fix):** from the local preview, flash the S3 with erase + apply-defaults; confirm it comes up as 8×8 + AudioSpectrum + RandomMap with the serial monitor showing the ops applied, **no handoff link involved**. Repeat on the P4. Confirm no duplicate AudioModule. 
Serial push is now the *only* install-time path, so preview and deployed behave identically — the HTTPS-vs-HTTP difference that caused the original bug no longer exists. + +## Risks / notes + +- **Scope:** ~150–250 lines C++ (apply-core extraction is mostly moving existing lines) + ~40–60 lines JS + a no-op desktop-stub param bump. Bounded — every primitive already exists. +- **Net likely a line reduction, definitely a duplication reduction.** The handoff deletion removes a substantial chunk; the new serial op path is small (reuses the apply-core + the entry-walk). +- **Reassembly only for rare long ops.** Common path single-frame; cap the reassembly buffer and fail-safe (drop + error ack) on overflow. +- **Apply on the main loop, not the Improv task** (producer/consumer atomic) — same discipline `pendingDeviceModelReady_` follows. +- **Eth-only / no-Improv-at-boot:** out of scope here. With the handoff removed, an eth-only device's catalog defaults are applied via MoonDeck (direct REST on LAN) until the eth-Improv-listener lands. (Backlog: "Improv listener on eth-only boot" → then serial push is universal.) +- **Ordering:** ops apply in entry order (clear-children before adds; add before its controls); preserved by sequential send + per-op ack. diff --git a/docs/history/plans/Plan-20260621 - Improv frame-contract unit tests (pytest + node-test).md b/docs/history/plans/Plan-20260621 - Improv frame-contract unit tests (pytest + node-test).md new file mode 100644 index 0000000..338f47c --- /dev/null +++ b/docs/history/plans/Plan-20260621 - Improv frame-contract unit tests (pytest + node-test).md @@ -0,0 +1,41 @@ +# Plan — Improv frame-contract unit tests (pytest + node:test, in CI) + +## Context + +MoonDeck (Python) and the web installer (JS) have **no unit tests** today (no pytest dep, no package.json). 
The single highest-value target is the **Improv wire frame**, which is implemented **three times**, in copies that must agree byte-for-byte: + +- device C++ — `src/core/ImprovFrame.h` (parser) + `src/platform/esp32/platform_esp32_improv.cpp` (handlers) +- Python — `scripts/build/improv_provision.py::build_frame` / `checksum` +- installer JS — `docs/install/install-orchestrator.js::buildImprovFrame` + the APPLY_OP chunker + +All three use `IMPROV` magic + version 1 + type + length + payload + **sum-mod-256** checksum (verified: `ImprovFrame.h:115`, JS `& 0xff`, Py `& 0xFF`). Drift here silently breaks provisioning. Pin it with a **golden vector** asserted on both the Python and JS sides (and hand-checked against C++). + +## Decisions (product owner) + +- Frameworks: **pytest** (Python, add as dev dep, `uv run pytest`) + **node:test** (JS, `node --test`, zero npm deps). +- Scope: **Improv frame contract first** (not MoonDeck/installer-wide logic yet). +- CI: **add to commit gates + a new PR-triggered `.github/workflows/test.yml`** (no PR test gate exists today). +- JS testability: **extract `docs/install/improv-frame.js`** — the orchestrator's top-level `import` from `unpkg.com` makes the module non-importable in node, so move the pure byte-building into a dependency-free shared module both the orchestrator and the test import. +- Layout: **`test/python/` + `test/js/`** (mirrors the existing `test/` C++ dirs). + +## Files + +1. **New** `docs/install/improv-frame.js` — pure, dependency-free ES module: `IMPROV_MAGIC`, `IMPROV_FRAME_TYPE_RPC`, `IMPROV_CMD_*`, `APPLY_OP_CHUNK_MAX`, `buildImprovFrame(type, payload)`, `encodeApplyOpFrames(op)` (returns the array of frames for an op, incl. chunking). ~40 lines moved out of the orchestrator — not new logic. +2. **Edit** `docs/install/install-orchestrator.js` — `import` those from `./improv-frame.js`; delete the inlined copies + constants. No behavior change. `sendApplyOpFrame` becomes "encode via `encodeApplyOpFrames`, write each + pace." +3.
**New** `test/js/improv-frame.test.mjs` — `node:test`: frame layout, checksum, APPLY_OP single-frame + multi-chunk (seq/last), and the golden vector. +4. **New** `test/python/test_improv_frame.py` — pytest over `improv_provision.build_frame`/`checksum` (imported via `sys.path`), same golden vector. (`import serial` is already lazy/`try`-guarded, so the import is clean without pyserial.) +5. **Edit** `pyproject.toml` — add `pytest` dev dependency. +6. **New** `.github/workflows/test.yml` — PR-triggered: `uv run pytest test/python` + `node --test test/js`. +7. **Edit** `CLAUDE.md` Event 1 (commit gates) — add the pytest + node:test step (trigger: `scripts/**`, `docs/install/**`, `test/python/**`, `test/js/**` changed). + +## Golden vector + +One fixed input → exact expected bytes, asserted identically in both test files (and documented for the C++ side). E.g. `buildImprovFrame(0x03, [0x01]) == IMPROV + [1,3,1,1] + [checksum]`, and an `APPLY_OP` of a known small op → its single frame; a >125-byte op → N frames with correct seq/last. + +## Verification + +`uv run pytest`, `node --test` both green; golden vector matches across Python ↔ JS; hand-verified against `ImprovFrame.h`. Existing 423 C++ tests + gates unaffected (no `src/` change). + +## Scope + +~40 lines moved (JS extract, net-neutral) + 2 small test files + 1 dev-dep + CI/gate glue. Mostly new test files; one clean no-behavior-change source extraction. diff --git a/docs/install/README.md b/docs/install/README.md index da2cd31..e581939 100644 --- a/docs/install/README.md +++ b/docs/install/README.md @@ -5,9 +5,13 @@ This directory holds the source for the **custom installer page** (driven by <https://moonmodules.org/projectMM/install/>. End users land here, pick a channel + device, click Install. 
The browser flashes -the device over USB (Web Serial → ESP32), then runs Improv-Serial provisioning, -SET_DEVICE_MODEL, and HTTP control fan-out — all from the same orchestrator, -end-to-end, no ESP Web Tools dependency on the install path. +the device over USB (Web Serial → ESP32), runs Improv-Serial provisioning, then +pushes the picked device-model's whole config over the **same serial port** as REST +operations (**"Improv = REST over serial"**: SET_DEVICE_MODEL + APPLY_OP) — all from +the same orchestrator, no ESP Web Tools dependency. Pushing over serial (not HTTP) +is what makes the deployed HTTPS installer work: a browser blocks an HTTPS page from +POSTing to a plain-`http://` device (mixed-content), so the old HTTP fan-out + the +`?deviceModel=` browser handoff are gone; serial bypasses the network entirely. ## What's in this directory @@ -16,32 +20,28 @@ end-to-end, no ESP Web Tools dependency on the install path. the picker's GitHub-release URLs to same-origin Pages URLs before handing them to the custom orchestrator (Web Serial is CORS-bound). - [`install-orchestrator.js`](install-orchestrator.js) — owns the - SerialPort across flash → reboot → Improv provision → SET_DEVICE_MODEL RPC. - Replaces the ESP Web Tools install button so the post-provision board - push works (EWT 10.x's `state-changed` event fires inside a shadow DOM - that's invisible to the host page; the orchestrator side-steps that by - owning the whole flow). Falls through to a "device IP" prompt when the - device doesn't speak Improv back — the picked deviceModel is handed off via the - `?deviceModel=` query param + HTTP `/api/control` inject (see the `deviceModel` - control on [SystemModule.md](../moonmodules/core/SystemModule.md)). An **Apply - device defaults** checkbox gates this whole inject (all three channels: - SET_DEVICE_MODEL, the HTTP `/api/control` fan-out, and the `?deviceModel=` - first-visit handoff). 
It auto-ticks with **Erase chip first** (a clean slate wants - defaults) and starts unticked otherwise, so re-flashing a configured device keeps - its current modules/controls instead of having the catalog's `replaceChildren` - delete the user's effects. The board's `txPower` brown-out cap still applies either - way — it's a hardware trait, not a default. + SerialPort across flash → reboot → Improv provision → config push. Replaces the + ESP Web Tools install button (EWT 10.x's `state-changed` event fires inside a + shadow DOM invisible to the host page; the orchestrator owns the whole flow so it + can write its own vendor RPC frames). After provisioning it pushes the picked + device-model's catalog over serial as APPLY_OP ops (a `clearChildren` pre-pass for + any `replaceChildren` container, then an `add` per module + a `set` per control) — + same operations the HTTP REST API does, applied by the same device-side core. An + **Apply device defaults** checkbox gates that push: it auto-ticks with **Erase chip + first** (a clean slate wants defaults) and starts unticked otherwise, so re-flashing + a configured device keeps its current modules/controls. The board's `txPower` + brown-out cap is still sent via its own `SET_TX_POWER` RPC **before** provisioning + (it must land before the first association on weak-power boards). When the device + doesn't speak Improv back (typed-IP / eth-only path), no serial push happens — apply + the defaults later from MoonDeck on the LAN (plain HTTP REST, no mixed-content). - [`devices.js`](devices.js) — the *Your devices* list. Stores devices the - user provisioned from this page so they can re-visit / erase / forget - them. 
Renders a dedicated *Inject* button next to Visit for every entry - with a `board` field; the button opens `<device>/?deviceModel=<name>` and the - device UI fetches the matching `deviceModels.json` entry from Pages and, for each of - its `modules`, adds the module (`/api/modules`) then sets its nested controls - (`/api/control`) — add-then-configure; see the schema below. Idempotent — safe - to re-click after a popup-blocker rejection or a follow-up catalog edit. -- [`deviceModels.json`](deviceModels.json) — the board catalog (name → firmware - variants + the modules/controls to inject) the picker fetches and the - installer / device-UI / MoonDeck injectors write from. Schema below. + user provisioned from this page so they can re-visit / erase / forget them + (Visit / Erase / Forget). The device-model defaults are applied during the install + over serial, so there is no "inject" button here — to re-apply a model to an + already-running device, use MoonDeck on the LAN. +- [`deviceModels.json`](deviceModels.json) — the device-model catalog (name → firmware + variants + the modules/controls). The installer walks it to emit the APPLY_OP ops; + MoonDeck and the picker also read it. Schema below. - [`favicon.png`](favicon.png) — moon-man, same as the device UI. - [`README.md`](README.md) — this file. @@ -67,11 +67,11 @@ labelled summary with a thumbnail: ## Catalog schema (`deviceModels.json`) A flat JSON array of catalog entries. Each entry is the single source of truth -for one piece of hardware and what to set up on it at install time. Three -clients consume it identically — the web installer (`install-orchestrator.js`), -the device UI's `?deviceModel=` inject (`src/ui/app.js`), and MoonDeck -(`scripts/moondeck.py`) — so **adding another module-with-controls unit needs no -client change**. +for one piece of hardware and what to set up on it at install time. 
Two clients +consume it identically — the web installer (`install-orchestrator.js`, which emits +the entry's units as APPLY_OP ops over serial) and MoonDeck (`scripts/moondeck.py`, +which POSTs them over HTTP REST on the LAN) — so **adding another module-with-controls +unit needs no client change** (both walk the same entry; only the transport differs). ```json { diff --git a/docs/install/devices.js b/docs/install/devices.js index fff2778..3531315 100644 --- a/docs/install/devices.js +++ b/docs/install/devices.js @@ -12,22 +12,14 @@ // fetch()-ing http://192.168.1.X. The device-side Diagnose button (in // app.js) does the same job from the right side of the security boundary. // -// State shape: `[{ name, url, lastSeen, board?, pendingBoard? }]` keyed under -// `projectMM.devices.v1` in localStorage. `board` is optional — entries -// from before Step 3 of the board-injection plan have no board field, and -// the render path treats it as absent. A schema bump (v2, …) is how future -// migrations land; additive fields like this one don't need one. -// -// `pendingBoard` carries a board name (the key into deviceModels.json) the -// installer couldn't push directly — Improv RPC unavailable AND the in- -// orchestrator HTTP fallback blocked by mixed-content. It only influences -// the *styling* of the **Inject** button (primary-flavoured when present); -// the button itself renders whenever the entry has a `board` field at all. -// Re-clicks are idempotent (the device just re-writes the same `controls.*` -// values), so we never gate the button on a one-shot flag — popup blockers, -// mistyped URLs, or a follow-up deviceModels.json edit all need the action to -// stay reachable. `acknowledgeBoardInject` clears `pendingBoard` after a -// click; the button stays, just neutral-styled. +// State shape: `[{ name, url, lastSeen, deviceModel? }]` keyed under +// `projectMM.devices.v1` in localStorage. 
`deviceModel` is a bookmark label only — +// the model's defaults are applied to the device during the install over serial +// ("Improv = REST over serial"), not from this list. It's optional; the render path +// treats an absent value as no model line. Entries saved before the board→deviceModel +// rename carry the old `board` field; the render reads it as a fallback (no migration +// needed). A schema bump (v2, …) is how future migrations land; additive/renamed +// fields read with a fallback don't need one. const STORAGE_KEY = "projectMM.devices.v1"; @@ -130,15 +122,15 @@ function render() { seenEl.className = "device-seen"; seenEl.textContent = `Provisioned ${relativeTime(device.lastSeen)}`; info.append(nameEl, urlEl); - // Board line (between URL and last-seen) renders only when set — - // legacy entries from before the field was added stay unchanged. - // The orchestrator passes board into addProvisionedDevice() when - // SET_DEVICE_MODEL succeeded; "(any board)" provisions skip the field. - if (device.board) { - const boardEl = document.createElement("div"); - boardEl.className = "device-board-name"; - boardEl.textContent = device.board; - info.append(boardEl); + // Device-model line (between URL and last-seen) renders only when set. + // Read `board` as a fallback so bookmarks saved before the board→deviceModel + // rename keep their label without a migration. "(any device)" provisions skip it. + const deviceModel = device.deviceModel || device.board; + if (deviceModel) { + const modelEl = document.createElement("div"); + modelEl.className = "device-model-name"; + modelEl.textContent = deviceModel; + info.append(modelEl); } info.append(seenEl); @@ -148,33 +140,6 @@ function render() { // noopener so the device-UI tab can't drive the install page. 
window.open(device.url, "_blank", "noopener"); }, "Open the device UI in a new tab"); - // Inject button: always rendered when the entry has a board name on - // it (whether or not we still have a `pendingBoard` flag). Opens the - // device UI with `?deviceModel=<name>` so the device's app.js fetches the - // matching deviceModels.json entry from Pages and POSTs each `controls.*` - // field to `/api/control`. Re-clicks are idempotent (same value - // written to the same controls), so we don't gate the button on a - // one-shot flag — popup blockers, mistyped URLs, and "the device - // rejected one field, retry after fixing deviceModels.json" all need the - // button to stay reachable. `pendingBoard` (set by the orchestrator - // when the in-page HTTP push didn't succeed) only affects styling: - // primary-flavoured when there's an unconfirmed push, neutral once - // the user has actioned it once. - if (device.board) { - const labelName = device.pendingBoard || device.board; - const inject = makeBtn("Inject", () => { - if (!confirm( - `Open ${device.name} and inject the board config for ` + - `"${labelName}"?\n\n` + - `The device will fetch the matching entry from deviceModels.json ` + - `and apply every field via /api/control. Safe to re-run — ` + - `the values are idempotent.`)) return; - window.open(buildInjectUrl(device), "_blank", "noopener"); - acknowledgeBoardInject(device); - }, `Push the deviceModels.json config for "${labelName}" to the device`); - if (device.pendingBoard) inject.classList.add("primary"); - actions.append(inject); - } const erase = makeBtn("Erase", () => { if (!confirm( `Erase ${device.name}? This wipes WiFi credentials and all ` + @@ -205,42 +170,6 @@ function makeBtn(label, handler, title) { return b; } -// Build `<device.url>?deviceModel=<name>` for the Inject button. 
The device UI's -// `consumePendingDeviceModelParam()` reads the param, fetches the matching entry -// from deviceModels.json on Pages, and POSTs each `controls.*` field to the -// device's `/api/control`. URLSearchParams handles encoding so names with -// spaces (e.g. "Olimex ESP32-Gateway Rev G") round-trip cleanly. -// `pendingBoard` is the name the orchestrator couldn't push directly; -// after the first Inject click that flag clears, but the button stays -// reachable and re-injects using the persistent `board` field (devices.js's own -// per-device record key — distinct from the device's `deviceModel` control). -function buildInjectUrl(device) { - const name = device.pendingBoard || device.board; - if (!name) return device.url; - try { - const u = new URL(device.url); - u.searchParams.set("deviceModel", name); - return u.toString(); - } catch (_) { - return device.url; - } -} - -// Single-shot: once the user clicks Inject, drop `pendingBoard` from the -// entry so the button doesn't reappear next time. The fetch + fan-out on -// the device side either succeeded (board fields applied) or failed -// (network error, SystemModule.setDeviceModel validation rejected a value) — either way -// we don't auto-retry; the user re-adds via a fresh install or sets the -// fields manually via MoonDeck. -function acknowledgeBoardInject(device) { - if (!device.pendingBoard) return; - const stored = state.devices.find(d => d.url === device.url); - if (!stored) return; - delete stored.pendingBoard; - saveDevices(state.devices); - render(); -} - export const myDevices = { /** * Mount the device list into the given container. @@ -263,19 +192,14 @@ export const myDevices = { * post-Improv success URL — typically `http://MM-XXXX.local/` or * `http://<ip>/` depending on the firmware. * @param {string} url - * @param {string} [board] - physical board name from the picker - * (Step 3 of the board-injection plan). 
Empty / undefined = user - * picked "(any board)" or the SET_DEVICE_MODEL RPC was skipped; the bookmark - * row omits the board line. Non-empty updates an existing entry's - * board on re-flash; never blanks a previously-set value. - * @param {object} [opts] - * @param {boolean} [opts.pendingBoardPush] - true when the installer - * couldn't push the board itself (HTTPS Pages → HTTP device blocked - * by mixed-content). Renders the row's Inject button with the - * primary style and seeds `pendingBoard` so the user knows a push - * is needed. Ignored when `board` is empty (nothing to push). + * @param {string} [deviceModel] - device-model name from the picker. Empty / + * undefined = user picked "(any device)" or didn't apply defaults; the + * bookmark row omits the model line. Non-empty updates an existing entry's + * model on re-flash; never blanks a previously-set value. (The device-model + * defaults themselves are applied during the install over serial — this is + * just the bookmark record, no inject handoff.) */ - addProvisionedDevice(url, board, opts) { + addProvisionedDevice(url, deviceModel) { if (!url || typeof url !== "string") return; // Restrict to http/https — the Visit button does window.open(url), // which would happily launch javascript: or file: URLs if a future @@ -285,30 +209,15 @@ export const myDevices = { let parsed; try { parsed = new URL(url); } catch (_) { return; } if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return; - const pendingBoardPush = !!(opts && opts.pendingBoardPush && board); const existing = state.devices.find(d => d.url === url); const now = new Date().toISOString(); if (existing) { existing.lastSeen = now; - // Only overwrite board when caller supplied a value — re-flashing - // with "(any board)" mustn't blank a previously-set entry. - if (board) existing.board = board; - // pendingBoard tracks "an Inject-button handoff is still owed." 
- // Set on push-failure (for the just-pushed board); clear on - // push-success — otherwise a re-install after a failed push - // leaves the flag stranded and buildInjectUrl keeps offering - // the stale name. Gate the clear on `board` truthy too: an - // "(any board)" refresh is not a board change, so it must not - // clear an outstanding-injection flag from a prior real push. - if (pendingBoardPush) existing.pendingBoard = board; - else if (board) delete existing.pendingBoard; + // Only overwrite deviceModel when caller supplied a value — re-flashing + // with "(any device)" mustn't blank a previously-set entry. + if (deviceModel) existing.deviceModel = deviceModel; } else { - const entry = { - name: nameFromUrl(url), url, lastSeen: now, - board: board || "", - }; - if (pendingBoardPush) entry.pendingBoard = board; - state.devices.push(entry); + state.devices.push({ name: nameFromUrl(url), url, lastSeen: now, deviceModel: deviceModel || "" }); } saveDevices(state.devices); render(); diff --git a/docs/install/improv-frame.js b/docs/install/improv-frame.js new file mode 100644 index 0000000..409ca0d --- /dev/null +++ b/docs/install/improv-frame.js @@ -0,0 +1,84 @@ +// Improv-serial frame building — the pure, dependency-free wire-format core. +// +// Extracted from install-orchestrator.js so it's importable in node:test without +// pulling the orchestrator's browser-only unpkg imports (esptool-js, the Improv +// SDK). The orchestrator imports these for the actual send; test/js imports them +// to pin the byte layout. This is the SAME wire format three implementations must +// agree on: device C++ (src/core/ImprovFrame.h + platform_esp32_improv.cpp), +// Python (scripts/build/improv_provision.py), and this file. The frame-contract +// tests (test/js, test/python) assert a shared golden vector so they can't drift. 
+// +// Frame layout (matches src/core/ImprovFrame.h): +// [I][M][P][R][O][V][version=1][type][length][payload×length][checksum] +// checksum = sum-mod-256 of the first 9+length bytes. + +// SET_DEVICE_MODEL vendor RPC command ID. High end of the conventional 0x80-0xFE +// vendor extension range. Matches the device-side handler at +// src/platform/esp32/platform_esp32_improv.cpp. +export const IMPROV_CMD_SET_DEVICE_MODEL = 0xFE; + +// SET_TX_POWER vendor RPC command ID — the pre-association TX-power cap for boards +// whose LDO browns out at full power. Sent BEFORE provisioning so the very first +// association runs capped. Matches the device-side handler. +export const IMPROV_CMD_SET_TX_POWER = 0xFD; + +// APPLY_OP vendor RPC command ID — "Improv = REST over serial". Carries ONE REST +// operation as JSON ({"op":"add|set|clearChildren",…}, the same shape an HTTP +// /api/modules or /api/control body has). Frame payload: [0xFC][seq][last][chunk…]. +// Matches improvHandleApplyOp at src/platform/esp32/platform_esp32_improv.cpp. +export const IMPROV_CMD_APPLY_OP = 0xFC; + +// Max op-JSON bytes per frame: the device's kImprovMaxPayload (128) minus the +// 3-byte [cmd][seq][last] header. A longer op (a big pins list) chunks. +export const APPLY_OP_CHUNK_MAX = 128 - 3; + +// Improv frame type for RPC commands (matches src/core/ImprovFrame.h). +export const IMPROV_FRAME_TYPE_RPC = 0x03; + +// Magic bytes that prefix every Improv frame. ASCII "IMPROV". +export const IMPROV_MAGIC = [0x49, 0x4d, 0x50, 0x52, 0x4f, 0x56]; + +// Wrap a payload in Improv framing. Returns a Uint8Array: +// IMPROV + version(1) + type + length + payload + checksum(sum-mod-256). +export function buildImprovFrame(type, payload) { + const len = payload.length; + // The length is a single byte on the wire — a payload over 255 would truncate + // silently and emit a corrupt frame. Throw instead. 
(Callers chunk well under + // the device's 128-byte kImprovMaxPayload, so this never fires in practice; it + // guards a future caller, matching ImprovFrame.h's oversize-payload rejection.) + if (len > 255) throw new Error(`Improv payload length ${len} exceeds 255 (one-byte length field)`); + const frame = new Uint8Array(6 + 1 + 1 + 1 + len + 1); + frame.set(IMPROV_MAGIC, 0); + frame[6] = 0x01; // version + frame[7] = type; + frame[8] = len; + frame.set(payload, 9); + let sum = 0; + for (let i = 0; i < 9 + len; i++) sum = (sum + frame[i]) & 0xff; + frame[9 + len] = sum; + return frame; +} + +// Encode ONE REST op into the APPLY_OP frame(s) it sends over serial. The op JSON +// is UTF-8'd and split into APPLY_OP_CHUNK_MAX-byte chunks; each chunk becomes a +// frame with payload [0xFC][seq][last][chunk…]. Always at least one frame (so +// `last` always sends, even for an empty op). The device reassembles by seq and +// applies on `last=1`. Returns an array of Uint8Array frames, in send order. +export function encodeApplyOpFrames(op) { + const bytes = new TextEncoder().encode(JSON.stringify(op)); + const total = bytes.length; + const chunks = Math.max(1, Math.ceil(total / APPLY_OP_CHUNK_MAX)); + const frames = []; + for (let seq = 0; seq < chunks; seq++) { + const start = seq * APPLY_OP_CHUNK_MAX; + const slice = bytes.subarray(start, start + APPLY_OP_CHUNK_MAX); + const last = seq === chunks - 1 ? 
1 : 0; + const payload = new Uint8Array(3 + slice.length); + payload[0] = IMPROV_CMD_APPLY_OP; + payload[1] = seq & 0xFF; + payload[2] = last; + payload.set(slice, 3); + frames.push(buildImprovFrame(IMPROV_FRAME_TYPE_RPC, payload)); + } + return frames; +} diff --git a/docs/install/index.html b/docs/install/index.html index 2c8fb48..fc13db4 100644 --- a/docs/install/index.html +++ b/docs/install/index.html @@ -326,7 +326,7 @@ font-size: 12px; cursor: pointer; } - .device-board-name { color: var(--fg); font-size: 12px; margin-top: 2px; } + .device-model-name { color: var(--fg); font-size: 12px; margin-top: 2px; } .device-btn:hover { background: rgba(123, 158, 255, 0.08); } /* Install modal — backdrop + centered card. Replaces the ESP Web Tools @@ -772,8 +772,8 @@ <h2 id="install-title">Installing</h2> <!-- Needs-IP form: shown when the device didn't speak Improv back (alreadyOnline branch, or an Improv-less firmware like esp32-eth). User types the IP/hostname so we can still add the device to - "Your devices" and hand the picked board off through the Visit - click's same-origin URL (?deviceModel=…). + "Your devices". (No serial config push happens on this path — the + device-model defaults are applied later via MoonDeck on the LAN.) Retry button: cheap second chance at Improv before falling back to manual IP entry. Some boards (LOLIN S3 mini, slow-booting variants) lose the race against the post-flash 2 s reopen window; @@ -1309,82 +1309,64 @@ <h2>Serial monitor</h2> if (retrying) input.value = ""; } - function handleSuccess({ url, mdns, board, applyDefaults = true, viaHttp, httpBoardOk, alreadyOnline }) { + function handleSuccess({ url, mdns, board, applyDefaults = true, defaultsApplied = false, viaHttp, alreadyOnline }) { disarmUnloadGuard(); + // Device-model defaults are applied DURING the install over serial (Improv = + // REST over serial — SET_DEVICE_MODEL + APPLY_OP). 
So there's no "open this + // link to finish" step any more; the success screen just confirms + links. if (!url) { - // User skipped the IP prompt. They've already read the - // needs-ip section's hints ("find it on your router's admin page", - // "Leave blank and Skip…"), so a second consolation screen would - // just add a click — close the modal directly. + // No device URL (user skipped the IP prompt, or an eth-only/no-Improv device). + // On that path no serial config push happened. If a model was picked, say so — + // it can be applied later from MoonDeck on the LAN — else just close. + if (board && applyDefaults) { + showSection("done"); + document.getElementById("done-url").removeAttribute("href"); + document.getElementById("done-url").textContent = ""; + document.getElementById("done-url-mdns").hidden = true; + document.getElementById("done-defaults").textContent = + `Flashed. ${board} defaults weren't applied (no device address) — apply them later from MoonDeck on your network.`; + document.getElementById("done-defaults").hidden = false; + return; + } closeModal(); return; } showSection("done"); - // Always show the IP link — it's the address that's guaranteed to work, and the - // installer always has it here. When the boot serial also reported the device's - // <deviceName>.local name, show that as a SECOND link below it: it survives a DHCP - // lease change (the IP can go stale) but only resolves where mDNS works, so it's - // the friendly extra, not the primary. When there's no mdns name, only the IP shows. - // When the in-orchestrator HTTP push of the device-model controls didn't land - // (pendingBoardPush), decorate the clicked link with `?deviceModel=<name>` so - // visiting it runs the device-side consume path and applies the controls on first - // visit — no separate Inject click needed. Displayed text stays the clean URL. 
- // Only pending when the user actually wanted defaults applied — applyDefaults:false - // (re-flash keeping config) must NOT decorate the link or Inject button with - // ?deviceModel=, or clicking it would re-inject what the user chose to skip. - const pendingBoardPush = !!(board && applyDefaults && !httpBoardOk); - const withParam = (u) => pendingBoardPush - ? u + (u.includes("?") ? "&" : "?") + "deviceModel=" + encodeURIComponent(board) - : u; - // Fold the "applying device defaults" outcome into this popup (which stays up), - // since the in-progress status flashes by. Three cases mirror the orchestrator's - // log line: applied now (HTTP push succeeded), applied-on-first-visit (push didn't - // land, so the link below carries ?deviceModel= and the device applies them when - // opened), or not applied (checkbox unticked → config kept as-is). - const note = document.getElementById("done-defaults"); - if (board && applyDefaults) { - note.textContent = httpBoardOk - ? `Applied ${board} defaults.` - : `${board} defaults will be applied when you open the device below.`; - note.hidden = false; - } else if (board) { - note.textContent = `Kept the device's existing config (device defaults not applied).`; - note.hidden = false; - } else { - note.hidden = true; - } - const ipUrl = url; + // Always show the IP link (guaranteed to work). When the boot serial also + // reported the device's <deviceName>.local name, show it as a second link — + // it survives a DHCP lease change but only resolves where mDNS works. 
const a = document.getElementById("done-url"); - a.textContent = ipUrl; - a.href = withParam(ipUrl); + a.textContent = url; + a.href = url; const aMdns = document.getElementById("done-url-mdns"); if (mdns) { const mdnsUrl = `http://${mdns}/`; aMdns.textContent = mdnsUrl; - aMdns.href = withParam(mdnsUrl); + aMdns.href = mdnsUrl; aMdns.hidden = false; } else { aMdns.hidden = true; } - // Pending-board flag: set whenever the in-orchestrator HTTP push - // didn't succeed AND there's a board name to push. Covers both - // paths now: - // - viaHttp (needs-ip / typed-IP): HTTP push is the ONLY board- - // injection channel, so if it failed nothing landed. - // - Improv-success: SET_DEVICE_MODEL over serial already wrote - // System.deviceModel, but the per-device-model controls in `controls.*` - // (e.g. Network.txPowerSetting for the weak-power WiFi cap) - // only ship via the HTTP fan-out. If that fan-out failed — - // exactly the failure mode the fix exists to defend against — - // `pendingBoard` ensures the Inject button decorates the - // row and the device-side `?deviceModel=` consume path can land - // the controls on first visit. (pendingBoardPush is computed above, where - // it also gates the `?deviceModel=` on the success link.) - // When defaults were NOT applied (applyDefaults:false — a re-flash keeping the - // device's own config), store no board: the saved entry must not claim a model - // the device wasn't configured to, and its Inject button must not offer to push - // one the user chose to skip. - myDevices.addProvisionedDevice(url, applyDefaults ? board : "", { pendingBoardPush }); + // Report the defaults outcome HONESTLY, from `defaultsApplied` (did the serial + // push actually run?), not `applyDefaults` (the checkbox intent): applied, + // wanted-but-couldn't (model picked + ticked but no push happened — e.g. an + // Improv-less path), or kept-config (unticked). 
+ const note = document.getElementById("done-defaults"); + if (defaultsApplied) { + note.textContent = `Applied ${board} defaults.`; + note.hidden = false; + } else if (board && applyDefaults) { + note.textContent = `Flashed, but ${board} defaults weren't applied — apply them from MoonDeck on your network.`; + note.hidden = false; + } else if (board) { + note.textContent = `Kept the device's existing config (device defaults not applied).`; + note.hidden = false; + } else { + note.hidden = true; + } + // Store no board unless the defaults actually applied, so the saved entry doesn't + // claim a model the device wasn't configured to. + myDevices.addProvisionedDevice(url, defaultsApplied ? board : ""); } function handleError(stage, error) { diff --git a/docs/install/install-orchestrator.js b/docs/install/install-orchestrator.js index 534dcc5..3a3a150 100644 --- a/docs/install/install-orchestrator.js +++ b/docs/install/install-orchestrator.js @@ -15,7 +15,11 @@ // 3. release the flash locks, hand the same port to ImprovSerial // 4. show a WiFi creds form, await user input // 5. provision via Improv standard SEND_WIFI_CREDENTIALS -// 6. send SET_DEVICE_MODEL vendor RPC (0xFE) with the picked board name +// 6. push the device-model config over serial — "Improv = REST over serial": +// SET_DEVICE_MODEL (0xFE, the identity name) then APPLY_OP (0xFC) frames for +// the deviceModels.json entry's modules + controls. No HTTP, no browser pull, +// so it works identically on the HTTPS deployed installer and local preview +// (the old HTTP /api/control fan-out couldn't run HTTPS→http — mixed-content). // 7. callback with { url, board } so the host page populates // "Your devices" + shows a "Visit device" link // @@ -31,27 +35,17 @@ import { ImprovSerial } from "https://unpkg.com/improv-wifi-serial-sdk@2.5.0/dis // Constants // --------------------------------------------------------------------------- -// SET_DEVICE_MODEL vendor RPC command ID. 
High end of the conventional 0x80-0xFE -// vendor extension range to maximize headroom against future Improv-spec -// expansion into the low vendor range. Matches the device-side handler at -// src/platform/esp32/platform_esp32_improv.cpp. -const IMPROV_CMD_SET_DEVICE_MODEL = 0xFE; - -// SET_TX_POWER vendor RPC command ID — the pre-association TX-power cap for -// boards whose LDO browns out at full power (a thin on-module LDO or marginal -// USB supply, e.g. some S2/S3 mini-class boards). Their deviceModels.json -// cap (controls.Network.txPowerSetting) used to arrive only via the HTTP -// fan-out AFTER the device was online, which a browning-out board can never -// reach: it fails WiFi auth at 20 dBm first (proven on the bench 2026-06-10). -// Sent BEFORE provisioning so the very first association runs capped. -// Matches the device-side handler at src/platform/esp32/platform_esp32_improv.cpp. -const IMPROV_CMD_SET_TX_POWER = 0xFD; - -// Improv frame type for RPC commands (matches src/core/ImprovFrame.h). -const IMPROV_FRAME_TYPE_RPC = 0x03; - -// Magic bytes that prefix every Improv frame. ASCII "IMPROV". -const IMPROV_MAGIC = [0x49, 0x4d, 0x50, 0x52, 0x4f, 0x56]; +// Improv frame building lives in improv-frame.js — the pure, dependency-free wire +// core shared with the frame-contract tests (test/js, test/python pin a golden +// vector so the device C++, Python, and JS implementations can't drift). The +// command IDs + frame layout are documented there. 
+import { + IMPROV_CMD_SET_DEVICE_MODEL, + IMPROV_CMD_SET_TX_POWER, + IMPROV_FRAME_TYPE_RPC, + buildImprovFrame, + encodeApplyOpFrames, +} from "./improv-frame.js"; // --------------------------------------------------------------------------- // Manifest parser @@ -112,26 +106,9 @@ function bufferToBinaryString(buffer) { } // --------------------------------------------------------------------------- -// Improv frame encoder (fallback for the private writePacketToStream) +// Improv RPC payload encoders (frame building is in improv-frame.js) // --------------------------------------------------------------------------- -// Mirrors buildImprovFrame in src/core/ImprovFrame.h. The wire format is: -// [I][M][P][R][O][V][version=1][type][length][payload×length][checksum] -// Checksum = sum-mod-256 of the first 9+length bytes. -function buildImprovFrame(type, payload) { - const len = payload.length; - const frame = new Uint8Array(6 + 1 + 1 + 1 + len + 1); - frame.set(IMPROV_MAGIC, 0); - frame[6] = 0x01; // version - frame[7] = type; - frame[8] = len; - frame.set(payload, 9); - let sum = 0; - for (let i = 0; i < 9 + len; i++) sum = (sum + frame[i]) & 0xff; - frame[9 + len] = sum; - return frame; -} - // Encodes the SET_DEVICE_MODEL RPC payload that the device parser at // platform_esp32_improv.cpp::improvHandleSetDeviceModel expects. // @@ -200,6 +177,97 @@ async function sendSetTxPowerFrame(port, dBm) { } } +// Send ONE REST op (a JS object like {op:"add",type,id,parent}) to the device over +// serial as APPLY_OP frames — "Improv = REST over serial". The op JSON is chunked +// into [0xFC][seq][last][chunk] frames (≤ APPLY_OP_CHUNK_MAX op bytes each; most ops +// are one frame). We own the port here (ImprovSerial closed after provision), so we +// write directly. 
Best-effort fire: the device acks each frame with an RpcResponse +// we don't read back (Web Serial duplex read while we hold the writer is awkward), +// and the single-buffer busy-guard on the device + a small inter-op delay keep the +// device from being overrun. Returns nothing; throws only on a write error. +async function sendApplyOpFrame(port, op) { + const frames = encodeApplyOpFrames(op); // [0xFC][seq][last][chunk…] per frame + const writer = port.writable.getWriter(); + try { + for (const frame of frames) await writer.write(frame); + } finally { + writer.releaseLock(); + } + // Pace so the device's main loop consumes the op (clears its single buffer) before + // the next op's frame arrives — the device refuses a new op while busy. This is + // open-loop (we don't read the ack back: a Web Serial duplex read while we hold the + // writer is awkward), so the delay must clear the worst-case consume window. A + // loaded tick (large grid, many modules) can run a few hundred µs, but the op is + // applied at the START of the next loop() poll, not after a full render — so ~120 ms + // comfortably covers it with headroom. (A read-back ack + retry-on-busy is the + // closed-loop upgrade; backlogged until a real install drops an op, since each op is + // also idempotent so a lost one would re-apply cleanly on a re-flash.) + await new Promise(r => setTimeout(r, 120)); +} + +// Apply a device-model's catalog defaults over serial: SET_DEVICE_MODEL (the identity +// name) then the full config as APPLY_OP ops. The caller must OWN the serial port (no +// ImprovSerial holding the writable lock). Works on any reachable device — fresh- +// provisioned (WiFi) OR already-online at boot (Ethernet) — because the serial RPCs +// need no provisioning state, only the open port. Gated by applyDefaults: when the +// "Apply device defaults" checkbox is unticked, push nothing (keep the device's +// config). 
Returns true iff the catalog push actually ran (so the success note can + // report honestly rather than always claiming "Applied"). +async function pushDefaultsOverSerial(port, board, applyDefaults, trackProgress, onLog) { + if (!(board && applyDefaults)) { + if (onLog) onLog(board + ? `[orchestrator] NOT applying ${board} defaults — "Apply device defaults" unticked; device config left as-is` + : `[orchestrator] no device model selected — no defaults to apply`); + return false; + } + trackProgress("apply-defaults", { board }); + if (onLog) onLog(`[orchestrator] applying ${board} defaults over serial (SET_DEVICE_MODEL + APPLY_OP)`); + await sendSetBoardFrame(port, board); + await new Promise(r => setTimeout(r, 100)); // let the UART task settle + return await sendConfigOverSerial(port, board, onLog); +} + +// Push a device-model's whole catalog config to the device over serial. Walks the +// SAME deviceModels.json entry the HTTP path used (replaceChildren pre-pass, then +// per-module add + per-control set) but emits APPLY_OP ops instead of HTTP requests +// — so the defaults apply during provisioning with no HTTP and no browser handoff. +// Returns true if the entry was found + pushed; false if the catalog fetch failed (non-OK response or exception) or it has no entry for `board`. +async function sendConfigOverSerial(port, board, onLog) { + let entry; + try { + const res = await fetch("./deviceModels.json", { signal: AbortSignal.timeout(5000) }); + if (!res.ok) return false; + const catalog = await res.json(); + entry = Array.isArray(catalog) ? catalog.find(b => b && b.name === board) : null; + } catch (e) { + if (onLog) onLog(`[orchestrator] serial config: catalog fetch failed (${e && e.message || e})`); + return false; + } + if (!entry) return false; + const modules = Array.isArray(entry.modules) ? entry.modules : []; + // replaceChildren pre-pass: clear a container's boot defaults before its catalog + // children are added (so the entry's effects replace, not stack).
+ for (const m of modules) { + if (m && m.replaceChildren && typeof m.id === "string" && m.id) { + await sendApplyOpFrame(port, { op: "clearChildren", parent: m.id }); + } + } + for (const m of modules) { + if (!m || typeof m !== "object" || typeof m.id !== "string" || m.id === "") continue; + if (m.parent_id && m.type) { + await sendApplyOpFrame(port, { op: "add", type: m.type, id: m.id, parent: m.parent_id }); + } + const controls = m.controls; + if (controls && typeof controls === "object") { + for (const [control, value] of Object.entries(controls)) { + await sendApplyOpFrame(port, { op: "set", module: m.id, control, value }); + } + } + } + if (onLog) onLog(`[orchestrator] applied ${board} defaults over serial`); + return true; +} + // Read the device's boot serial log for the two facts the browser can't get any other // way: the device's IP and its mDNS `<deviceName>.local` address. projectMM appends // machine-parseable `MM_IP=<dotted-quad>` and `MM_DEVICE=<deviceName>.local` tokens to @@ -217,9 +285,9 @@ async function sendSetTxPowerFrame(port, dBm) { // firmware, modules, heap, …) from the live device's REST API (`http://<ip>/api/…`), // which is richer and always current — no reason to scrape more fields from a boot log. // The device name is the one exception that CAN'T come from REST: on the HTTPS Pages site -// a fetch to the plain-http device is blocked by mixed-content (see canFetchHttp), so the -// `.local` address has to ride the serial log too. Adding any OTHER device info to the -// installer = call the REST API with this IP, not grow this parser. +// a fetch to the plain-http device is blocked by mixed-content, so the `.local` address has +// to ride the serial log too. Adding any OTHER device info to the installer = call the REST +// API with this IP, not grow this parser. // // Returns { ip, mdns } — ip is the dotted-quad ("" on timeout), mdns is the // "<deviceName>.local" address ("" if the firmware predates MM_DEVICE). 
A fresh device @@ -303,151 +371,6 @@ function normalizeDeviceUrl(input) { } } -// Mixed-content guard: the installer page can only fetch() a plain http:// -// device URL when the page itself is on http:// (i.e. localhost preview). -// On https://moonmodules.org the browser blocks the fetch silently — caller -// must fall through to the query-param handoff via the Visit button. file: -// pages count as http for this purpose. -function canFetchHttp(deviceUrl) { - if (!deviceUrl) return false; - let target; - try { target = new URL(deviceUrl); } catch (_) { return false; } - if (target.protocol !== "http:") return true; // https device → no block - return window.location.protocol !== "https:"; -} - -// One device request with bounded retries. A fresh-flashed device (especially -// the P4 over Ethernet, busy with link bring-up + per-add buildState right after -// flash) intermittently times out or 5xx's a single call; without retry one blip -// would abandon the whole inject. So: retry on timeout / network error / 5xx with -// a short backoff, but NOT on a 4xx (a 404/400 is deterministic — the module/control -// genuinely doesn't exist, retrying won't change that). Returns the Response on a -// final 2xx/4xx, or null if every attempt threw (timeout/network). `label` + `onLog` -// surface what happened. 5s per-attempt timeout — generous for the bench. -async function deviceFetch(url, opts, label, onLog, attempts = 3) { - for (let i = 1; i <= attempts; i++) { - try { - const res = await fetch(url, { ...opts, signal: AbortSignal.timeout(5000) }); - if (res.ok || (res.status >= 400 && res.status < 500)) return res; // settled (success or deterministic 4xx) - if (onLog) onLog(`[orchestrator] ${label}: HTTP ${res.status} (attempt ${i}/${attempts})`); - } catch (e) { - if (onLog) onLog(`[orchestrator] ${label}: ${e && e.name === "TimeoutError" ? 
"timeout" : (e && e.message) || e} (attempt ${i}/${attempts})`); - } - if (i < attempts) await new Promise(r => setTimeout(r, 200 * i)); // linear backoff - } - return null; // every attempt threw -} - -// DELETE every current child of the module named `parentName` on `deviceUrl` — -// the installer-side counterpart of the device UI's clearModuleChildren, used by -// tryHttpInjectBoard's replaceChildren so an entry's effects replace the boot -// defaults instead of stacking. Fetch the live tree, find the container, DELETE -// each child by name. Best-effort with retry (deviceFetch): a child that won't -// delete is logged and skipped rather than aborting — the worst case is an effect -// stacking behind a leftover default, not a half-set-up device. -async function clearDeviceChildren(deviceUrl, parentName, onLog) { - const res = await deviceFetch(new URL("api/state", deviceUrl), {}, "replaceChildren read state", onLog); - if (!res || !res.ok) return; - let tree; - try { tree = await res.json(); } catch (_) { return; } - const findByName = (mods, name) => { - if (!Array.isArray(mods)) return null; - for (const m of mods) { - if (m && m.name === name) return m; - const hit = findByName(m && m.children, name); - if (hit) return hit; - } - return null; - }; - const parent = findByName(tree.modules, parentName); - const children = parent && Array.isArray(parent.children) ? parent.children : []; - for (const child of children) { - const d = await deviceFetch(new URL("api/modules/" + encodeURIComponent(child.name), deviceUrl), - { method: "DELETE" }, `replaceChildren DELETE ${child.name}`, onLog); - if (!d || !d.ok) { - if (onLog) onLog(`[orchestrator] replaceChildren: could not delete ${child.name} — continuing`); - } - } -} - -// Fan out every `controls.<Module>.<control>` field for `board` from the -// same-origin `./deviceModels.json` into the device's `/api/control`. 
Mirrors -// what the device UI's `consumePendingDeviceModelParam()` does for the Inject- -// button path — keeps preview-mode parity with production, INCLUDING the -// `replaceChildren` clear-then-add (so an entry's effects replace the boot -// defaults rather than stack behind them, same as the device-UI path). -// BEST-EFFORT (matching the device-UI handoff's contract, not all-or-nothing): a -// unit whose add or control write fails after retries is logged and skipped, so one -// transient hiccup on a flaky device (the P4 right after flash) can't abandon the -// rest of setup. Returns true only if EVERY unit applied cleanly; false if anything -// was skipped, so the caller still knows to fall back to the `?deviceModel=` handoff. -async function tryHttpInjectBoard(deviceUrl, board, onLog) { - let entry; - try { - const res = await fetch("./deviceModels.json", { signal: AbortSignal.timeout(5000) }); - if (!res.ok) return false; - const catalog = await res.json(); - entry = Array.isArray(catalog) - ? catalog.find(b => b && b.name === board) - : null; - } catch (_) { - return false; - } - if (!entry) return false; - // Each entry is a list of module-with-controls units: - // { type, id, parent_id?, controls?, replaceChildren? } - // Per module: add it first (when it has a parent_id — a fresh flash has no - // user-added modules like AudioModule, so a control write would 404), then set - // its controls. A module without parent_id is boot-wired/top-level (System, - // Network) that already exists — skip the add, just set controls. The add is - // idempotent (an existing id returns 200). - const modules = Array.isArray(entry.modules) ? entry.modules : []; - let allOk = true; - // replaceChildren pre-pass: a container unit (Layer/Layouts/Drivers) marked - // replaceChildren clears its boot defaults before this entry's children are - // added — without it the entry's effect sits behind the default and never - // renders. 
Same enumerate-then-DELETE as the device-UI path's clearModuleChildren. - for (const m of modules) { - if (m && m.replaceChildren && typeof m.id === "string" && m.id) { - await clearDeviceChildren(deviceUrl, m.id, onLog); - } - } - for (const m of modules) { - if (!m || typeof m !== "object") continue; - // id keys both the module add and every control write below; a unit - // without one is malformed catalog data — skip it rather than POST a - // body the device can't route. - if (typeof m.id !== "string" || m.id === "") continue; - if (m.parent_id && m.type) { - const res = await deviceFetch(new URL("api/modules", deviceUrl), { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ type: m.type, id: m.id, parent_id: m.parent_id }), - }, `add ${m.id}`, onLog); - if (!res || !res.ok) { - // Add failed after retries — skip this unit's controls (they'd 404 - // on a module that isn't there) but keep going with the rest. - if (onLog) onLog(`[orchestrator] add ${m.id} failed — skipping its controls, continuing`); - allOk = false; - continue; - } - } - const controls = m.controls; - if (!controls || typeof controls !== "object") continue; - for (const [controlName, value] of Object.entries(controls)) { - const res = await deviceFetch(new URL("api/control", deviceUrl), { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ module: m.id, control: controlName, value }), - }, `set ${m.id}.${controlName}`, onLog); - if (!res || !res.ok) { - if (onLog) onLog(`[orchestrator] set ${m.id}.${controlName} failed — continuing`); - allOk = false; - } - } - } - return allOk; -} // Inline of esptool-js@0.4.7's ESPLoader.main, with attempts=2 instead of // the default 7. The default takes ~60 s to fail when the user picks a @@ -582,7 +505,7 @@ export const installer = { * guidance message — the OS picker is modal and covers the install * modal. 
Optional; degrade gracefully to a silent re-prompt when * omitted (older host pages just lose the guidance section). - * @param {(detail: {url: string, mdns?: string, board: string, viaHttp?: boolean, httpBoardOk?: boolean, alreadyOnline?: boolean}) => void} opts.onSuccess + * @param {(detail: {url: string, mdns?: string, board: string, applyDefaults?: boolean, defaultsApplied?: boolean, viaHttp?: boolean, alreadyOnline?: boolean}) => void} opts.onSuccess * `mdns` is the device's `<deviceName>.local` address from the boot serial * (deviceName is the single identity — mDNS = AP = DHCP hostname all follow * it), or "" if the firmware predates the MM_DEVICE token. The host shows it @@ -593,11 +516,8 @@ export const installer = { * initialize() fails and the orchestrator falls through to the * needs-ip prompt. `viaHttp:true` is set whenever the URL came from * the user-typed IP form (alreadyOnline OR Improv-less firmware). - * `httpBoardOk:true` is set when an HTTP `/api/control` push of the - * picked board succeeded inside the orchestrator — only possible in - * localhost preview, since HTTPS Pages can't fetch the device's HTTP - * URL (mixed-content). Host uses `httpBoardOk` to skip the pending- - * board query-param handoff in `addProvisionedDevice`. + * `applyDefaults` is the "Apply device defaults" checkbox (intent); `defaultsApplied` is whether the + * serial config push actually ran — the host words the success note from both (applied / not applied / config kept). * @param {(stage: string, error: Error) => void} opts.onError * @param {(line: string) => void} [opts.onLog] - optional: each line * esptool-js writes to its "terminal" gets forwarded here. Host @@ -890,13 +810,23 @@ export const installer = { let viaHttp = false; let needsIp = false; let alreadyOnline = false; + let defaultsApplied = false; // true once the serial config push actually ran if (bootIp) { - if (onLog) onLog(`[orchestrator] device already online at ${bootIp}${bootMdns ? 
` (${bootMdns})` : ""} (from boot serial) — skipping Improv`); + // Device is already online at boot — typically an Ethernet device (eth + // links up instantly, so it prints MM_IP before we'd provision) or a + // device with saved WiFi. We skip Improv *provisioning* (no creds needed), + // but we STILL own the serial port, and the device's Improv listener runs + // on eth too now — so push the device-model defaults over serial right + // here. (Before, this path skipped the push entirely, leaving an eth + // device flashed-but-unconfigured.) The TX-power cap isn't needed on an + // already-associated device, so it's skipped on this branch. + if (onLog) onLog(`[orchestrator] device already online at ${bootIp}${bootMdns ? ` (${bootMdns})` : ""} (from boot serial) — skipping Improv provisioning`); deviceUrl = `http://${bootIp}/`; deviceMdns = bootMdns; viaHttp = true; alreadyOnline = true; + defaultsApplied = await pushDefaultsOverSerial(port, board, applyDefaults, trackProgress, onLog); } else { // Pre-association TX-power cap (weak-power brown-out fix): push it // while we still own the port, before ImprovSerial locks it. @@ -979,19 +909,24 @@ export const installer = { try { if (improvClient) await improvClient.close(); } catch (_) { /* ignore */ } improvClient = null; if (!uiWaitForIp) { - // uiWaitForIp may be omitted on older host pages — - // degrade gracefully to the legacy empty-URL exit - // (no retry button without the dialog to host it). + // uiWaitForIp may be omitted on older host pages — degrade to the + // legacy empty-URL exit (no retry button without the dialog). + // This path never reached Improv-success, so no serial config push + // happened; carry board + applyDefaults so the success note can tell + // the user the defaults weren't applied (apply later via MoonDeck on + // the LAN). Defaults over serial only happen on the Improv path below. 
trackProgress("done"); - onSuccess({ url: "", board: "", alreadyOnline: true }); + onSuccess({ url: "", board, applyDefaults, alreadyOnline: true }); return; } trackProgress("needs-ip"); const ipResult = await uiWaitForIp(); if (!ipResult || ipResult.action === "skip") { - // User skipped the IP prompt; mirror the old behaviour. + // User skipped the IP prompt. No serial config push on this path; + // carry board + applyDefaults so the success note says the defaults + // weren't applied (apply later via MoonDeck on the LAN). trackProgress("done"); - onSuccess({ url: "", board: "", alreadyOnline }); + onSuccess({ url: "", board, applyDefaults, alreadyOnline }); return; } if (ipResult.action === "ip") { @@ -1071,8 +1006,10 @@ export const installer = { // outcomes ("we got a URL" vs "we didn't"). Different // shapes for different cardinalities, not drift. if (!ssid) { + // Empty SSID (Skip in the creds form). Keep board + applyDefaults + // so the success screen guides the user to apply defaults later. trackProgress("done"); - onSuccess({ url: "", board: "", alreadyOnline: false }); + onSuccess({ url: "", board, applyDefaults, alreadyOnline: false }); return; } @@ -1094,82 +1031,12 @@ export const installer = { const provName = improvClient.info && improvClient.info.name; if (provName) deviceMdns = `${provName}.local`; - // Push SET_DEVICE_MODEL vendor RPC if the user picked a board. - // ImprovSerial holds the writable lock — close it first so - // we can write our own raw frame. We're done with - // ImprovSerial anyway (provision is the last standard - // command we need). - // applyDefaults gates the catalog inject: when the user unticked - // "Apply device defaults" (e.g. re-flashing a configured device), skip - // pushing SET_DEVICE_MODEL so the device's existing config is left intact. 
- if (board && applyDefaults) { - trackProgress("set-board"); - if (onLog) onLog(`[orchestrator] SET_DEVICE_MODEL "${board}" over Improv serial`); - await improvClient.close(); - improvClient = null; - await sendSetBoardFrame(port, board); - // The device-side handler responds with RpcResponse, - // but we don't wait for it — failures (validation - // rejection, etc.) are reported via ErrorState which - // we'd need to re-open a reader to see. Best-effort: - // device persists in next 2s via FilesystemModule's - // debounced save. If push silently failed, the field - // stays empty and the user can pick the board via - // MoonDeck later. - // Small grace period so the device's UART task - // finishes processing before we close the port. - await new Promise(r => setTimeout(r, 200)); - } - } - - // One clear top-level line so the log shows whether this device's - // catalog defaults are being applied — and if not, why. Three cases: - // applying (a model was picked + the checkbox is ticked), skipped-by- - // choice (model picked but "Apply device defaults" unticked — a re-flash - // keeping config), or no-model ("(any board)" was selected). - // Visible modal status (not just the hidden log) so the user actually sees - // the defaults being applied — fires on BOTH the Improv and the eth/typed-IP - // (viaHttp) paths, unlike the earlier set-board stage which is Improv-only. 
- if (board && applyDefaults) trackProgress("apply-defaults", { board }); - if (onLog) { - if (board && applyDefaults) { - onLog(`[orchestrator] applying device defaults for "${board}" (deviceModels.json)`); - } else if (board) { - onLog(`[orchestrator] NOT applying device defaults for "${board}" — "Apply device defaults" unticked; device config left as-is`); - } else { - onLog(`[orchestrator] no device model selected — no defaults to apply`); - } - } - - // HTTP injection attempt — fans out the deviceModels.json `controls.*` - // entries to the device's `/api/control`, mirroring what the - // device UI's `consumePendingDeviceModelParam` does for the Inject- - // button path. Runs for BOTH paths: - // - needsIp (typed IP): the only board-injection path, since - // SET_DEVICE_MODEL over serial wasn't possible. - // - Improv-success: SET_DEVICE_MODEL already pushed `System.deviceModel` over - // serial, but every OTHER field in `controls.*` (e.g. - // `Network.txPowerSetting` for the weak-power WiFi cap) needs - // this fan-out to reach the device. Without it the board - // identifier lands but the per-board tweaks don't. - // Gated by `canFetchHttp(deviceUrl)` — on HTTPS Pages the - // browser blocks fetches to http:// device URLs (mixed-content); - // those users get the controls via the `?deviceModel=` query-param - // handoff after clicking Visit. Successful HTTP push tells the - // host page to skip the pending-board handoff via `httpBoardOk`. - // Gated by applyDefaults too (see the SET_DEVICE_MODEL site above) — an - // unticked "Apply device defaults" skips the controls fan-out, leaving a - // re-flashed device's existing config untouched. - let httpBoardOk = false; - if (board && applyDefaults && canFetchHttp(deviceUrl)) { - if (onLog) onLog(`[orchestrator] attempting HTTP inject for board="${board}" to ${deviceUrl}`); - httpBoardOk = await tryHttpInjectBoard(deviceUrl, board, onLog); - if (onLog) onLog(`[orchestrator] HTTP inject ${httpBoardOk ? 
"succeeded" : "failed"}`); - } else if (board && applyDefaults && onLog) { - // Skipped — most commonly HTTPS Pages → http:// device URL - // blocked by mixed-content. The `?deviceModel=` handoff via the - // Visit button picks up the controls fan-out same-origin. - onLog(`[orchestrator] HTTP inject skipped (cross-origin / mixed-content); relying on ?deviceModel= handoff`); + // Apply the device-model's catalog config over serial — "Improv = REST + // over serial". Close ImprovSerial first so we own the writable lock + // (provision was the last standard command we need it for), then push. + await improvClient.close(); + improvClient = null; + defaultsApplied = await pushDefaultsOverSerial(port, board, applyDefaults, trackProgress, onLog); } trackProgress("done"); @@ -1177,9 +1044,9 @@ export const installer = { url: deviceUrl, mdns: deviceMdns, // "<deviceName>.local" from the boot serial, "" if unknown board: board || "", - applyDefaults, // false → host page skips the ?deviceModel= first-visit handoff + applyDefaults, // the checkbox state (intent) + defaultsApplied, // whether the serial config push actually ran (truth) viaHttp, - httpBoardOk, alreadyOnline, }); } catch (e) { diff --git a/docs/moonmodules/core/ImprovProvisioningModule.md b/docs/moonmodules/core/ImprovProvisioningModule.md index 3e42e94..823742e 100644 --- a/docs/moonmodules/core/ImprovProvisioningModule.md +++ b/docs/moonmodules/core/ImprovProvisioningModule.md @@ -18,14 +18,17 @@ The listener serves **both** serial transports: UART0 (external USB-to-UART brid ## Wire contract -Both transports speak the same Improv-WiFi serial protocol — frames of `IMPROV` + version byte + type + length + payload + checksum. Full protocol details: <https://www.improv-wifi.com/serial/>. The on-device implementation supports four standard RPC commands plus two vendor extensions: +Both transports speak the same Improv-WiFi serial protocol — frames of `IMPROV` + version byte + type + length + payload + checksum. 
Full protocol details: <https://www.improv-wifi.com/serial/>. The on-device implementation supports four standard RPC commands plus three vendor extensions: - `GET_CURRENT_STATE` — returns "authorized" or "provisioned" depending on whether WiFi STA is connected. - `GET_DEVICE_INFO` — returns `[firmware, version, chipFamily, deviceName]` (where `firmware` = `"projectMM"`, `version` from `kVersion` in `build_info.h`, `chipFamily` from `platform::chipModel()`, `deviceName` from `SystemModule`). - `GET_WIFI_NETWORKS` — runs a synchronous WiFi scan, returns up to 10 SSIDs with RSSI + auth flag. **Rejected while STA is connected** (see below). - `WIFI_SETTINGS` — writes SSID + password to NetworkModule via `setWifiCredentials`, polls `wifiStaConnected()` for up to 30 s, replies with success (carrying `http://<ip>/`) or `ERROR_UNABLE_TO_CONNECT`. -- `SET_DEVICE_MODEL` (vendor, `0xFE`) — payload `[str_len][deviceModel name]`; persists the deviceModel name into SystemModule's `deviceModel` control (via `SystemModule::setDeviceModel`, which validates it). Sent by the web installer after provisioning. -- `SET_TX_POWER` (vendor, `0xFD`) — payload `[1][dBm]` (0–21; 0 lifts the cap); persists + applies `Network.txPowerSetting` **before** any association attempt. This is the provisioning escape hatch for boards whose LDO browns out at full TX power (a weak LDO / marginal supply): their `deviceModels.json` cap normally arrives over HTTP *after* the device is online — which a browning-out board can never reach, since it fails WiFi auth at 20 dBm first. `improv_provision.py --tx-power 8` (and the MoonDeck flow) sends this ahead of the credentials; error `0x81` on an out-of-range value. +- `SET_DEVICE_MODEL` (vendor, `0xFE`) — payload `[str_len][deviceModel name]`; persists the deviceModel name into SystemModule's `deviceModel` control (via `SystemModule::setDeviceModel`, which validates it). Sent by the web installer after provisioning, ahead of the `APPLY_OP` config push. 
+- `SET_TX_POWER` (vendor, `0xFD`) — payload `[1][dBm]` (0–21; 0 lifts the cap); persists + applies `Network.txPowerSetting` **before** any association attempt. This is the provisioning escape hatch for boards whose LDO browns out at full TX power (a weak LDO / marginal supply): the cap MUST land before the first association or the board fails WiFi auth at 20 dBm before it is ever online. `improv_provision.py --tx-power 8` (and the MoonDeck flow) sends this ahead of the credentials; error `0x81` on an out-of-range value. +- `APPLY_OP` (vendor, `0xFC`) — **"Improv = REST over serial".** Carries ONE REST operation as JSON, the same shape an HTTP `POST /api/modules` / `/api/control` body has: `{"op":"add","type":…,"id":…,"parent":…}` / `{"op":"set","module":…,"control":…,"value":…}` / `{"op":"clearChildren","parent":…}`. On the device the op is routed to `HttpServerModule`'s apply-core — the *exact same code* the HTTP handlers call — so a REST call over the network and an `APPLY_OP` over serial execute identically. (One schema caveat: the serial `add` op names the parent `parent`, while the HTTP `POST /api/modules` body names it `parent_id`. Both feed the one `applyAddModule()` core but the two transports parse different keys, so an HTTP payload is **not** a drop-in `APPLY_OP` — rename `parent_id` → `parent`. The serial key stays terse because every byte counts against the 128-byte frame.) The web installer pushes a device-model's whole catalog config this way during provisioning (a `clearChildren` pre-pass for any `replaceChildren` container, then an `add` per module + a `set` per control), so the defaults apply over serial with **no HTTP and no browser handoff** — sidestepping the mixed-content block that stops an HTTPS installer page from POSTing to an `http://` device. Frame payload: `[0xFC][seq][last][chunk]` — most ops are one frame; a long value (e.g. 
a big `pins` list) chunks across frames into a reassembly buffer, applied on the device's main loop when `last=1`. Single-buffered: the device errors a new op while the previous is unconsumed, and the installer awaits each ack. (The device-side catalog fetch + the old `?deviceModel=` handoff are removed — to re-apply a model to an already-running device, use MoonDeck on the LAN, which talks plain HTTP REST with no mixed-content barrier.) + +**The serial listener runs on every ESP32 target, including Ethernet-only builds** (`--firmware esp32-eth*`). On eth-only the WiFi-provisioning RPCs (`WIFI_SETTINGS`, `GET_WIFI_NETWORKS`) are compiled out — there's no STA to provision and the `esp_wifi_*` calls aren't linked — but the vendor RPCs (`SET_DEVICE_MODEL`, `SET_TX_POWER`, `APPLY_OP`) and `GET_CURRENT_STATE` / `GET_DEVICE_INFO` still work, so the web installer pushes a device-model's config over serial to an eth device exactly as it does to a WiFi one. On eth, `GET_CURRENT_STATE` reports "provisioned" + the device URL from the Ethernet link (`platform::ethConnected()` / `ethGetIPv4`) instead of the WiFi STA. `WIFI_SETTINGS` and `GET_WIFI_NETWORKS` are both **rejected with `ERROR_UNABLE_TO_CONNECT` while `platform::wifiStaConnected() == true`**. The scan gate protects large installs: `esp_wifi_scan_start` puts the radio into scan mode for 2-5 s, during which inbound ArtNet packets are dropped. On a 16K-LED rig that's a visible glitch. To re-provision a running device, wipe `ssid` via the UI and reboot, then run Improv before STA reconnects. `GET_CURRENT_STATE` and `GET_DEVICE_INFO` stay available regardless — they're read-only and don't touch the radio. diff --git a/docs/moonmodules/core/SystemModule.md b/docs/moonmodules/core/SystemModule.md index ab550f7..ea68e33 100644 --- a/docs/moonmodules/core/SystemModule.md +++ b/docs/moonmodules/core/SystemModule.md @@ -16,7 +16,7 @@ System-level diagnostics and device identity. 
Always loaded, always visible in t **Configurable:** - `deviceName` (text, default `MM-XXXX` where XXXX = last 4 hex of MAC) — the device's network identity (*which unit this is*). Used as hostname for mDNS, AP SSID, and UI display. Persisted. -- `deviceModel` (text, read-only in the UI) — the physical-hardware identity (*which product this is*, e.g. `Olimex ESP32-Gateway Rev G`), the entry name from the device-model catalog ([deviceModels.json](../../install/deviceModels.json)). The device can't self-identify its hardware, so this is *injected* by tooling: MoonDeck / the device UI's `?deviceModel=` inject over HTTP `/api/control`, or the web installer over the Improv `SET_DEVICE_MODEL` RPC (validated by `SystemModule::setDeviceModel`). Display-only in the UI (pushed, never user-typed at the device); persisted. (Was its own `BoardModule` child until folded into System.) +- `deviceModel` (text, read-only in the UI) — the physical-hardware identity (*which product this is*, e.g. `Olimex ESP32-Gateway Rev G`), the entry name from the device-model catalog ([deviceModels.json](../../install/deviceModels.json)). The device can't self-identify its hardware, so this is *pushed* by tooling: the web installer over the Improv `SET_DEVICE_MODEL` RPC during provisioning (alongside the rest of the catalog config via `APPLY_OP` — see [ImprovProvisioningModule.md](ImprovProvisioningModule.md)), or MoonDeck over HTTP `/api/control` on the LAN. Both go through `SystemModule::setDeviceModel`, which validates it. Display-only in the UI (pushed, never user-typed at the device); persisted. (Was its own `BoardModule` child until folded into System.) **Static (set at boot):** - `version` (read-only) — semver from library.json (`MM_VERSION`), plus the release channel in parentheses when the build was published under one: `1.0.0-rc2 (latest)`, `1.0.0 (v1.0.0)`. 
The channel (`MM_RELEASE`) is burned in by `release.yml` via `build_esp32.py --release <tag>`; a local / dev build has no channel and shows the bare semver. Semver answers *what code*; the channel answers *which release this device was flashed from* — a moving `latest` build and a tagged release can share a semver but differ in channel. Desktop builds show the bare semver today (the desktop packager doesn't set the channel). diff --git a/docs/testing.md b/docs/testing.md index a645cbe..cd32e46 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -43,13 +43,52 @@ test/ ├── unit/ │ ├── core/ unit_<Module>[_<topic>].cpp # mirrors src/core/ │ └── light/ unit_<Module>[_<topic>].cpp # mirrors src/light/ -└── scenarios/ - ├── core/ scenario_<Module>_<topic>.json # mirrors src/core/ - └── light/ scenario_<Module>_<topic>.json # mirrors src/light/ +├── scenarios/ +│ ├── core/ scenario_<Module>_<topic>.json # mirrors src/core/ +│ └── light/ scenario_<Module>_<topic>.json # mirrors src/light/ +├── python/ test_<topic>.py # host-side: MoonDeck / build-script logic +└── js/ <topic>.test.mjs # host-side: web-installer logic ``` A test lives under the subfolder of its **primary** `@module`'s source domain (e.g. `Layer` lives in `src/light/`, so `unit_Layer_extrude.cpp` goes in `test/unit/light/`). Cross-domain awareness travels through the `@also` list, not the directory. There's no `platform/` subfolder today — `src/platform/` is a pure abstraction layer whose desktop backend every unit test implicitly exercises; ESP32 platform code never runs on the desktop, so there's nothing to put there yet. 
+### Host-side tests (Python + JS) + +The C++ `ctest` / scenario suites can't reach the **Python** (MoonDeck, build scripts) or **JS** (web installer) code, so those get their own host-side unit tier — `test/python/` (pytest, run `uv run --with pytest --with pyserial pytest test/python`) and `test/js/` (Node's built-in runner, run `node --test "test/js/**/*.test.mjs"`; no `package.json`/`npm install`). Both run in `.github/workflows/test.yml` on every PR and are commit gates (CLAUDE.md Event 1, gate 10) when `scripts/` / `docs/install/` / the test dirs change. Python test files carry their deps in a PEP-723 `# /// script` block (the repo's convention — there's no central `pyproject.toml`); pyserial is a dep only because `improv_provision.py`'s import guard exits without it, not because the frame logic needs it. + +#### What's covered today: the Improv frame wire format + +The Improv serial frame is implemented **three times** — device C++ (`src/core/ImprovFrame.h`), Python (`scripts/build/improv_provision.py`), and installer JS (`docs/install/improv-frame.js`) — so a drift in any one silently breaks provisioning. 
Both suites assert the **same golden vector** so the Python and JS builders provably agree (hand-verified against the C++ sum-mod-256 checksum): + +``` +buildImprovFrame(type=0x03, payload=[0x01]) == 49 4d 50 52 4f 56 01 03 01 01 e3 + └IMPROV┘ v t l p checksum +``` + +**`test/python/test_improv_frame.py`** (pytest) — pins the *shared envelope* that MoonDeck and the provisioning scripts speak (`build_frame` / `checksum` in `improv_provision.py`): + +| Test | Pins | +|---|---| +| `test_checksum_is_sum_mod_256` | checksum is `sum(bytes) & 0xFF` — empty → 0, `0xFF 0xFF` → `0xFE` (wraps) | +| `test_frame_layout` | `IMPROV` magic, version 1, type, length, payload, total length | +| `test_golden_vector_g1` | the exact 11-byte golden frame above (the cross-impl anchor) | +| `test_checksum_covers_header_through_payload` | checksum spans header→payload, excludes the checksum byte itself | + +**`test/js/improv-frame.test.mjs`** (node:test) — pins the same envelope *plus* the `APPLY_OP` chunking that only the installer JS and device C++ implement (Python's provisioning path does WIFI_SETTINGS, not config push): + +| Test | Pins | +|---|---| +| frame layout | magic / version / type / length / payload / checksum positions | +| checksum is sum-mod-256 | matches Python + C++ | +| golden vector G1 | the same 11-byte frame as the pytest anchor | +| G2 — small APPLY_OP `set` is a single frame | `[0xFC][seq=0][last=1]` header + the op JSON byte-identical in the payload | +| G3 — a >125-byte op chunks into ordered frames | two frames, `seq` 0→1, `last` 0→1, the 125-byte chunk boundary, reassembly reproduces the op JSON exactly | +| at-least-one-frame | an empty op still emits one frame with `last=1` (so `last` always sends) | + +The JS suite proves the installer *chunks* an op correctly; the **device side that reassembles those chunks** is pinned by the C++ `unit_ImprovOpReassembler` suite (`src/core/ImprovOpReassembler.h`, the pure state machine behind the device's `APPLY_OP` handler 
— extracted from `platform_esp32_improv.cpp` so it's desktop-testable). It covers the full receive contract: in-order multi-chunk reassembly + NUL-termination, **duplicate-chunk rejection** and **out-of-order/skipped-seq rejection** (the guard against an installer retry corrupting the buffer), **overflow** rejection at the buffer-minus-NUL boundary, mid-stream `seq 0` abandoning a partial op, and clean recovery after every error. Encode (JS) + reassemble (C++) together prove APPLY_OP end to end without hardware. + +This is the first host-side suite; MoonDeck's pure logic (catalog reverse-lookup, state migration) and the installer's op-walk / storage are the next candidates as they accrete regression risk. + ### Naming convention - **Unit tests:** `unit_<ExactModuleName>[_<topic>].cpp` — `<ExactModuleName>` is the **CamelCase** class name as it appears in `// @module` (and in the source: `Layer`, `MoonModule`, `MultiplyModifier`, `NetworkSendDriver`). The optional `<topic>` collapses when the file's the only test for its module (`unit_Color.cpp` is fine if `@module Color`); add it when one module has several test files (`unit_Layer_extrude.cpp`, `unit_Layer_zero_grid.cpp`, …) or when the topic genuinely clarifies what the file covers (`unit_FilesystemModule_persistence.cpp`). diff --git a/src/core/HttpServerModule.cpp b/src/core/HttpServerModule.cpp index 269dba4..3cb267e 100644 --- a/src/core/HttpServerModule.cpp +++ b/src/core/HttpServerModule.cpp @@ -444,76 +444,86 @@ void HttpServerModule::writeControls(JsonSink& sink, MoonModule* mod) { } } -void HttpServerModule::handleSetControl(platform::TcpConnection& conn, const char* body) { - // Parse: {"module":"Noise","control":"scale","value":8} - char moduleName[32] = {}; - char controlName[32] = {}; - mm::json::parseString(body, "module", moduleName, sizeof(moduleName)); - mm::json::parseString(body, "control", controlName, sizeof(controlName)); - - // Find the module by name +// Apply-core: set one control's value. 
`valueJson` is a small JSON object holding +// the value under the "value" key ({"value":8}) — the same body the HTTP handler +// receives, so applyControlValue (which reads by key) is reused verbatim. Transport- +// free: no TcpConnection, returns an OpResult the caller maps to its own reporting. +HttpServerModule::OpResult HttpServerModule::applySetControl( + const char* moduleName, const char* controlName, const char* valueJson) { MoonModule* target = findModuleByName(moduleName); - if (!target) { - sendResponse(conn, 404, "application/json", "{\"error\":\"module not found\"}"); - return; - } + if (!target) return OpResult::ModuleNotFound; - // Handle module-level "enabled" property + // Module-level "enabled" pseudo-control. if (std::strcmp(controlName, "enabled") == 0) { - target->setEnabled(mm::json::parseBool(body, "value")); + target->setEnabled(mm::json::parseBool(valueJson, "value")); target->markDirty(); FilesystemModule::noteDirty(); if (scheduler_) scheduler_->buildState(); - sendResponse(conn, 200, "application/json", "{\"ok\":true}"); - return; + return OpResult::Ok; } - // Find the control by name and set value auto& ctrls = target->controls(); for (uint8_t i = 0; i < ctrls.count(); i++) { auto& c = ctrls[i]; if (std::strcmp(c.name, controlName) != 0) continue; - // Per-type parse + validate + apply lives in Control.cpp. We map - // the result to specific HTTP responses; non-Ok results leave the - // storage untouched, so no need to roll anything back. - ApplyResult r = applyControlValue(c, body, "value"); + // Per-type parse + validate + apply lives in Control.cpp. Non-Ok leaves the + // storage untouched, so no rollback needed. 
+ ApplyResult r = applyControlValue(c, valueJson, "value"); switch (r) { - case ApplyResult::Ok: - break; - case ApplyResult::OutOfRange: - sendResponse(conn, 400, "application/json", "{\"error\":\"value out of range\"}"); - return; - case ApplyResult::Malformed: - sendResponse(conn, 400, "application/json", "{\"error\":\"value malformed\"}"); - return; - case ApplyResult::ReadOnly: - sendResponse(conn, 400, "application/json", "{\"error\":\"control is read-only\"}"); - return; + case ApplyResult::Ok: break; + case ApplyResult::OutOfRange: return OpResult::OutOfRange; + case ApplyResult::Malformed: return OpResult::Malformed; + case ApplyResult::ReadOnly: return OpResult::ReadOnly; } // Rebuild the control list after every change so onBuildControls() can - // re-evaluate which controls are visible for the new value — any control - // can reshape the list (a Select picking static-IP fields, a checkbox - // revealing its options). rebuildControls() is clear()+onBuildControls(), - // which the contract requires to be cheap and idempotent, so running it - // per-change costs nothing for the common case where the list is unchanged. + // re-evaluate which controls are visible for the new value (a Select + // revealing fields, etc.). clear()+onBuildControls(), cheap + idempotent. target->rebuildControls(); - // Three-tier control-change reaction (see MoonModule::onUpdate): - // 1. onUpdate — always, cheap. Lets the module recompute a small LUT etc. - // 2. rebuild — only when the control changes physical dims / mapping shape - // (Layout, Modifier). Most controls (effect values, brightness) skip this, - // so dragging a slider stays fluent with no tree-wide realloc sweep. + // Three-tier control-change reaction (see MoonModule::onUpdate): onUpdate + // always; a tree-wide buildState only when the control reshapes dims/mapping. 
target->onUpdate(controlName); target->markDirty(); FilesystemModule::noteDirty(); if (target->controlChangeTriggersBuildState(controlName) && scheduler_) { scheduler_->buildState(); } + return OpResult::Ok; + } + return OpResult::ControlNotFound; // control name not on this module +} - sendResponse(conn, 200, "application/json", "{\"ok\":true}"); - return; +void HttpServerModule::handleSetControl(platform::TcpConnection& conn, const char* body) { + // Parse: {"module":"Noise","control":"scale","value":8} — the apply-core reads + // the value out of `body` itself (so it sees the exact same JSON the API got). + char moduleName[32] = {}; + char controlName[32] = {}; + mm::json::parseString(body, "module", moduleName, sizeof(moduleName)); + mm::json::parseString(body, "control", controlName, sizeof(controlName)); + + switch (applySetControl(moduleName, controlName, body)) { + case OpResult::Ok: + sendResponse(conn, 200, "application/json", "{\"ok\":true}"); + return; + case OpResult::ModuleNotFound: + sendResponse(conn, 404, "application/json", "{\"error\":\"module not found\"}"); + return; + case OpResult::ControlNotFound: + sendResponse(conn, 404, "application/json", "{\"error\":\"control not found\"}"); + return; + case OpResult::OutOfRange: + sendResponse(conn, 400, "application/json", "{\"error\":\"value out of range\"}"); + return; + case OpResult::Malformed: + sendResponse(conn, 400, "application/json", "{\"error\":\"value malformed\"}"); + return; + case OpResult::ReadOnly: + sendResponse(conn, 400, "application/json", "{\"error\":\"control is read-only\"}"); + return; + default: + sendResponse(conn, 400, "application/json", "{\"error\":\"bad request\"}"); + return; } - sendResponse(conn, 404, "application/json", "{\"error\":\"control not found\"}"); } MoonModule* HttpServerModule::findModuleByName(const char* name) { @@ -588,89 +598,146 @@ void HttpServerModule::writeModuleMetricsJson(JsonSink& sink, MoonModule* mod, b } } -void 
HttpServerModule::handleAddModule(platform::TcpConnection& conn, const char* body) { - char typeName[32] = {}; - char id[32] = {}; - char parentId[32] = {}; - mm::json::parseString(body, "type", typeName, sizeof(typeName)); - mm::json::parseString(body, "id", id, sizeof(id)); - mm::json::parseString(body, "parent_id", parentId, sizeof(parentId)); - - if (typeName[0] == 0) { - sendResponse(conn, 400, "application/json", "{\"error\":\"missing type\"}"); - return; - } +// Apply-core: add one module under a named parent. Transport-free; returns an +// OpResult. Idempotent on the id (an existing name returns AlreadyExists — still success, "already there"). +HttpServerModule::OpResult HttpServerModule::applyAddModule( + const char* typeName, const char* id, const char* parentId) { + if (!typeName || typeName[0] == 0) return OpResult::BadRequest; // Top-level modules (Layouts/Layers/Drivers/Filesystem/System/Network/HttpServer) - // are policy-fixed and wired in main.cpp at boot. The HTTP surface only - // allows adding *child* modules to an existing parent — anything else - // would be an orphan (not added to any tree, not registered with the - // scheduler, never ticked, leaked). Reject early and symmetrically with - // handleDeleteModule / handleReplaceModule (both also 400 on top-level). - // Scenario tests adding top-level modules go through scenario_runner.cpp's - // in-process path, not this HTTP handler. - if (parentId[0] == 0) { - sendResponse(conn, 400, "application/json", - "{\"error\":\"parent_id required (top-level modules are policy-fixed in main.cpp)\"}"); - return; - } + // are policy-fixed and wired in main.cpp at boot. Only *child* adds are allowed — + // anything else would orphan the module (never ticked, leaked).
+ if (!parentId || parentId[0] == 0) return OpResult::BadRequest; - // Check if module with this name already exists - if (id[0] != 0 && findModuleByName(id)) { - sendResponse(conn, 200, "application/json", "{\"ok\":true,\"note\":\"already exists\"}"); - return; - } + // Idempotent: an existing module with this name is success, not an error — so a + // re-run of the catalog inject (or a double APPLY_OP) is a no-op, not a dup. The + // distinct AlreadyExists (vs Ok) lets the HTTP handler report "already exists" so a + // client can tell created-now from already-there; both are success. + if (id && id[0] != 0 && findModuleByName(id)) return OpResult::AlreadyExists; - // Resolve the parent before allocating — failure here means we never - // construct an orphan module. + // Resolve the parent before allocating — failure means we never make an orphan. auto* parent = findModuleByName(parentId); - if (!parent) { - sendResponse(conn, 404, "application/json", "{\"error\":\"parent not found\"}"); - return; - } + if (!parent) return OpResult::ModuleNotFound; - // Create module via factory auto* mod = ModuleFactory::create(typeName); - if (!mod) { - sendResponse(conn, 400, "application/json", "{\"error\":\"unknown type\"}"); - return; - } - if (id[0] != 0) mod->setName(id); + if (!mod) return OpResult::UnknownType; + if (id && id[0] != 0) mod->setName(id); if (!parent->addChild(mod)) { delete mod; - sendResponse(conn, 400, "application/json", "{\"error\":\"parent rejected child\"}"); - return; + return OpResult::BadRequest; // parent rejected the child } - // Disambiguate the name if something else in the tree already carries - // it. Factory display names like "Layer" collide when a second Layer is - // added (factory has no per-instance state), and findModuleByName does - // a first-match DFS so the second one becomes unreachable. The - // Scheduler also runs the same pass over the whole tree after - // persistence load (see Scheduler::setup phase 2a). 
Single source of - // truth — both paths go through Scheduler::ensureUniqueName. + // Disambiguate a colliding name (a second "Layer" etc.) — same pass the Scheduler + // runs after persistence load; single source of truth. if (scheduler_) scheduler_->ensureUniqueName(mod); - // Lifecycle: same phase order as Scheduler::setup() — onBuildControls() first so - // control buffers are bound, then setup() (which may read those bound members), - // then onBuildState(). Getting this order wrong means a module's setup() sees - // uninitialized control state. + // Lifecycle in Scheduler::setup() order: onBuildControls() (bind buffers) → + // setup() (may read them) → onBuildState(). mod->onBuildControls(); mod->setup(); mod->onBuildState(); - if (scheduler_) scheduler_->buildState(); - // Persist the new tree shape — marking the parent dirty causes saveSubtree - // to write the parent's file with the new child slot included. The save is - // debounced (2s after the last dirty mark) so an immediate reboot won't catch - // the write; callers wanting a synchronous save can call FilesystemModule::flush(). - // parent is guaranteed non-null by the top-of-function checks. + // Persist the new tree shape (debounced save via noteDirty). 
parent->markDirty(); FilesystemModule::noteDirty(); + return OpResult::Ok; +} - sendResponse(conn, 200, "application/json", "{\"ok\":true}"); +void HttpServerModule::handleAddModule(platform::TcpConnection& conn, const char* body) { + char typeName[32] = {}; + char id[32] = {}; + char parentId[32] = {}; + mm::json::parseString(body, "type", typeName, sizeof(typeName)); + mm::json::parseString(body, "id", id, sizeof(id)); + mm::json::parseString(body, "parent_id", parentId, sizeof(parentId)); + + switch (applyAddModule(typeName, id, parentId)) { + case OpResult::Ok: + sendResponse(conn, 200, "application/json", "{\"ok\":true}"); + return; + case OpResult::AlreadyExists: + sendResponse(conn, 200, "application/json", "{\"ok\":true,\"note\":\"already exists\"}"); + return; + case OpResult::ModuleNotFound: + sendResponse(conn, 404, "application/json", "{\"error\":\"parent not found\"}"); + return; + case OpResult::UnknownType: + sendResponse(conn, 400, "application/json", "{\"error\":\"unknown type\"}"); + return; + case OpResult::BadRequest: + default: + sendResponse(conn, 400, "application/json", + "{\"error\":\"missing type, or parent_id required (top-level modules are policy-fixed in main.cpp), or parent rejected child\"}"); + return; + } +} + +// Apply-core: DELETE every user-editable child of `parentName` (the catalog +// inject's replaceChildren — an entry's effects replace the boot defaults instead +// of stacking). Same removeChild → teardown → deleteTree the HTTP delete does. +// Code-wired children (Preview, Improv) are left in place; they aren't what a +// catalog entry replaces. Transport-free. +HttpServerModule::OpResult HttpServerModule::applyClearChildren(const char* parentName) { + auto* parent = findModuleByName(parentName); + if (!parent) return OpResult::ModuleNotFound; + bool removedAny = false; + // Iterate from the end: removeChild compacts the array, so back-to-front keeps + // indices valid as we delete. 
+ for (int i = static_cast<int>(parent->childCount()) - 1; i >= 0; i--) { + auto* c = parent->child(static_cast<uint8_t>(i)); + if (!c || !c->userEditable()) continue; + parent->removeChild(c); + c->teardown(); + Scheduler::deleteTree(c); + removedAny = true; + } + if (removedAny) { + if (scheduler_) scheduler_->buildState(); + parent->markDirty(); + FilesystemModule::noteDirty(); + } + return OpResult::Ok; +} + +// Apply-core dispatcher: one REST op as a JSON object. This is the wire shape the +// Improv APPLY_OP frame carries — "REST over serial". The op is a small flat object: +// {"op":"add","type":"...","id":"...","parent":"..."} +// {"op":"set","module":"...","control":"...","value":...} +// {"op":"clearChildren","parent":"..."} +// For "set" the whole op JSON is handed to applySetControl, which reads "value" by +// key — the same way the HTTP /api/control handler reads it from the request body, +// so any value type rides through unchanged. +// NOTE: the serial op's add uses the +// key "parent", while the HTTP POST /api/modules body uses "parent_id" for the same +// field — both feed the one applyAddModule() core, but the two transports parse different +// JSON keys, so an HTTP payload is NOT a drop-in APPLY_OP (rename parent_id → parent). The +// serial op stays terse because every byte counts against the 128-byte frame budget; the +// discrepancy is documented in docs/moonmodules/core/ImprovProvisioningModule.md.
+HttpServerModule::OpResult HttpServerModule::applyOp(const char* opJson) { + if (!opJson) return OpResult::BadRequest; + char op[16] = {}; + mm::json::parseString(opJson, "op", op, sizeof(op)); + if (std::strcmp(op, "add") == 0) { + char type[32] = {}, id[32] = {}, parent[32] = {}; + mm::json::parseString(opJson, "type", type, sizeof(type)); + mm::json::parseString(opJson, "id", id, sizeof(id)); + mm::json::parseString(opJson, "parent", parent, sizeof(parent)); // "parent", not HTTP's "parent_id" + return applyAddModule(type, id, parent); + } + if (std::strcmp(op, "set") == 0) { + char module[32] = {}, control[32] = {}; + mm::json::parseString(opJson, "module", module, sizeof(module)); + mm::json::parseString(opJson, "control", control, sizeof(control)); + return applySetControl(module, control, opJson); + } + if (std::strcmp(op, "clearChildren") == 0) { + char parent[32] = {}; + mm::json::parseString(opJson, "parent", parent, sizeof(parent)); + return applyClearChildren(parent); + } + return OpResult::BadRequest; // unknown op } void HttpServerModule::handleDeleteModule(platform::TcpConnection& conn, const char* moduleName) { diff --git a/src/core/HttpServerModule.h b/src/core/HttpServerModule.h index 1404c48..a3c0d60 100644 --- a/src/core/HttpServerModule.h +++ b/src/core/HttpServerModule.h @@ -50,6 +50,36 @@ class HttpServerModule : public MoonModule, public BinaryBroadcaster { void loop20ms() override; void loop1s() override; + // ----------------------------------------------------------------------- + // Transport-free apply-core — "the REST API, callable in-process" + // ----------------------------------------------------------------------- + // The add/set/clear-children operations the HTTP handlers do, factored out of + // the TcpConnection so any transport can drive them. 
Two callers today: the + // HTTP handlers (thin wrappers that map OpResult → status code) and the Improv + // serial path (ImprovProvisioningModule applies a pushed op on the main loop — + // "Improv = REST over serial"). One home for the apply logic; transports differ + // only in how they frame the request and report the result. + enum class OpResult : uint8_t { + Ok, + AlreadyExists, // add is a no-op: a module with this id is already in the tree (still success) + ModuleNotFound, // module / parent name not in the tree + ControlNotFound, // module exists but has no such control (a distinct 404) + UnknownType, // factory doesn't know the type + BadRequest, // missing field, top-level add, parent rejected child + OutOfRange, // numeric value outside bounds + Malformed, // value didn't parse (e.g. IPv4) + ReadOnly, // tried to write a display-only control + }; + // Arguments are already-parsed strings; only `valueJson` is JSON — a small object holding the value under "value" ({"value":8}). + OpResult applyAddModule(const char* typeName, const char* id, const char* parentId); + OpResult applySetControl(const char* moduleName, const char* controlName, const char* valueJson); + // Enumerate-then-DELETE every user-editable child of `parentName` (the catalog inject's + // replaceChildren). Returns ModuleNotFound if the parent doesn't exist, else Ok. + OpResult applyClearChildren(const char* parentName); + // Parse a single REST op object ({"op":"add|set|clearChildren", …}) and dispatch + // to the three above. The wire shape the Improv APPLY_OP frame carries. + OpResult applyOp(const char* opJson); + private: platform::TcpServer server_; Scheduler* scheduler_ = nullptr; diff --git a/src/core/ImprovOpReassembler.h b/src/core/ImprovOpReassembler.h new file mode 100644 index 0000000..59a9662 --- /dev/null +++ b/src/core/ImprovOpReassembler.h @@ -0,0 +1,86 @@ +#pragma once + +// Reassembles a chunked APPLY_OP payload ("Improv = REST over serial") into one +// NUL-terminated op-JSON buffer.
This is the pure state machine behind the device's +// 0xFC handler: the platform layer (platform_esp32_improv.cpp) owns the serial I/O +// (reading frames, sending acks/errors, the single-buffer opReady atomic), and hands +// each chunk's [seq][last][bytes] here. Splitting it out follows the same core/platform +// line as ImprovFrame.h — the algorithm is core (and desktop-unit-testable), the UART +// is platform — so the reassembly + sequence guard is proven without hardware. +// +// Frame chunk: [seq][last][chunk bytes…]. seq 0 starts a fresh op (resets the buffer); +// every later chunk must be the next seq in order. A duplicate (an installer retry on a +// misread timeout) or an out-of-order chunk would splice garbage into the buffer, so it +// is rejected and the buffer reset. USB serial is in-order, but the installer's open-loop +// send could re-emit a chunk, so the guard is real, not theoretical. + +#include <cstddef> +#include <cstdint> +#include <cstring> + +namespace mm { + +class ImprovOpReassembler { +public: + enum class Result : uint8_t { + Continue, // chunk accepted, more expected (not the last) + Ready, // last chunk accepted; out() is a complete NUL-terminated op + Error, // bad chunk (out-of-order / duplicate / overflow); buffer reset + }; + + // buf/cap is the caller-owned reassembly buffer (one byte is reserved for the NUL, + // so the largest op JSON that fits is cap-1 bytes). + ImprovOpReassembler(char* buf, size_t cap) : buf_(buf), cap_(cap) {} + + // Feed one chunk. `seq` is its 0-based index, `last` true on the final chunk. + // On Ready, out() holds the reassembled, NUL-terminated JSON and len() its length; + // the reassembler is reset for the next op. On Error the buffer is reset and the + // caller should error the frame. chunk may be null iff chunkLen is 0. + Result feed(uint8_t seq, bool last, const uint8_t* chunk, size_t chunkLen) { + // Sequence guard. 
seq 0 always starts fresh (so a new op recovers cleanly even + // after a previous one errored mid-stream); any other seq must be the next one. + if (seq == 0) { + len_ = 0; + nextSeq_ = 1; + } else if (seq != nextSeq_) { + reset(); + return Result::Error; + } else { + nextSeq_++; + } + + // Overflow guard: keep one byte for the NUL. Drop + error rather than truncate. + if (len_ + chunkLen >= cap_) { + reset(); + return Result::Error; + } + if (chunkLen) std::memcpy(buf_ + len_, chunk, chunkLen); + len_ += chunkLen; + + if (last) { + buf_[len_] = 0; + size_t complete = len_; + reset(); // ready for the next op + len_ = complete; // ...but keep the length readable until the next feed() + return Result::Ready; + } + return Result::Continue; + } + + const char* out() const { return buf_; } + size_t len() const { return len_; } + + // Drop any partial op (e.g. when the single-buffer consumer wants a clean slate). + void reset() { + len_ = 0; + nextSeq_ = 0; + } + +private: + char* buf_; + size_t cap_; + size_t len_ = 0; // bytes reassembled so far + uint8_t nextSeq_ = 0; // next chunk index expected (0 = awaiting a fresh op) +}; + +} // namespace mm diff --git a/src/core/ImprovProvisioningModule.h b/src/core/ImprovProvisioningModule.h index 5d498cf..22a1f63 100644 --- a/src/core/ImprovProvisioningModule.h +++ b/src/core/ImprovProvisioningModule.h @@ -3,10 +3,12 @@ #include "core/MoonModule.h" #include "core/NetworkModule.h" #include "core/SystemModule.h" +#include "core/HttpServerModule.h" #include "core/build_info.h" #include "platform/platform.h" #include <atomic> +#include <cstdio> #include <cstring> namespace mm { @@ -31,6 +33,9 @@ class ImprovProvisioningModule : public MoonModule { public: void setSystemModule(SystemModule* s) { systemModule_ = s; } void setNetworkModule(NetworkModule* n) { networkModule_ = n; } + // For the APPLY_OP vendor RPC — the module routes a pushed REST op to the + // HttpServerModule's apply-core (the same code /api/modules + 
/api/control use). + void setHttpServerModule(HttpServerModule* h) { httpServerModule_ = h; } // Diagnostics keep ticking; matches FirmwareUpdateModule / SystemModule. bool respectsEnabled() const override { return false; } @@ -57,7 +62,8 @@ class ImprovProvisioningModule : public MoonModule { statusStr_, sizeof(statusStr_), pendingDeviceModel_, sizeof(pendingDeviceModel_), &pendingDeviceModelReady_, - &pendingTxPower_, &pendingTxPowerReady_); + &pendingTxPower_, &pendingTxPowerReady_, + pendingOp_, sizeof(pendingOp_), &pendingOpReady_); } else { std::strncpy(statusStr_, "not supported on this platform", sizeof(statusStr_) - 1); } @@ -100,9 +106,39 @@ class ImprovProvisioningModule : public MoonModule { } } + // APPLY_OP is polled per-TICK (not loop1s) because the installer pushes a burst + // of ops during provisioning and single-buffers them: the Improv task refuses a + // new op until this consumes the previous (clears pendingOpReady_), so a fast + // poll keeps the busy-window to ~one tick and the install snappy. Applying on the + // main loop here (not the Improv task) keeps the factory/tree mutation off the + // serial task — the same discipline the credentials/deviceModel paths follow. + void loop() override { + if (pendingOpReady_.load(std::memory_order_acquire) && httpServerModule_) { + // The Improv task already acked frame RECEIPT; the op is APPLIED here. A + // failed op (UnknownType, OutOfRange, a not-found target) can't travel back + // on that spent ack, so surface it: log it over serial and park it in + // provision_status, so a silently-misconfigured device is visible on a + // monitor and via /api/state rather than looking like a clean install. + // Ok and AlreadyExists are both success (a re-pushed op is idempotent); + // only a genuine failure is surfaced. 
+ auto r = httpServerModule_->applyOp(pendingOp_); + if (r != HttpServerModule::OpResult::Ok && + r != HttpServerModule::OpResult::AlreadyExists) { + std::printf("Improv APPLY_OP failed (result=%d): %s\n", + static_cast<int>(r), pendingOp_); + std::snprintf(statusStr_, sizeof(statusStr_), "error: apply failed (%d)", + static_cast<int>(r)); + } + std::memset(pendingOp_, 0, sizeof(pendingOp_)); + pendingOpReady_.store(false, std::memory_order_release); + } + MoonModule::loop(); // tick children (none today, but keep the contract) + } + private: - SystemModule* systemModule_ = nullptr; - NetworkModule* networkModule_ = nullptr; + SystemModule* systemModule_ = nullptr; + NetworkModule* networkModule_ = nullptr; + HttpServerModule* httpServerModule_ = nullptr; char statusStr_[64] = "listening"; // Buffers the platform task writes; sized to NetworkModule's storage. @@ -124,6 +160,12 @@ class ImprovProvisioningModule : public MoonModule { // for brown-out-prone boards; same producer/consumer shape as the above. uint8_t pendingTxPower_ = 0; std::atomic<bool> pendingTxPowerReady_{false}; + + // Vendor APPLY_OP RPC — one REST op as JSON, reassembled by the Improv task and + // applied here on the main loop via HttpServerModule::applyOp. Sized for the + // largest op (a long pins list fits comfortably in 512 bytes). + char pendingOp_[512] = {}; + std::atomic<bool> pendingOpReady_{false}; }; } // namespace mm diff --git a/src/main.cpp b/src/main.cpp index acd07a9..d783429 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -299,6 +299,12 @@ void mm_main(volatile bool& keepRunning, uint16_t httpPort) { // owns the preview wire format end to end; core just writes the bytes. 
preview->setBroadcaster(httpServer); + // APPLY_OP vendor RPC (0xFC): the installer pushes the device-model's catalog + // ops over serial during provisioning, and ImprovProvisioningModule routes each + // to the HttpServerModule apply-core (the same code /api/modules + /api/control + // use) — "Improv = REST over serial". Wired here once httpServer exists. + if (improvModule) improvModule->setHttpServerModule(httpServer); + // Register top-level modules with scheduler (scheduler deletes on teardown). // Order matters: filesystem first (load hook runs before any module's setup), // then system (deviceName), firmwareUpdate (status surface, no deps), network diff --git a/src/platform/desktop/platform_desktop.cpp b/src/platform/desktop/platform_desktop.cpp index afbbd38..bf39d60 100644 --- a/src/platform/desktop/platform_desktop.cpp +++ b/src/platform/desktop/platform_desktop.cpp @@ -567,7 +567,9 @@ bool improvProvisioningInit(const ImprovDeviceInfo& /*info*/, char* /*deviceModelOut*/, size_t /*deviceModelOutLen*/, std::atomic<bool>* /*deviceModelReady*/, uint8_t* /*txPowerOut*/, - std::atomic<bool>* /*txPowerReady*/) { + std::atomic<bool>* /*txPowerReady*/, + char* /*opOut*/, size_t /*opOutLen*/, + std::atomic<bool>* /*opReady*/) { if (statusBuf && statusBufLen > 0) { std::snprintf(statusBuf, statusBufLen, "unsupported on desktop"); } diff --git a/src/platform/esp32/platform_config.h b/src/platform/esp32/platform_config.h index a15d4ff..bd87048 100644 --- a/src/platform/esp32/platform_config.h +++ b/src/platform/esp32/platform_config.h @@ -194,16 +194,15 @@ constexpr EthPinConfig ethConfigDefault = // `if constexpr` on this so desktop builds get a 501-returning stub instead. constexpr bool hasOta = true; -// Improv WiFi listens on UART0 for WiFi credentials. Disabled on Ethernet-only -// firmwares (--firmware esp32-eth) — the WiFi headers and the esp_wifi_scan_* calls -// the listener uses are not linked there, and there's no WiFi STA to provision -// either way. 
The S3's native USB-Serial-JTAG (separate from UART0) is not -// supported by the Improv listener; see the ImprovProvisioningModule spec for -// the user-facing footnote. -#ifdef MM_NO_WIFI -constexpr bool hasImprov = false; -#else +// Improv-serial is the device's serial RPC channel (UART0 + native USB-Serial-JTAG): +// the WiFi-provisioning RPCs (WIFI_SETTINGS, GET_WIFI_NETWORKS) AND the vendor RPCs +// (SET_DEVICE_MODEL, SET_TX_POWER, APPLY_OP — "Improv = REST over serial"). The +// transport is always available on ESP32, so the listener runs everywhere — including +// Ethernet-only builds (`--firmware esp32-eth*`), where the WiFi-only RPCs are compiled +// out (the `esp_wifi_*` calls aren't linked) but the vendor RPCs still work, so the web +// installer can push a device-model's config over serial to an eth device just as it +// does to a WiFi one. `hasImprov` is therefore true on every ESP32 target; only desktop +// (no serial peripheral) leaves it false. constexpr bool hasImprov = true; -#endif } // namespace mm::platform diff --git a/src/platform/esp32/platform_esp32_improv.cpp b/src/platform/esp32/platform_esp32_improv.cpp index 7dcfddf..8640aff 100644 --- a/src/platform/esp32/platform_esp32_improv.cpp +++ b/src/platform/esp32/platform_esp32_improv.cpp @@ -1,25 +1,27 @@ -// Improv WiFi listener — UART0 RPC dispatch + credential pump. +// Improv-serial listener — UART0 + native-USB RPC dispatch. // -// Cut out of platform_esp32.cpp (plan-23) for size + readability. Self- -// contained: the file owns the g_improv state in an anonymous namespace -// and only reads back into the rest of the platform layer through the -// public wifiStaConnected() / wifiStaGetIPv4() symbols declared in -// platform.h. Move was a code-organisation change with no API delta. +// Cut out of platform_esp32.cpp (plan-23) for size + readability. 
Self-contained: +// owns the g_improv state in an anonymous namespace, reaching back into the rest of +// the platform layer only through public accessors declared in platform.h. // -// Whole file is compiled out on Ethernet-only builds (MM_NO_WIFI). A -// link-parity stub at the bottom satisfies the platform.h declaration -// on those profiles (ImprovProvisioningModule guards the call with -// `if constexpr (hasImprov)`, so it's never invoked at runtime). +// Runs on EVERY ESP32 target, including Ethernet-only builds (MM_NO_WIFI). The serial +// transport + the vendor RPCs (SET_DEVICE_MODEL, SET_TX_POWER, APPLY_OP — "Improv = +// REST over serial") need no WiFi, so the installer can push a device-model's config +// over serial to an eth device too. Only the WiFi-PROVISIONING RPCs (WIFI_SETTINGS, +// GET_WIFI_NETWORKS) and their `esp_wifi_*` calls are `#ifndef MM_NO_WIFI`-guarded — +// on eth those commands aren't offered (there's no WiFi STA to provision), and +// GET_CURRENT_STATE reports based on the Ethernet link instead. #include "platform/platform.h" -#ifndef MM_NO_WIFI - #include "core/ImprovFrame.h" +#include "core/ImprovOpReassembler.h" #include "driver/uart.h" #include "esp_log.h" -#include "esp_wifi.h" +#ifndef MM_NO_WIFI +#include "esp_wifi.h" // only the WiFi-provisioning RPCs touch esp_wifi_* +#endif #include "freertos/FreeRTOS.h" #include "freertos/task.h" #include "improv.h" @@ -79,6 +81,18 @@ struct ImprovTaskState { // whole dBm for brown-out-prone boards. Same producer/consumer dance. uint8_t* txPowerOut = nullptr; std::atomic<bool>* txPowerReady = nullptr; + + // Vendor APPLY_OP RPC (command 0xFC): one REST operation as JSON, pushed over + // serial during provisioning ("Improv = REST over serial"). The frame carries + // [0xFC][seq][last][chunk bytes…]; chunks are appended to opOut until last=1, + // then opReady is set and the module's loop applies the op on the MAIN loop + // (never the Improv task). 
Same producer/consumer dance as deviceModel; the + // buffer is module-owned and sized to hold the largest op (a long pins list). + // Chunk reassembly + the sequence guard live in mm::ImprovOpReassembler, bound + // to opOut in the handler — only the buffer + the ready flag are shared state here. + char* opOut = nullptr; + size_t opOutLen = 0; + std::atomic<bool>* opReady = nullptr; }; static ImprovTaskState g_improv; // single global — only one Improv task per device @@ -173,6 +187,10 @@ static void improvSendDeviceInfo() { improvSend(ImprovFrameType::RpcResponse, rpc); } +#ifndef MM_NO_WIFI +// --- WiFi-provisioning RPCs: only on WiFi builds. On Ethernet-only (MM_NO_WIFI) +// these aren't offered (no STA to provision) and the esp_wifi_* calls aren't linked. --- + static void improvSendWifiNetworks() { // Synchronous-ish scan. Replies one network per RPC frame per the Improv // spec, then a final empty payload to mark end-of-list. Limit to 10 @@ -265,6 +283,8 @@ static void improvHandleProvision(const improv::ImprovCommand& cmd) { improvSendCurrentState(improv::STATE_PROVISIONED); } +#endif // MM_NO_WIFI — end WiFi-provisioning RPCs + // SET_DEVICE_MODEL vendor RPC (command 0xFE) — Step 3 of the deviceModel-injection plan. // The web installer's orchestrator sends this after WiFi provisioning so the // device persists its physical-board name (e.g. "LOLIN D32") without needing @@ -375,6 +395,81 @@ static void improvHandleSetTxPower(const uint8_t* payload, uint8_t len) { improvSend(ImprovFrameType::RpcResponse, rpc); } +// APPLY_OP vendor RPC (command 0xFC) — "Improv = REST over serial". Carries ONE +// REST operation as JSON (the same shape an HTTP /api/modules or /api/control body +// has): {"op":"add",…} / {"op":"set",…} / {"op":"clearChildren",…}. The installer +// pushes these during provisioning while it owns the serial port, so device-model +// defaults apply over serial — no HTTP, no mixed-content, no browser pull/handoff. 
+// +// Frame payload layout (after the standard Improv frame header): +// [0xFC] command +// [seq] chunk index, 0-based (seq 0 resets the reassembly buffer) +// [last] 1 if this is the final chunk, else 0 +// [chunk bytes…] a slice of the op JSON (≤ kImprovMaxPayload-3 bytes) +// Most ops fit one frame (seq 0, last 1); a long value (e.g. a big pins list) +// chunks. On last=1 the reassembled JSON is NUL-terminated and opReady is set; the +// module's loop applies it on the MAIN loop (the factory/tree mutation must not run +// on the Improv task). Ack each frame so the installer can pace + retry. +static constexpr uint8_t IMPROV_CMD_APPLY_OP = 0xFC; +static constexpr uint8_t IMPROV_ERROR_INVALID_OP = 0x82; + +static void improvHandleApplyOp(const uint8_t* payload, uint8_t len) { + if (!g_improv.opOut || !g_improv.opReady) { + improvSendError(improv::ERROR_UNKNOWN_RPC); + return; + } + // Single-buffered: refuse a new op while the module hasn't consumed the previous + // one (opReady still set), so a fast installer can't overwrite an unapplied op. + // The installer treats this error as "retry shortly" and re-sends. Acquire-load + // pairs with the module's release-store when it clears the flag after applying. + if (g_improv.opReady->load(std::memory_order_acquire)) { + improvSendError(static_cast<improv::Error>(IMPROV_ERROR_INVALID_OP)); + return; + } + // [0xFC][seq][last] header = 3 bytes; chunk is the rest. + if (len < 3) { + improvSendError(static_cast<improv::Error>(IMPROV_ERROR_INVALID_OP)); + return; + } + uint8_t seq = payload[1]; + uint8_t last = payload[2]; + const uint8_t* chunk = payload + 3; + size_t chunkLen = static_cast<size_t>(len) - 3; + + // `last` is a boolean flag on the wire; anything but 0/1 is a malformed frame + // (a desync the parser's checksum didn't catch, or a non-conforming sender). + // Reject before reassembly rather than coerce a stray value to "more chunks". 
+ if (last > 1) { + improvSendError(static_cast<improv::Error>(IMPROV_ERROR_INVALID_OP)); + return; + } + + // Chunk reassembly + the out-of-order/duplicate sequence guard live in + // mm::ImprovOpReassembler (core, desktop-unit-tested) so the algorithm is proven + // without hardware; this handler keeps only the serial I/O around it. Bound once + // to g_improv.opOut at first call — improvProvisioningInit sets opOut before the + // task starts, and there is a single Improv task per device for its lifetime, so + // the static never sees a stale buffer. Re-init with a different buffer is not + // supported (would need rebinding); the single-task design makes that moot. + static mm::ImprovOpReassembler reasm(g_improv.opOut, g_improv.opOutLen); + switch (reasm.feed(seq, last, chunk, chunkLen)) { + case mm::ImprovOpReassembler::Result::Error: + improvSendError(static_cast<improv::Error>(IMPROV_ERROR_INVALID_OP)); + return; + case mm::ImprovOpReassembler::Result::Continue: + break; + case mm::ImprovOpReassembler::Result::Ready: + // release-store pairs with the module's acquire-load before it applies. + g_improv.opReady->store(true, std::memory_order_release); + break; + } + // Ack every chunk (empty RpcResponse for 0xFC) so the installer awaits each. + auto rpc = improv::build_rpc_response( + static_cast<improv::Command>(IMPROV_CMD_APPLY_OP), + std::vector<std::string>{}, false); + improvSend(ImprovFrameType::RpcResponse, rpc); +} + // Dispatch a completed frame from the parser. Only RPC frames carry commands // we care about; the spec lets the other types through silently. 
static void improvDispatchFrame(const ImprovFrameParser& parser) { @@ -392,47 +487,50 @@ static void improvDispatchFrame(const ImprovFrameParser& parser) { improvHandleSetTxPower(raw, rawLen); return; } + if (rawLen >= 1 && raw[0] == IMPROV_CMD_APPLY_OP) { + improvHandleApplyOp(raw, rawLen); + return; + } improv::ImprovCommand cmd = improv::parse_improv_data( parser.lastPayload(), parser.lastPayloadLen(), false); switch (cmd.command) { - case improv::GET_CURRENT_STATE: - if (wifiStaConnected()) { + case improv::GET_CURRENT_STATE: { + // "Connected" means: on WiFi, the STA has an IP; on Ethernet-only, the eth + // link is up with a DHCP lease. Either way report PROVISIONED + the device + // URL (the way ESPHome does — makes the protocol self-describing on every + // reconnect; observable via improv_probe.py). Not connected → AUTHORIZED. + uint8_t ip[4] = {}; + bool connected = false; +#ifndef MM_NO_WIFI + if (wifiStaConnected()) { wifiStaGetIPv4(ip); connected = true; } + else +#endif + if (ethConnected()) { ethGetIPv4(ip); connected = true; } + if (connected) { improvSendCurrentState(improv::STATE_PROVISIONED); - // Follow up the state frame with the device URL on the - // WIFI_SETTINGS RPC, the way ESPHome does. The state frame - // alone tells a tool the device is on WiFi but doesn't say - // *where*; the URL follow-up makes the protocol self- - // describing on every reconnect (any future Improv client — - // a browser tab post-refresh, a re-run of improv_probe.py, - // another tool — can find the device without re-provisioning). - // ESP Web Tools' current rich-panel "Visit Device" affordance - // is in-session-only, so this doesn't visibly change its UI; - // the value is protocol completeness, observable via - // improv_probe.py. 
- uint8_t ip[4] = {}; - wifiStaGetIPv4(ip); if (ip[0] || ip[1] || ip[2] || ip[3]) { char url[64]; std::snprintf(url, sizeof(url), "http://%u.%u.%u.%u/", ip[0], ip[1], ip[2], ip[3]); std::vector<std::string> urls = { url }; - auto rpc = improv::build_rpc_response(improv::WIFI_SETTINGS, urls, false); - improvSend(ImprovFrameType::RpcResponse, rpc); + improvSend(ImprovFrameType::RpcResponse, + improv::build_rpc_response(improv::WIFI_SETTINGS, urls, false)); } } else { improvSendCurrentState(improv::STATE_AUTHORIZED); } break; + } case improv::GET_DEVICE_INFO: improvSendDeviceInfo(); break; +#ifndef MM_NO_WIFI case improv::GET_WIFI_NETWORKS: - // Refuse scans while WiFi STA is connected — esp_wifi_scan_start - // puts the radio into scan mode for 2-5 s, dropping inbound ArtNet. - // On large installs (16K+ LEDs) that's a visible glitch. The state - // returned by GET_CURRENT_STATE already tells the browser the device - // is online; a scan adds no new diagnostic value once provisioned. + // Refuse scans while WiFi STA is connected — esp_wifi_scan_start puts the + // radio into scan mode for 2-5 s, dropping inbound ArtNet (a visible glitch + // on a 16K-LED rig). GET_CURRENT_STATE already reports online. 
if (wifiStaConnected()) improvSendError(improv::ERROR_UNABLE_TO_CONNECT); else improvSendWifiNetworks(); break; case improv::WIFI_SETTINGS: improvHandleProvision(cmd); break; +#endif default: improvSendError(improv::ERROR_UNKNOWN_RPC); break; } } @@ -587,7 +685,9 @@ bool improvProvisioningInit(const ImprovDeviceInfo& info, char* deviceModelOut, size_t deviceModelOutLen, std::atomic<bool>* deviceModelReady, uint8_t* txPowerOut, - std::atomic<bool>* txPowerReady) { + std::atomic<bool>* txPowerReady, + char* opOut, size_t opOutLen, + std::atomic<bool>* opReady) { if (!info.name || !info.chipFamily || !info.firmwareVersion || !ssidOut || ssidOutLen == 0 || !passwordOut || passwordOutLen == 0 || @@ -611,6 +711,10 @@ bool improvProvisioningInit(const ImprovDeviceInfo& info, // SET_TX_POWER opt-in, same shape. g_improv.txPowerOut = txPowerOut; g_improv.txPowerReady = txPowerReady; + // APPLY_OP opt-in, same shape (the op reassembly buffer + ready flag). + g_improv.opOut = opOut; + g_improv.opOutLen = opOutLen; + g_improv.opReady = opReady; // 6 KB stack: parser is small, scan response uses std::vector + std::string // (some short-string-optimised, some heap). Priority 4 — below OTA (5), @@ -624,33 +728,3 @@ bool improvProvisioningInit(const ImprovDeviceInfo& info, } } // namespace mm::platform - -#else // MM_NO_WIFI — Ethernet-only build: no Improv listener. - -#include <atomic> -#include <cstdio> - -namespace mm::platform { - -// Stub for link parity. ImprovProvisioningModule guards the call with -// `if constexpr (hasImprov)`, which evaluates false on MM_NO_WIFI builds — -// so this is never invoked at runtime. Kept as a symbol so the platform.h -// declaration links cleanly on every build profile. 
-bool improvProvisioningInit(const ImprovDeviceInfo& /*info*/, - char* /*ssidOut*/, size_t /*ssidOutLen*/, - char* /*passwordOut*/, size_t /*passwordOutLen*/, - std::atomic<bool>* /*ready*/, - char* statusBuf, size_t statusBufLen, - char* /*deviceModelOut*/, size_t /*deviceModelOutLen*/, - std::atomic<bool>* /*deviceModelReady*/, - uint8_t* /*txPowerOut*/, - std::atomic<bool>* /*txPowerReady*/) { - if (statusBuf && statusBufLen > 0) { - std::snprintf(statusBuf, statusBufLen, "not supported (no WiFi)"); - } - return false; -} - -} // namespace mm::platform - -#endif // MM_NO_WIFI diff --git a/src/platform/platform.h b/src/platform/platform.h index 705b936..40cc6d9 100644 --- a/src/platform/platform.h +++ b/src/platform/platform.h @@ -258,6 +258,11 @@ struct ImprovDeviceInfo { // catalog cap normally arrives over HTTP *after* the device is online, // which such a board can never reach — proven on the bench 2026-06-10. Same // validate + buffer-write + flag-signal shape as SET_DEVICE_MODEL. +// opOut/opOutLen/opReady carry the APPLY_OP vendor RPC (0xFC) — one REST operation +// as JSON, pushed over serial during provisioning ("Improv = REST over serial"). +// Chunks reassemble into opOut; on the last chunk opReady's release-store publishes +// it and ImprovProvisioningModule applies the op on the main loop. Same buffer + +// flag shape as deviceModel; opt out by leaving null (desktop stub does). 
bool improvProvisioningInit(const ImprovDeviceInfo& info, char* ssidOut, size_t ssidOutLen, char* passwordOut, size_t passwordOutLen, @@ -266,7 +271,9 @@ bool improvProvisioningInit(const ImprovDeviceInfo& info, char* deviceModelOut = nullptr, size_t deviceModelOutLen = 0, std::atomic<bool>* deviceModelReady = nullptr, uint8_t* txPowerOut = nullptr, - std::atomic<bool>* txPowerReady = nullptr); + std::atomic<bool>* txPowerReady = nullptr, + char* opOut = nullptr, size_t opOutLen = 0, + std::atomic<bool>* opReady = nullptr); class UdpSocket { public: diff --git a/src/ui/app.js b/src/ui/app.js index 1fafbfa..de6ffea 100644 --- a/src/ui/app.js +++ b/src/ui/app.js @@ -146,16 +146,6 @@ async function init() { try { const resp = await fetch("/api/state"); state = await resp.json(); - // Pending-board handoff from the web installer. The installer page - // (docs/install/devices.js → Inject button) opens us with - // `?deviceModel=<name>` when the orchestrator couldn't push the board - // itself (HTTPS Pages → HTTP device blocked by mixed-content, OR - // an Improv-less firmware variant). We consume it once, fetch the - // deviceModels.json entry from Pages, add its modules + set their controls - // via the standard `/api/modules` + `/api/control` writes, then strip the param via - // history.replaceState. Fire-and-forget — the WS state push driven - // by sendControl re-renders the affected fields. - consumePendingDeviceModelParam(); const savedSel = lsRead(LS_SELECTED, "mm.selectedModule", null); if (state.modules && state.modules.length > 0) { const exists = savedSel && state.modules.some(m => m.name === savedSel); @@ -179,11 +169,9 @@ async function init() { } async function sendControl(moduleName, controlName, value) { - // Best-effort by design — failures are not retried here (see - // consumePendingDeviceModelParam's "No retry" contract). The query param is - // single-shot. 
Non-ok responses + network errors are logged to console - // so a user with devtools open can see what went wrong (e.g. a board- - // injected control value that the device-side validator rejected). + // Best-effort by design — failures are not retried here. Non-ok responses + + // network errors are logged to console so a user with devtools open can see + // what went wrong (e.g. a control value the device-side validator rejected). try { const res = await fetch("/api/control", { method: "POST", @@ -198,159 +186,6 @@ async function sendControl(moduleName, controlName, value) { } } -async function sendAddModule(m) { - // Best-effort add for the board-inject path, same no-retry contract as - // sendControl. Mirrors POST /api/modules {type, id, parent_id}; the - // endpoint is idempotent (an existing id returns 200), so re-running an - // inject re-adds nothing. Distinct from the interactive addModule() helper, - // which refetches state per call (wrong inside a batch loop). Returns true - // on success so the caller can skip a failed module's controls (writing - // them would just 404 against a module that was never created). - try { - const res = await fetch("/api/modules", { - method: "POST", - headers: {"Content-Type": "application/json"}, - body: JSON.stringify({type: m.type, id: m.id, parent_id: m.parent_id}) - }); - if (!res.ok) { - console.warn(`[board-inject] add module ${m.type} failed (status=${res.status})`); - return false; - } - return true; - } catch (e) { - console.warn(`[board-inject] add module ${m.type} failed (error=${e && e.message ? e.message : e})`); - return false; - } -} - -// Public Pages URL of the board catalog. The installer page also serves -// this at `./deviceModels.json` (same-origin from the installer's perspective), -// but the device UI is on a different origin (the device's HTTP server), -// so we fetch the canonical Pages copy. 
CORS: GitHub Pages static assets -// ship `Access-Control-Allow-Origin: *`, so a cross-origin fetch from -// http://<device>/ to https://moonmodules.org succeeds without proxying. -// Hardcoded rather than configurable: the catalog is project-global, not -// per-installation. During local development, flip this to -// `http://localhost:8000/deviceModels.json` and re-flash; preview_installer.py -// sends the same `Access-Control-Allow-Origin: *` header production does. -const DEVICE_MODELS_JSON_URL = "https://moonmodules.org/projectMM/install/deviceModels.json"; - -// Consume an installer-emitted `?deviceModel=<name>` query param: look the name -// up in deviceModels.json on Pages, then for each module in the entry add it -// (`/api/modules`) and set its nested controls (`/api/control`) so each -// module's validation runs. -// Strip the query param BEFORE the network round-trip so a mid-fetch -// refresh doesn't double-push. No retry — failures (network, rejection) -// leave the device unchanged; user re-runs the Inject button or sets the -// fields manually via MoonDeck. -// -// See docs/moonmodules/core/SystemModule.md (the deviceModel control) for the -// deviceModels.json schema and the full handoff sequence. -async function consumePendingDeviceModelParam() { - const params = new URLSearchParams(location.search); - const pendingDeviceModel = params.get("deviceModel"); - if (!pendingDeviceModel) return; - // Strip the param up-front so a refresh during the async fetch doesn't - // re-trigger the injection. - params.delete("deviceModel"); - const qs = params.toString(); - history.replaceState({}, "", - location.pathname + (qs ? "?" + qs : "") + location.hash); - - let entry; - try { - const res = await fetch(DEVICE_MODELS_JSON_URL); - if (!res.ok) throw new Error(`deviceModels.json HTTP ${res.status}`); - const catalog = await res.json(); - entry = Array.isArray(catalog) - ? 
catalog.find(b => b && b.name === pendingDeviceModel) - : null; - } catch (e) { - // Network down, Pages outage, or deviceModels.json missing the entry. - // Surface in the console so a user with devtools open can see why - // injection didn't take; don't show a modal since the rest of the - // UI is operational and most users just want to use the device. - console.warn("[board-inject] failed to fetch deviceModels.json:", e); - return; - } - if (!entry) { - console.warn(`[board-inject] no catalog entry for deviceModel "${pendingDeviceModel}"`); - return; - } - // Each entry is a list of module-with-controls units: - // entry.modules = [ { type, id, parent_id?, controls?, replaceChildren? }, ... ] - // Per module: add it first (when it has a parent_id — a fresh flash has no - // user-added modules like AudioModule, so a control write would 404), then - // set its controls. A module WITHOUT parent_id is a boot-wired/top-level one - // (System, Network, Drivers) that already exists — skip the add, just set - // controls. The add is idempotent (an existing id returns 200). If an add - // fails, skip that module's controls (writing them would 404) but keep going — - // a single failed module shouldn't abort the whole inject (best-effort - // contract; sendControl never aborts either). Sequential so each - // FilesystemModule debounced-save sees the full set. - // - // replaceChildren: a container unit (Layer, Layouts, Drivers) sets it to REPLACE - // the boot-wired defaults rather than add alongside them. The catalog inject is - // otherwise add-only, but a Layer only renders its FIRST enabled effect/modifier — - // so to make a device's catalog effects actually show (e.g. the testbench's - // AudioSpectrum instead of the default Noise) the container's existing children - // must be cleared first. 
We do that here by DELETE-ing each current child of the - // named container before its catalog children are added (same enumerate-then-delete - // the live scenario runner's clear_children uses). Done once per replaceChildren - // container, up front, so the subsequent add units land in an empty container. - // Defensive: a malformed catalog entry could have `modules` as a non-array (or - // absent) — normalise to [] so the loops below can't throw on bad JSON. - const modules = Array.isArray(entry.modules) ? entry.modules : []; - for (const c of modules) { - if (c && c.replaceChildren && c.id) await clearModuleChildren(c.id); - } - for (const m of modules) { - if (!m || typeof m !== "object") continue; - if (m.parent_id && m.type) { - if (!(await sendAddModule(m))) { - console.warn(`[board-inject] skipping controls for ${m.id || m.type} — its add failed`); - continue; - } - } - if (m.controls && typeof m.controls === "object") { - for (const [controlName, value] of Object.entries(m.controls)) { - await sendControl(m.id, controlName, value); - } - } - } -} - -// DELETE every current child of the module named `parentName` — used by the catalog -// inject's replaceChildren so an entry's effects/modifiers replace the boot defaults -// instead of stacking behind them. Best-effort (mirrors the inject's no-retry -// contract): fetch the live tree, find the container, DELETE each child by name. -// Only deletable (user-editable) children go; the device rejects a code-wired -// child's DELETE, which is fine — those aren't what a catalog entry replaces. 
-async function clearModuleChildren(parentName) { - let tree; - try { - const res = await fetch("/api/state"); - if (!res.ok) return; - tree = await res.json(); - } catch (_) { return; } - const findByName = (mods, name) => { - if (!Array.isArray(mods)) return null; // malformed payload — no children here - for (const m of mods) { - if (m && m.name === name) return m; - const hit = findByName(m && m.children, name); - if (hit) return hit; - } - return null; - }; - const parent = findByName(tree.modules, parentName); - const children = parent && Array.isArray(parent.children) ? parent.children : []; - for (const child of children) { - try { - await fetch("/api/modules/" + encodeURIComponent(child.name), { method: "DELETE" }); - } catch (_) { /* best-effort; keep clearing the rest */ } - } -} - async function refetchState() { try { const r = await fetch("/api/state"); @@ -1334,7 +1169,6 @@ function createControl(moduleName, moduleType, ctrl) { input.value = ctrl.value ?? ""; input.placeholder = "0.0.0.0"; input.maxLength = 15; // "255.255.255.255" = 15 - input.size = 15; input.addEventListener("input", () => { dragTs[key] = Date.now(); debounceSend(key, 500, () => sendControl(moduleName, ctrl.name, input.value)); diff --git a/src/ui/style.css b/src/ui/style.css index 64e1691..689a7ec 100644 --- a/src/ui/style.css +++ b/src/ui/style.css @@ -666,6 +666,7 @@ body { color: var(--fg-muted); width: 24px; height: 24px; + flex-shrink: 0; /* never let a wide input (e.g. 
an ipv4 field) squeeze the reset ↺ off the row */ border-radius: 4px; cursor: pointer; font-size: 14px; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1996f3e..9cd2ca0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -10,7 +10,9 @@ add_executable(mm_tests unit/core/unit_DeviceIdentify.cpp unit/core/unit_DevicesModule_ageout.cpp unit/core/unit_FilesystemModule_persistence.cpp + unit/core/unit_HttpServerModule_apply.cpp unit/core/unit_ImprovFrame.cpp + unit/core/unit_ImprovOpReassembler.cpp unit/core/unit_JsonUtil_parse.cpp unit/core/unit_MappingLUT.cpp unit/core/unit_ModuleFactory.cpp diff --git a/test/js/improv-frame.test.mjs b/test/js/improv-frame.test.mjs new file mode 100644 index 0000000..5444094 --- /dev/null +++ b/test/js/improv-frame.test.mjs @@ -0,0 +1,82 @@ +// Improv frame-contract tests — pin the wire format the device C++ +// (src/core/ImprovFrame.h), Python (scripts/build/improv_provision.py), and the +// installer JS (docs/install/improv-frame.js) must all agree on byte-for-byte. +// The G1 golden vector here is asserted identically in test/python/test_improv_frame.py +// so the JS and Python builders can't drift; it's hand-verified against the C++ +// checksum (sum-mod-256) too. Run: `node --test "test/js/**/*.test.mjs"`. 
+ +import { test } from "node:test"; +import assert from "node:assert/strict"; +import { + buildImprovFrame, + encodeApplyOpFrames, + APPLY_OP_CHUNK_MAX, + IMPROV_CMD_APPLY_OP, + IMPROV_FRAME_TYPE_RPC, + IMPROV_MAGIC, +} from "../../docs/install/improv-frame.js"; + +const hex = (u8) => Array.from(u8).map((b) => b.toString(16).padStart(2, "0")).join(" "); +const bytes = (s) => Array.from(new TextEncoder().encode(s)); + +test("frame layout: magic, version, type, length, payload, checksum", () => { + const frame = buildImprovFrame(IMPROV_FRAME_TYPE_RPC, new Uint8Array([0x01])); + assert.deepEqual(Array.from(frame.subarray(0, 6)), IMPROV_MAGIC, "magic"); + assert.equal(frame[6], 0x01, "version"); + assert.equal(frame[7], IMPROV_FRAME_TYPE_RPC, "type"); + assert.equal(frame[8], 1, "length"); + assert.equal(frame[9], 0x01, "payload"); + assert.equal(frame.length, 11, "total = 9 header + 1 payload + 1 checksum"); +}); + +test("checksum is sum-mod-256 of the first 9+length bytes", () => { + const payload = new Uint8Array([0xAA, 0xBB, 0xCC]); + const frame = buildImprovFrame(IMPROV_FRAME_TYPE_RPC, payload); + let sum = 0; + for (let i = 0; i < frame.length - 1; i++) sum = (sum + frame[i]) & 0xff; + assert.equal(frame[frame.length - 1], sum); +}); + +test("golden vector G1: buildImprovFrame(RPC, [0x01])", () => { + // Shared with test/python G1. Hand-verified checksum 0xe3. 
+ const frame = buildImprovFrame(IMPROV_FRAME_TYPE_RPC, new Uint8Array([0x01])); + assert.equal(hex(frame), "49 4d 50 52 4f 56 01 03 01 01 e3"); +}); + +test("golden vector G2: a small APPLY_OP set op is a single frame", () => { + const op = { op: "set", module: "Grid", control: "width", value: 8 }; + const frames = encodeApplyOpFrames(op); + assert.equal(frames.length, 1, "fits one frame"); + const f = frames[0]; + assert.equal(f[7], IMPROV_FRAME_TYPE_RPC); + assert.equal(f[9 + 0], IMPROV_CMD_APPLY_OP, "payload[0] = 0xFC"); + assert.equal(f[9 + 1], 0, "seq"); + assert.equal(f[9 + 2], 1, "last"); + // payload after the 3-byte header is the op JSON, byte-identical + assert.deepEqual(Array.from(f.subarray(9 + 3, 9 + f[8])), bytes(JSON.stringify(op))); +}); + +test("golden vector G3: a >125-byte op chunks into ordered frames", () => { + const op = { op: "set", module: "X", control: "pins", value: "1".repeat(140) }; + const json = JSON.stringify(op); + assert.ok(new TextEncoder().encode(json).length > APPLY_OP_CHUNK_MAX, "forces >1 chunk"); + const frames = encodeApplyOpFrames(op); + assert.equal(frames.length, 2); + // frame 0: seq 0, last 0, full chunk + assert.equal(frames[0][9 + 1], 0, "f0 seq"); + assert.equal(frames[0][9 + 2], 0, "f0 not-last"); + assert.equal(frames[0][8] - 3, APPLY_OP_CHUNK_MAX, "f0 carries a full chunk"); + // frame 1: seq 1, last 1, remainder + assert.equal(frames[1][9 + 1], 1, "f1 seq"); + assert.equal(frames[1][9 + 2], 1, "f1 last"); + // reassembling the chunks reproduces the op JSON exactly + let reassembled = []; + for (const f of frames) reassembled.push(...f.subarray(9 + 3, 9 + f[8])); + assert.deepEqual(reassembled, bytes(json)); +}); + +test("APPLY_OP always emits at least one frame (so `last` always sends)", () => { + const frames = encodeApplyOpFrames({}); + assert.equal(frames.length, 1); + assert.equal(frames[0][9 + 2], 1, "last=1 on the lone frame"); +}); diff --git a/test/python/test_improv_frame.py 
b/test/python/test_improv_frame.py new file mode 100644 index 0000000..d84c6fc --- /dev/null +++ b/test/python/test_improv_frame.py @@ -0,0 +1,54 @@ +# /// script +# dependencies = ["pytest", "pyserial"] +# /// +"""Improv frame-contract tests (Python side). + +Pins the wire format the device C++ (src/core/ImprovFrame.h), the installer JS +(docs/install/improv-frame.js), and this Python builder (scripts/build/improv_provision.py) +must all agree on byte-for-byte. The G1 golden vector below is the SAME one asserted +in test/js/improv-frame.test.mjs, so the JS and Python envelope builders can't drift; +it's hand-verified against the C++ sum-mod-256 checksum too. + +pyserial is an inline dep only because improv_provision.py's `import serial` guard +sys.exit()s when it's missing — the frame functions themselves need nothing. Run: +`uv run --with pytest --with pyserial pytest test/python` (a bare `pytest` run ignores the inline deps above). + +APPLY_OP chunking is JS+device-C++ only (the Python provisioning script does WIFI_SETTINGS, +not config push), so that layer is pinned in the JS test; here we pin the shared envelope. +""" + +import sys +from pathlib import Path + +# improv_provision.py lives in scripts/build and imports a sibling (host_wifi). +sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "scripts" / "build")) + +import improv_provision as ip # noqa: E402 + + +def test_checksum_is_sum_mod_256(): + assert ip.checksum(b"abc") == sum(b"abc") & 0xFF + assert ip.checksum(b"") == 0 + assert ip.checksum(bytes([0xFF, 0xFF])) == 0xFE # wraps mod 256 + + +def test_frame_layout(): + frame = ip.build_frame(0x03, bytes([0x01])) + assert frame[0:6] == b"IMPROV", "magic" + assert frame[6] == 0x01, "version" + assert frame[7] == 0x03, "type" + assert frame[8] == 1, "length" + assert frame[9] == 0x01, "payload" + assert len(frame) == 11, "9 header + 1 payload + 1 checksum" + + +def test_golden_vector_g1(): + # Shared with test/js G1. Hand-verified checksum 0xe3. 
+ frame = ip.build_frame(0x03, bytes([0x01])) + assert frame.hex(" ") == "49 4d 50 52 4f 56 01 03 01 01 e3" + + +def test_checksum_covers_header_through_payload(): + payload = bytes([0xAA, 0xBB, 0xCC]) + frame = ip.build_frame(0x03, payload) + assert frame[-1] == sum(frame[:-1]) & 0xFF diff --git a/test/scenarios/light/scenario_Driver_mutation.json b/test/scenarios/light/scenario_Driver_mutation.json index 44a2791..c1514e0 100644 --- a/test/scenarios/light/scenario_Driver_mutation.json +++ b/test/scenarios/light/scenario_Driver_mutation.json @@ -192,7 +192,7 @@ "pc-macos": { "tick_us": [ 8, - 10 + 11 ], "free_heap": [ 0, @@ -204,7 +204,7 @@ ], "at": [ "2026-06-13", - "2026-06-16" + "2026-06-21" ] } } diff --git a/test/scenarios/light/scenario_modifier_swap.json b/test/scenarios/light/scenario_modifier_swap.json index d94752c..f2d1ba1 100644 --- a/test/scenarios/light/scenario_modifier_swap.json +++ b/test/scenarios/light/scenario_modifier_swap.json @@ -152,7 +152,7 @@ "pc-macos": { "tick_us": [ 6, - 13 + 20 ], "free_heap": [ 0, @@ -164,7 +164,7 @@ ], "at": [ "2026-06-07", - "2026-06-11" + "2026-06-21" ] }, "esp32-eth": { diff --git a/test/scenarios/light/scenario_perf_full.json b/test/scenarios/light/scenario_perf_full.json index f57bbef..48467ff 100644 --- a/test/scenarios/light/scenario_perf_full.json +++ b/test/scenarios/light/scenario_perf_full.json @@ -1124,7 +1124,7 @@ "pc-macos": { "tick_us": [ 4, - 4 + 5 ], "free_heap": [ 0, @@ -1136,7 +1136,7 @@ ], "at": [ "2026-06-17", - "2026-06-17" + "2026-06-21" ] }, "esp32s3-n16r8": { @@ -1217,7 +1217,7 @@ "pc-macos": { "tick_us": [ 16, - 17 + 41 ], "free_heap": [ 0, @@ -1229,7 +1229,7 @@ ], "at": [ "2026-06-17", - "2026-06-17" + "2026-06-21" ] }, "esp32s3-n16r8": { @@ -1411,7 +1411,7 @@ "pc-macos": { "tick_us": [ 14, - 16 + 59 ], "free_heap": [ 0, @@ -1423,7 +1423,7 @@ ], "at": [ "2026-06-17", - "2026-06-17" + "2026-06-21" ] }, "esp32s3-n16r8": { @@ -1504,7 +1504,7 @@ "pc-macos": { "tick_us": [ 62, - 71 + 342 ], 
"free_heap": [ 0, @@ -1516,7 +1516,7 @@ ], "at": [ "2026-06-17", - "2026-06-19" + "2026-06-21" ] }, "esp32s3-n16r8": { @@ -1597,7 +1597,7 @@ "pc-macos": { "tick_us": [ 308, - 343 + 420 ], "free_heap": [ 0, @@ -1609,7 +1609,7 @@ ], "at": [ "2026-06-17", - "2026-06-19" + "2026-06-21" ] }, "esp32s3-n16r8": { @@ -1994,7 +1994,7 @@ }, "pc-macos": { "tick_us": [ - 63, + 62, 70 ], "free_heap": [ @@ -2007,7 +2007,7 @@ ], "at": [ "2026-06-17", - "2026-06-18" + "2026-06-21" ] }, "esp32": { diff --git a/test/scenarios/light/scenario_perf_light.json b/test/scenarios/light/scenario_perf_light.json index 28d1ef0..cb2e3fc 100644 --- a/test/scenarios/light/scenario_perf_light.json +++ b/test/scenarios/light/scenario_perf_light.json @@ -543,7 +543,7 @@ "pc-macos": { "tick_us": [ 14, - 16 + 18 ], "free_heap": [ 0, @@ -555,7 +555,7 @@ ], "at": [ "2026-06-17", - "2026-06-17" + "2026-06-21" ] }, "esp32s3-n16r8": { diff --git a/test/unit/core/unit_HttpServerModule_apply.cpp b/test/unit/core/unit_HttpServerModule_apply.cpp new file mode 100644 index 0000000..2997f43 --- /dev/null +++ b/test/unit/core/unit_HttpServerModule_apply.cpp @@ -0,0 +1,165 @@ +// @module HttpServerModule + +#include "doctest.h" +#include "core/HttpServerModule.h" +#include "core/Scheduler.h" +#include "core/ModuleFactory.h" +#include "core/MoonModule.h" + +#include <cstring> + +// Pins the transport-free apply-core that HttpServerModule exposes — applyAddModule +// / applySetControl / applyClearChildren / applyOp. These are the operations the +// HTTP /api/modules + /api/control handlers do, factored out of the TcpConnection so +// BOTH the HTTP path and the Improv-serial APPLY_OP path drive one shared +// implementation ("Improv = REST over serial"). Testing them directly here, without +// a socket, is the unit-test win of the extraction: the apply logic is now provable +// in isolation. 
Also exercises the robustness rule (the apply-core tolerates bad +// input — unknown module, unknown type, malformed op — without crashing, returning a +// typed result instead). + +namespace { + +// A leaf with one editable Uint8 control + a child-accepting container, so we can +// add, set, and clear-children without pulling in real light modules. +struct Knob : public mm::MoonModule { + uint8_t value = 10; + void onBuildControls() override { controls_.addUint8("value", value, 0, 100); } +}; +struct Box : public mm::MoonModule { + // accepts any child (the HTTP role gate lives above the apply-core). +}; + +// Register the Knob and Box test types with ModuleFactory exactly once (guarded by +// a function-local static), so every TEST_CASE can call this unconditionally. Each +// case then builds its own tree: a Box root "Root" plus an HttpServerModule. +void registerTestTypes() { + static bool done = false; + if (done) return; + mm::ModuleFactory::registerType<Knob>("Knob"); + mm::ModuleFactory::registerType<Box>("Box"); + done = true; +} + +// Find a direct child of `parent` by name (the test inspects the tree directly +// rather than through HttpServerModule's private findModuleByName). +mm::MoonModule* childNamed(mm::MoonModule* parent, const char* name) { + for (uint8_t i = 0; i < parent->childCount(); i++) { + auto* c = parent->child(i); + if (c && std::strcmp(c->name(), name) == 0) return c; + } + return nullptr; +} + +} // namespace + +TEST_CASE("apply-core: applyAddModule adds a child, idempotent on the id") { + registerTestTypes(); + mm::Scheduler s; + auto* root = new Box(); + root->setName("Root"); + s.addModule(root); + mm::HttpServerModule http; + http.setScheduler(&s); + + using OpResult = mm::HttpServerModule::OpResult; + + // Add a Knob named "K" under "Root". 
+ CHECK(http.applyAddModule("Knob", "K", "Root") == OpResult::Ok); + CHECK(childNamed(root, "K") != nullptr); + + // Idempotent: re-adding the same id is AlreadyExists (no duplicate) — a distinct + // success the HTTP handler reports as {"ok":true,"note":"already exists"}. + CHECK(http.applyAddModule("Knob", "K", "Root") == OpResult::AlreadyExists); + CHECK(root->childCount() == 1); + + // Unknown type / missing parent / top-level add are typed failures, not crashes. + CHECK(http.applyAddModule("NopeType", "X", "Root") == OpResult::UnknownType); + CHECK(http.applyAddModule("Knob", "Y", "NoSuchParent") == OpResult::ModuleNotFound); + CHECK(http.applyAddModule("Knob", "Z", "") == OpResult::BadRequest); // no parent → top-level + + s.deleteTree(root); +} + +TEST_CASE("apply-core: applySetControl writes a value, rejects out-of-range / unknown") { + registerTestTypes(); + mm::Scheduler s; + auto* root = new Box(); + root->setName("Root"); + s.addModule(root); + mm::HttpServerModule http; + http.setScheduler(&s); + using OpResult = mm::HttpServerModule::OpResult; + + REQUIRE(http.applyAddModule("Knob", "K", "Root") == OpResult::Ok); + + // The value JSON is the same {"value":N} body the HTTP handler reads by key. + CHECK(http.applySetControl("K", "value", "{\"value\":42}") == OpResult::Ok); + auto* k = static_cast<Knob*>(childNamed(root, "K")); + REQUIRE(k != nullptr); + CHECK(k->value == 42); + + // Out of the 0..100 range → typed rejection, value left unchanged. + CHECK(http.applySetControl("K", "value", "{\"value\":999}") == OpResult::OutOfRange); + CHECK(k->value == 42); + + // Unknown module vs unknown control → distinct typed failures (each a 404 with + // its own body on the HTTP path), no crash. 
+ CHECK(http.applySetControl("Nope", "value", "{\"value\":1}") == OpResult::ModuleNotFound); + CHECK(http.applySetControl("K", "nope", "{\"value\":1}") == OpResult::ControlNotFound); + + s.deleteTree(root); +} + +TEST_CASE("apply-core: applyClearChildren empties a container (replaceChildren)") { + registerTestTypes(); + mm::Scheduler s; + auto* root = new Box(); + root->setName("Root"); + s.addModule(root); + mm::HttpServerModule http; + http.setScheduler(&s); + using OpResult = mm::HttpServerModule::OpResult; + + REQUIRE(http.applyAddModule("Knob", "A", "Root") == OpResult::Ok); + REQUIRE(http.applyAddModule("Knob", "B", "Root") == OpResult::Ok); + CHECK(root->childCount() == 2); + + CHECK(http.applyClearChildren("Root") == OpResult::Ok); + CHECK(root->childCount() == 0); + + // Clearing a non-existent parent is ModuleNotFound, not a crash. Clearing an + // already-empty container is Ok. + CHECK(http.applyClearChildren("Nope") == OpResult::ModuleNotFound); + CHECK(http.applyClearChildren("Root") == OpResult::Ok); + + s.deleteTree(root); +} + +TEST_CASE("apply-core: applyOp dispatches each op type and tolerates bad input") { + registerTestTypes(); + mm::Scheduler s; + auto* root = new Box(); + root->setName("Root"); + s.addModule(root); + mm::HttpServerModule http; + http.setScheduler(&s); + using OpResult = mm::HttpServerModule::OpResult; + + // The op JSON shapes are exactly what the installer pushes over APPLY_OP. 
+ CHECK(http.applyOp("{\"op\":\"add\",\"type\":\"Knob\",\"id\":\"K\",\"parent\":\"Root\"}") == OpResult::Ok); + CHECK(childNamed(root, "K") != nullptr); + + CHECK(http.applyOp("{\"op\":\"set\",\"module\":\"K\",\"control\":\"value\",\"value\":7}") == OpResult::Ok); + CHECK(static_cast<Knob*>(childNamed(root, "K"))->value == 7); + + CHECK(http.applyOp("{\"op\":\"clearChildren\",\"parent\":\"Root\"}") == OpResult::Ok); + CHECK(root->childCount() == 0); + + // Unknown op verb and a malformed (no "op") object are BadRequest, not crashes — + // the robustness rule: any pushed bytes are tolerated. + CHECK(http.applyOp("{\"op\":\"frobnicate\"}") == OpResult::BadRequest); + CHECK(http.applyOp("{\"nope\":1}") == OpResult::BadRequest); + + s.deleteTree(root); +} diff --git a/test/unit/core/unit_ImprovOpReassembler.cpp b/test/unit/core/unit_ImprovOpReassembler.cpp new file mode 100644 index 0000000..9fdb408 --- /dev/null +++ b/test/unit/core/unit_ImprovOpReassembler.cpp @@ -0,0 +1,120 @@ +// @module ImprovOpReassembler + +// Unit tests for src/core/ImprovOpReassembler.h — the chunk-reassembly + sequence +// guard behind the device's APPLY_OP (0xFC) handler ("Improv = REST over serial"). +// The ESP32 handler (platform_esp32_improv.cpp::improvHandleApplyOp) owns the serial +// I/O and hands each [seq][last][bytes] chunk here; isolating the state machine lets +// us prove every path — in-order multi-chunk, duplicate, out-of-order, overflow, +// recovery — without an MCU + serial cable. This is the heart of config-push: a bug +// here silently misconfigures a freshly-flashed device. + +#include "doctest.h" +#include "core/ImprovOpReassembler.h" + +#include <cstring> +#include <string> + +using namespace mm; +using R = ImprovOpReassembler::Result; + +// Feed a chunk from a string; chunk index + last flag explicit so a test reads like +// the wire frames it models. 
+static R feedStr(ImprovOpReassembler& r, uint8_t seq, bool last, const std::string& s) { + return r.feed(seq, last, reinterpret_cast<const uint8_t*>(s.data()), s.size()); +} + +TEST_CASE("a single-frame op (seq 0, last 1) is Ready with the exact bytes") { + char buf[128]; + ImprovOpReassembler r(buf, sizeof(buf)); + const std::string op = R"({"op":"set","module":"Grid","control":"width","value":8})"; + CHECK(feedStr(r, 0, true, op) == R::Ready); + CHECK(r.len() == op.size()); + CHECK(std::string(r.out()) == op); // NUL-terminated, byte-identical +} + +TEST_CASE("a multi-chunk op reassembles in order and NUL-terminates") { + char buf[128]; + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, false, "{\"op\":\"set\",") == R::Continue); + CHECK(feedStr(r, 1, false, "\"module\":\"X\",") == R::Continue); + CHECK(feedStr(r, 2, true, "\"value\":1}") == R::Ready); + CHECK(std::string(r.out()) == "{\"op\":\"set\",\"module\":\"X\",\"value\":1}"); +} + +TEST_CASE("a duplicate chunk is rejected and resets the buffer") { + char buf[128]; + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, false, "AAA") == R::Continue); + CHECK(feedStr(r, 1, false, "BBB") == R::Continue); + // The installer re-sends seq 1 (a misread-timeout retry): out of sequence → Error. + CHECK(feedStr(r, 1, false, "BBB") == R::Error); + // Buffer reset: a stale partial can't leak into the next op. A fresh op (seq 0) + // recovers cleanly. + CHECK(feedStr(r, 0, true, "{\"ok\":1}") == R::Ready); + CHECK(std::string(r.out()) == "{\"ok\":1}"); +} + +TEST_CASE("an out-of-order chunk (skipped seq) is rejected") { + char buf[128]; + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, false, "AAA") == R::Continue); + // seq jumps 0 -> 2 (seq 1 lost): the guard rejects rather than splice a hole. 
+ CHECK(feedStr(r, 2, true, "CCC") == R::Error); +} + +TEST_CASE("a non-zero opening seq (no fresh start) is rejected") { + char buf[128]; + ImprovOpReassembler r(buf, sizeof(buf)); + // First chunk seen is seq 1 (we missed seq 0): only seq 0 may start an op. + CHECK(feedStr(r, 1, true, "X") == R::Error); + // seq 0 then starts cleanly. + CHECK(feedStr(r, 0, true, "Y") == R::Ready); + CHECK(std::string(r.out()) == "Y"); +} + +TEST_CASE("overflow past the buffer (minus the NUL) is rejected, not truncated") { + char buf[8]; // 7 usable bytes + 1 reserved for NUL + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, false, "ABCD") == R::Continue); // 4 bytes + // 4 + 4 = 8 >= cap(8): would leave no room for the NUL → Error + reset. + CHECK(feedStr(r, 1, true, "EFGH") == R::Error); + // A small op fits and works after the overflow reset. + CHECK(feedStr(r, 0, true, "{}") == R::Ready); + CHECK(std::string(r.out()) == "{}"); +} + +TEST_CASE("exactly buffer-minus-one bytes fits (boundary)") { + char buf[8]; // 7 usable + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, true, "1234567") == R::Ready); // 7 bytes + NUL = 8, exactly fits + CHECK(r.len() == 7); + CHECK(std::string(r.out()) == "1234567"); +} + +TEST_CASE("seq 0 mid-stream abandons a partial op and starts fresh") { + char buf[128]; + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, false, "partial-") == R::Continue); + // A new op begins (seq 0) before the previous finished — the old partial is dropped, + // not concatenated. (Models the installer moving to the next op after an error.) 
+ CHECK(feedStr(r, 0, true, "{\"fresh\":1}") == R::Ready); + CHECK(std::string(r.out()) == "{\"fresh\":1}"); +} + +TEST_CASE("an empty final chunk still completes (last with zero bytes)") { + char buf[16]; + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, false, "{}") == R::Continue); + CHECK(r.feed(1, true, nullptr, 0) == R::Ready); // trailing empty last frame + CHECK(std::string(r.out()) == "{}"); +} + +TEST_CASE("reset() drops a partial op") { + char buf[16]; + ImprovOpReassembler r(buf, sizeof(buf)); + CHECK(feedStr(r, 0, false, "abc") == R::Continue); + r.reset(); + // After reset, a chunk with seq 1 is out of order (we're back to awaiting seq 0). + CHECK(feedStr(r, 1, true, "x") == R::Error); + CHECK(feedStr(r, 0, true, "ok") == R::Ready); +}