From b3f895375e73a3f718bed19f3bf51bb2dd9e13a4 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Sat, 6 Jun 2026 19:23:14 -0400 Subject: [PATCH 1/2] Recommendations: per-database RCSI consent cards (reader/writer-gated) Surface per-database RCSI (enable READ_COMMITTED_SNAPSHOT) recommendation cards, each Apply-able behind the EXISTING two-sided informed-consent gate. Reuses the proven RcsiHandler + SetDatabaseOptionAsync + FactRiskDisclosure unchanged; the new code only carries data + fans cards. - RemediationAction gains RcsiTarget (db + RcsiInactionFigures) carried on the DB_CONFIG action purely for the reader; never executed from there (DbConfigHandler ignores it). FactRemediation.CollectRcsiTargets collects all qualifying rcsi-off dbs; BuildAction populates RcsiTargets; BuildRcsiAction builds from the same list. - Reader fans each RcsiTarget into a per-db RCSI card (Setting=Rcsi) whose Apply reconstructs a FactKey="RCSI" action with the db's RcsiFigures -> dispatches to RcsiHandler (IsDestructive) + the two-sided consent gate, rendering REAL figures. - AlertContext DTO round-trips RcsiTargets. - CONTENTION GATE: RCSI is recommended only where there is meaningful READER/WRITER blocking (rcsi_reader_writer_pct >= FactRiskDisclosure.ReaderWriterMeaningfulPct). Writer/writer-dominant blocking (X/IX/U) and raw deadlock counts do NOT trigger it -- RCSI does not relieve those. The reader/writer share is classified from the blocked-process report's lock modes. Tests: 351 Dashboard.Tests pass (fan-out, DTO round-trip, contention gate incl. writer/writer-dominant excluded, affordance). Security-reviewed clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- Dashboard.Tests/RecommendationDeduperTests.cs | 99 +++++++++++++++++++ .../RecommendationsViewModelTests.cs | 24 +++++ .../RemediationApplyServiceTests.cs | 33 +++++++ Dashboard.Tests/RemediationTests.cs | 73 ++++++++++++-- .../Recommendations/RecommendationsReader.cs | 45 ++++++++- .../FactRemediation.cs | 96 ++++++++++++++---- .../RemediationAction.cs | 23 ++++- .../AlertContext.cs | 47 ++++++++- 8 files changed, 404 insertions(+), 36 deletions(-) diff --git a/Dashboard.Tests/RecommendationDeduperTests.cs b/Dashboard.Tests/RecommendationDeduperTests.cs index a3e520d5..1573f7fd 100644 --- a/Dashboard.Tests/RecommendationDeduperTests.cs +++ b/Dashboard.Tests/RecommendationDeduperTests.cs @@ -523,6 +523,105 @@ public void MapEngineFindings_NonDbConfig_ReturnsSingleItemUnchanged() Assert.Same(action, item.Remediation); // same single action, NOT sliced } + // ---- per-db RCSI fan-out of DB_CONFIG findings (destructive, consent-gated) ---- + + [Fact] + public void MapEngineFindings_DbConfig_RcsiTargets_FanToPerDbRcsiCardsWithRcsiAction() + { + // A DB_CONFIG finding whose action carries TWO per-db RCSI targets fans to TWO RCSI + // cards — each a distinct FactKey="RCSI" action (dispatches to RcsiHandler + the two- + // sided consent gate) carrying THAT db's real inaction figures. + var action = new RemediationAction( + "DB_CONFIG", "set", Array.Empty(), + DbConfigTargets: Array.Empty(), + RcsiTargets: new[] + { + new RcsiTarget("Sales", new RcsiInactionFigures(40, 2, 85)), + new RcsiTarget("Orders", new RcsiInactionFigures(12, 0, 30)) + }); + + var finding = new AnalysisFinding + { + ServerId = 1, + ServerName = "S", + DatabaseName = null, // DB_CONFIG is SERVER-scoped + Severity = 0.3, + Category = "database_config", + RootFactKey = "DB_CONFIG", + StoryPathHash = "h", + StoryPath = "p", + Remediation = action + }; + + var items = RecommendationsReader.MapEngineFindings(finding); + + Assert.Equal(2, items.Count); + Assert.All(items, i => + { + Assert.Equal(RecommendationSource.Engine, i.Source); + Assert.Equal(RecommendationSetting.Rcsi, i.Setting); + Assert.NotNull(i.Remediation); + Assert.Equal("RCSI", i.Remediation!.FactKey); // dispatches to RcsiHandler + Assert.NotNull(i.Remediation.RcsiFigures); // real figures for the consent dialog + Assert.Contains("READ_COMMITTED_SNAPSHOT ON", i.CopyPasteSql); + Assert.Contains(i.Database!, i.Title); // Title names the db + // The reconstructed action's single target is THIS db with the RCSI setting. + var target = Assert.Single(i.Remediation.DbConfigTargets!); + Assert.Equal(i.Database, target.Database); + Assert.Equal(DbConfigSetting.ReadCommittedSnapshotOn, target.Setting); + Assert.Equal("h", i.StoryPathHash); + }); + + var sales = Assert.Single(items, i => i.Database == "Sales"); + Assert.Equal(40, sales.Remediation!.RcsiFigures!.BlockingEvents); + Assert.Equal(2, sales.Remediation.RcsiFigures.Deadlocks); + Assert.Equal(85, sales.Remediation.RcsiFigures.ReaderWriterPct); + + var orders = Assert.Single(items, i => i.Database == "Orders"); + Assert.Equal(12, orders.Remediation!.RcsiFigures!.BlockingEvents); + Assert.Equal(0, orders.Remediation.RcsiFigures.Deadlocks); + Assert.Equal(30, orders.Remediation.RcsiFigures.ReaderWriterPct); + } + + [Fact] + public void MapEngineFindings_DbConfig_SafeAndRcsiTargets_ProduceBothSetsOfCards() + { + // One DB_CONFIG finding carrying BOTH a safe AUTO_SHRINK target and an RCSI target must + // produce a safe-setting card AND an RCSI card (the two fan-outs are independent). + var action = new RemediationAction( + "DB_CONFIG", "set", Array.Empty(), + DbConfigTargets: new[] { new DbConfigTarget("Sales", DbConfigSetting.AutoShrinkOff, "ON") }, + RcsiTargets: new[] { new RcsiTarget("Orders", new RcsiInactionFigures(12, 3, 80)) }); + + var finding = new AnalysisFinding + { + ServerId = 1, + ServerName = "S", + DatabaseName = null, + Severity = 0.3, + Category = "database_config", + RootFactKey = "DB_CONFIG", + StoryPathHash = "h", + StoryPath = "p", + Remediation = action + }; + + var items = RecommendationsReader.MapEngineFindings(finding); + + Assert.Equal(2, items.Count); + // The safe AUTO_SHRINK card: DB_CONFIG action, AutoShrink setting. + var safe = Assert.Single(items, i => i.Setting == RecommendationSetting.AutoShrink); + Assert.Equal("Sales", safe.Database); + Assert.Equal("DB_CONFIG", safe.Remediation!.FactKey); + Assert.Contains("AUTO_SHRINK OFF", safe.CopyPasteSql); + // The destructive RCSI card: distinct FactKey="RCSI" action with figures. + var rcsi = Assert.Single(items, i => i.Setting == RecommendationSetting.Rcsi); + Assert.Equal("Orders", rcsi.Database); + Assert.Equal("RCSI", rcsi.Remediation!.FactKey); + Assert.Equal(12, rcsi.Remediation.RcsiFigures!.BlockingEvents); + Assert.Contains("READ_COMMITTED_SNAPSHOT ON", rcsi.CopyPasteSql); + } + [Fact] public void EngineDbConfigFanOut_DeDupesWithLegacyPerDbRow_EngineWinsWithApply() { diff --git a/Dashboard.Tests/RecommendationsViewModelTests.cs b/Dashboard.Tests/RecommendationsViewModelTests.cs index d3b74d7d..09dee1c8 100644 --- a/Dashboard.Tests/RecommendationsViewModelTests.cs +++ b/Dashboard.Tests/RecommendationsViewModelTests.cs @@ -351,6 +351,30 @@ public void AutogrowthFix_ShowsCopyFixAndApply_NotIncident() Assert.True(card.ShowApply); // autogrowth IS Apply-able (FileAutogrowthHandler) } + [Fact] + public void RcsiCard_ShowsApplyAndCopyFix_NotIncident() + { + // A per-db RCSI recommendation: Setting=Rcsi (config-fix, NOT a time-bound incident) with + // a distinct FactKey="RCSI" action. It must offer Apply (-> RcsiHandler + the two-sided + // consent gate) and Copy fix (the ALTER), and never the incident affordances. + var card = Card(Item( + CanonicalSeverity.Warning, + setting: RecommendationSetting.Rcsi, + remediation: new RemediationAction( + "RCSI", "set", Array.Empty(), + new[] { new DbConfigTarget("Sales", DbConfigSetting.ReadCommittedSnapshotOn, "OFF") }, + RcsiFigures: new RcsiInactionFigures(40, 2, 85)), + sql: "ALTER DATABASE [Sales] SET READ_COMMITTED_SNAPSHOT ON;", + db: "Sales")); + + Assert.False(card.IsIncident); + Assert.True(card.IsConfigFix); + Assert.True(card.ShowApply); + Assert.True(card.ShowCopyFix); + Assert.False(card.ShowOpenInActiveQueries); + Assert.False(card.ShowAskAi); + } + // ---- Mute visibility (Source == Engine) ------------------------------- [Fact] diff --git a/Dashboard.Tests/RemediationApplyServiceTests.cs b/Dashboard.Tests/RemediationApplyServiceTests.cs index 58478f8c..a14239ad 100644 --- a/Dashboard.Tests/RemediationApplyServiceTests.cs +++ b/Dashboard.Tests/RemediationApplyServiceTests.cs @@ -644,6 +644,39 @@ await service.ApplyAsync(persistedAction, Server, previewSql: "preview", "DOM\\o Assert.DoesNotContain(captured.Risks.RisksOfNotChanging, r => r.Text.Contains("Little or no reader/writer blocking")); } + [Fact] + public void RcsiTargets_OnDbConfigAction_SurviveAlertContextRoundTrip_WithFigures() + { + // The per-db RCSI targets carried on a DB_CONFIG action (for the read-time card fan-out) + // must survive the AlertContext serialize -> deserialize round-trip with their figures + // intact — otherwise the Recommendations reader fans no RCSI cards after persistence. + var action = new RemediationAction( + "DB_CONFIG", "set", Array.Empty(), + DbConfigTargets: new[] { new DbConfigTarget("Sales", DbConfigSetting.AutoShrinkOff, "ON") }, + RcsiTargets: new[] + { + new RcsiTarget("Sales", new RcsiInactionFigures(40, 2, 85)), + new RcsiTarget("Orders", new RcsiInactionFigures(12, 0, null)) + }); + + var ctx = new AlertContext(); + ctx.Details.Add(new AlertDetailItem { Heading = "DB config", IsCodeBlock = true, Remediation = action }); + Assert.True(AlertContextSerializer.TryDeserialize(AlertContextSerializer.Serialize(ctx), out var round)); + var persisted = round.Details[0].Remediation!; + + // Safe target preserved... + Assert.Single(persisted.DbConfigTargets!); + // ...and the two RCSI targets with their figures. + Assert.Equal(2, persisted.RcsiTargets!.Count); + var sales = Assert.Single(persisted.RcsiTargets, t => t.Database == "Sales"); + Assert.Equal(40, sales.Figures.BlockingEvents); + Assert.Equal(2, sales.Figures.Deadlocks); + Assert.Equal(85, sales.Figures.ReaderWriterPct); + var orders = Assert.Single(persisted.RcsiTargets, t => t.Database == "Orders"); + Assert.Equal(12, orders.Figures.BlockingEvents); + Assert.Null(orders.Figures.ReaderWriterPct); // nullable pct round-trips as null + } + [Fact] public async Task Apply_Destructive_NoFiguresNoFinding_ShowsWeakCaseBaseline() { diff --git a/Dashboard.Tests/RemediationTests.cs b/Dashboard.Tests/RemediationTests.cs index 2782b1dd..46a8e488 100644 --- a/Dashboard.Tests/RemediationTests.cs +++ b/Dashboard.Tests/RemediationTests.cs @@ -1005,13 +1005,23 @@ public void BuildRcsiAction_ReturnsNull_WhenEnrichmentAbsent() } [Fact] - public void BuildAction_Unchanged_NeverEmitsRcsi_Phase2RegressionGuard() - { - // The always-safe BuildAction must STILL never emit RCSI, even on the enriched - // RCSI-off finding — the destructive arm rides BuildRcsiAction only. - Assert.Null(FactRemediation.BuildAction(RcsiOffFinding())); - - // And a mixed finding (RCSI off + a safe setting) yields only the safe target. + public void BuildAction_NeverPutsRcsiInSafeTargets_ButCarriesRcsiTargets() + { + // The always-safe BuildAction must STILL never put RCSI in the EXECUTED DbConfigTargets + // (DbConfigHandler runs those). RCSI is now CARRIED on the action's RcsiTargets purely so + // the Recommendations reader can fan per-db RCSI cards — never executed from here. On an + // enriched, CONTENDED RCSI-off finding the action exists (so the reader can fan), has NO + // safe DbConfigTargets, and carries exactly the one RCSI target. + var rcsiOnly = FactRemediation.BuildAction(RcsiOffFinding()); + Assert.NotNull(rcsiOnly); + Assert.Equal("DB_CONFIG", rcsiOnly!.FactKey); + Assert.True(rcsiOnly.DbConfigTargets is null || rcsiOnly.DbConfigTargets.Count == 0); + var carried = Assert.Single(rcsiOnly.RcsiTargets!); + Assert.Equal("Foo", carried.Database); + Assert.Equal(12, carried.Figures.BlockingEvents); + + // And a mixed finding (RCSI off + a safe setting) yields the safe target in + // DbConfigTargets (never RCSI) AND carries the RCSI target separately. var mixed = DbConfigFinding(new List { new { database = "Foo", rcsi = false, query_store = true, @@ -1022,7 +1032,56 @@ public void BuildAction_Unchanged_NeverEmitsRcsi_Phase2RegressionGuard() var safe = FactRemediation.BuildAction(mixed); Assert.NotNull(safe); Assert.Equal("DB_CONFIG", safe!.FactKey); + Assert.NotEmpty(safe.DbConfigTargets!); Assert.All(safe.DbConfigTargets!, t => Assert.NotEqual(DbConfigSetting.ReadCommittedSnapshotOn, t.Setting)); + Assert.Single(safe.RcsiTargets!); // RCSI carried for the card fan-out, not executed + } + + [Fact] + public void CollectRcsiTargets_OnlyReaderWriterContention_WriterWriterAndUnknownExcluded() + { + // RCSI only relieves reader-vs-writer blocking. The gate is the reader/writer SHARE + // (>= FactRiskDisclosure.ReaderWriterMeaningfulPct), NOT raw blocking/deadlock counts: + // - reader/writer-dominant -> recommended + // - writer/writer-dominant -> NOT recommended, even with HEAVY blocking (RCSI does + // nothing for X/IX/U vs X/IX/U contention) + // - no blocked-process detail -> NOT recommended (pct null — can't confirm RCSI helps) + var finding = DbConfigFinding(new List + { + new { database = "ReaderWriter", rcsi = false, query_store = true, + auto_shrink = false, auto_close = false, page_verify = "CHECKSUM", + issues = new[] { "RCSI OFF" }, + rcsi_blocking_events = 40, rcsi_deadlocks = 2, rcsi_reader_writer_pct = 85 }, + new { database = "WriterWriter", rcsi = false, query_store = true, + auto_shrink = false, auto_close = false, page_verify = "CHECKSUM", + issues = new[] { "RCSI OFF" }, + // Heavy blocking + deadlocks, but almost all writer/writer — RCSI won't relieve it. + rcsi_blocking_events = 500, rcsi_deadlocks = 12, rcsi_reader_writer_pct = 15 }, + new { database = "Unknown", rcsi = false, query_store = true, + auto_shrink = false, auto_close = false, page_verify = "CHECKSUM", + issues = new[] { "RCSI OFF" }, + rcsi_blocking_events = 30, rcsi_deadlocks = 0, rcsi_reader_writer_pct = (int?)null }, + new { database = "AtThreshold", rcsi = false, query_store = true, + auto_shrink = false, auto_close = false, page_verify = "CHECKSUM", + issues = new[] { "RCSI OFF" }, + rcsi_blocking_events = 10, rcsi_deadlocks = 0, rcsi_reader_writer_pct = 50 } + }); + + var collected = FactRemediation.CollectRcsiTargets(finding); + + Assert.Equal(2, collected.Count); + Assert.DoesNotContain(collected, t => t.Database == "WriterWriter"); // heavy blocking, but writer/writer + Assert.DoesNotContain(collected, t => t.Database == "Unknown"); // no reader/writer detail captured + var rw = Assert.Single(collected, t => t.Database == "ReaderWriter"); + Assert.Equal(40, rw.Figures.BlockingEvents); + Assert.Equal(85, rw.Figures.ReaderWriterPct); + Assert.Single(collected, t => t.Database == "AtThreshold"); // pct == threshold qualifies + + // BuildAction carries exactly the two reader/writer targets (no safe DbConfigTargets here). + var action = FactRemediation.BuildAction(finding); + Assert.NotNull(action); + Assert.Equal(2, action!.RcsiTargets!.Count); + Assert.True(action.DbConfigTargets is null || action.DbConfigTargets.Count == 0); } // ── FactRiskDisclosure: two-sided, honest-both-directions, golden prose ────── diff --git a/Dashboard/Services/Recommendations/RecommendationsReader.cs b/Dashboard/Services/Recommendations/RecommendationsReader.cs index 83c6aff9..ba4ee9b4 100644 --- a/Dashboard/Services/Recommendations/RecommendationsReader.cs +++ b/Dashboard/Services/Recommendations/RecommendationsReader.cs @@ -136,13 +136,18 @@ public async Task> GetRecommendationsAsync( internal static IReadOnlyList MapEngineFindings(AnalysisFinding finding) { if (string.Equals(finding.RootFactKey, "DB_CONFIG", StringComparison.Ordinal) && - finding.Remediation?.DbConfigTargets is { Count: > 0 } targets) + finding.Remediation is { } remediation && + ((remediation.DbConfigTargets is { Count: > 0 }) || (remediation.RcsiTargets is { Count: > 0 }))) { var band = RecommendationDeduper.FromEngineSeverity(finding.Severity); var adviceText = ComposeEngineAdvice(FactAdvice.GetForFactKey(finding.RootFactKey)); - var items = new List(targets.Count); + var safeTargets = remediation.DbConfigTargets ?? Array.Empty(); + var rcsiTargets = remediation.RcsiTargets ?? Array.Empty(); + var items = new List(safeTargets.Count + rcsiTargets.Count); - foreach (var target in targets) + // Safe (always-online) DB-config settings: one per-(db, setting) card with a + // single-target Apply, so Copy/Apply act on ONLY this (database, setting). + foreach (var target in safeTargets) { var setting = SettingFromDbConfig(target.Setting); if (setting == RecommendationSetting.None) @@ -157,8 +162,6 @@ internal static IReadOnlyList MapEngineFindings(AnalysisFind Title = DbConfigCardTitle(target), ProblemArea = finding.Category, AdviceText = adviceText, - // Single-target copy-paste + a single-target Apply action, so Copy/Apply - // on this card act on ONLY this (database, setting) — not the whole server. CopyPasteSql = AlterStatementFor(target), Remediation = new RemediationAction( "DB_CONFIG", "set", Array.Empty(), new[] { target }), @@ -170,6 +173,38 @@ internal static IReadOnlyList MapEngineFindings(AnalysisFind }); } + // DESTRUCTIVE per-db RCSI: one card per contended RCSI-off database. Each + // RECONSTRUCTS a distinct FactKey="RCSI" action (the SAME shape + // FactRemediation.BuildRcsiAction produces) so Apply routes to the destructive + // RCSI handler (IsDestructive == true) and the two-sided informed-consent dialog + // renders the REAL figures from this target's RcsiInactionFigures. The DB_CONFIG + // action's own RcsiTargets are NEVER executed — they are carried only to drive + // this fan-out. + foreach (var t in rcsiTargets) + { + items.Add(new RecommendationItem + { + Source = RecommendationSource.Engine, + CanonicalSeverity = band, + RawSeverity = finding.Severity, + Database = t.Database, + Title = $"RCSI is OFF — {t.Database}", + ProblemArea = finding.Category, + AdviceText = adviceText, + CopyPasteSql = AlterStatementFor( + new DbConfigTarget(t.Database, DbConfigSetting.ReadCommittedSnapshotOn, "OFF")), + Remediation = new RemediationAction( + "RCSI", "set", Array.Empty(), + new[] { new DbConfigTarget(t.Database, DbConfigSetting.ReadCommittedSnapshotOn, "OFF") }, + RcsiFigures: t.Figures), + StoryPathHash = finding.StoryPathHash, + StoryPath = finding.StoryPath, + Setting = RecommendationSetting.Rcsi, + WindowStartUtc = AsUtc(finding.TimeRangeStart), + WindowEndUtc = AsUtc(finding.TimeRangeEnd) + }); + } + if (items.Count > 0) return items; // No mappable target — fall through to the single generic card below. diff --git a/PerformanceMonitor.Analysis/FactRemediation.cs b/PerformanceMonitor.Analysis/FactRemediation.cs index 148fdf2b..5a80c805 100644 --- a/PerformanceMonitor.Analysis/FactRemediation.cs +++ b/PerformanceMonitor.Analysis/FactRemediation.cs @@ -66,9 +66,20 @@ public static class FactRemediation : new RemediationAction("PLAN_REGRESSION", "force", targets); case "DB_CONFIG": var dbConfigTargets = ExtractDbConfigTargets(finding); - return dbConfigTargets.Count == 0 + // Per-db RCSI targets are CARRIED on the safe DB_CONFIG action so the + // Recommendations reader can fan per-db RCSI cards on read. They are NEVER + // executed from here (DbConfigHandler only runs DbConfigTargets). Returning the + // action when ONLY RCSI targets exist matters: a finding whose only issue is + // contended RCSI-off databases would otherwise persist nothing (no safe target), + // and the persisted action — not the ephemeral drill-down — is what the reader + // fans from. This also lets the DB_CONFIG action win AnalysisService's + // BuildAction ?? BuildRcsiAction chain, so the persisted action carries the + // fan-out data (BuildRcsiAction's singular action does not carry RcsiTargets). + var rcsiTargets = CollectRcsiTargets(finding); + return dbConfigTargets.Count == 0 && rcsiTargets.Count == 0 ? null - : new RemediationAction("DB_CONFIG", "set", Array.Empty(), dbConfigTargets); + : new RemediationAction("DB_CONFIG", "set", Array.Empty(), dbConfigTargets, + RcsiTargets: rcsiTargets); default: return null; } @@ -115,13 +126,49 @@ public static class FactRemediation /// public static RemediationAction? BuildRcsiAction(AnalysisFinding finding) { - if (finding is null || !string.Equals(finding.RootFactKey, "DB_CONFIG", StringComparison.Ordinal)) + // Single source of truth for the qualifying-db scan + the real-contention gate: + // build the alert-path action from the FIRST collected target (or null when none), + // so the alert path and the per-db Recommendations cards apply the SAME gate. + var rcsiTargets = CollectRcsiTargets(finding); + if (rcsiTargets.Count == 0) return null; + var first = rcsiTargets[0]; + var target = new DbConfigTarget(first.Database, DbConfigSetting.ReadCommittedSnapshotOn, "OFF"); + + // The risk-of-NOT-changing figures were captured by CollectRcsiTargets (the finding + // was available) and are carried on the persisted action, so the informed-consent + // dialog renders the REAL numbers at apply time — when only the persisted action + // survives (the UI apply call site passes no finding). FactRiskDisclosure reads these + // from the action in preference to the finding. + return new RemediationAction("RCSI", "set", Array.Empty(), new[] { target }, first.Figures); + } + + /// + /// Collects EVERY per-database RCSI target a DB_CONFIG finding qualifies for (B3 Phase 3, + /// recommendations rebuild). Scans config_issues EXACTLY like + /// — a row qualifies only when its rcsi value is + /// false (M2-1 polarity: RCSI is OFF) AND the §3.3 enrichment is present + /// () — but additionally GATES on REAL contention: a row is + /// included only when it carries a positive rcsi_blocking_events OR + /// rcsi_deadlocks count. An RCSI-off database with NO observed blocking/deadlocks is + /// NOT recommended: enabling RCSI there only adds tempdb version-store cost for no + /// concurrency benefit. Unlike (which stops at the first + /// qualifying db for the singular alert action) this returns ALL qualifying databases, each + /// carrying its own , so the Recommendations reader can fan + /// one per-database RCSI card. A defensive cap of 50 mirrors the other extractors. + /// + public static IReadOnlyList CollectRcsiTargets(AnalysisFinding finding) + { + var targets = new List(); + + if (finding is null || !string.Equals(finding.RootFactKey, "DB_CONFIG", StringComparison.Ordinal)) + return targets; + if (finding.DrillDown is null || !finding.DrillDown.TryGetValue("config_issues", out var raw) || raw is null) - return null; + return targets; JsonElement element; try @@ -130,21 +177,22 @@ public static class FactRemediation } catch { - return null; + return targets; } if (element.ValueKind != JsonValueKind.Array) - return null; + return targets; foreach (var row in element.EnumerateArray()) { + if (targets.Count >= 50) break; if (row.ValueKind != JsonValueKind.Object) continue; var database = GetString(row, "database"); if (string.IsNullOrEmpty(database)) continue; - // M2-1: rcsi == true means RCSI is ON. Emit ONLY when RCSI is OFF + // M2-1: rcsi == true means RCSI is ON. Collect ONLY when RCSI is OFF // (JsonValueKind.False) — mirrors the rcsiOffDatabases scan exactly. if (!row.TryGetProperty("rcsi", out var r) || r.ValueKind != JsonValueKind.False) continue; @@ -155,22 +203,30 @@ public static class FactRemediation if (!HasRcsiEnrichment(row)) continue; - var target = new DbConfigTarget(database, DbConfigSetting.ReadCommittedSnapshotOn, "OFF"); - - // Capture the risk-of-NOT-changing figures HERE (the finding is available) - // and carry them on the persisted action, so the informed-consent dialog can - // render the REAL numbers at apply time — when only the persisted action - // survives (the UI apply call site passes no finding). FactRiskDisclosure - // reads these from the action in preference to the finding. - var figures = new RcsiInactionFigures( - BlockingEvents: GetInt(row, "rcsi_blocking_events"), - Deadlocks: GetInt(row, "rcsi_deadlocks"), - ReaderWriterPct: GetNullableInt(row, "rcsi_reader_writer_pct")); + // Reader/writer-contention gate: RCSI only relieves blocking BETWEEN readers and + // writers (a reader's S lock waiting on a writer's X lock, or a writer waiting behind + // a reader's S lock). Writer/writer blocking (X/IX/U) and raw deadlock counts are NOT + // helped by RCSI — recommending it there only adds tempdb version-store overhead. So + // gate on the reader/writer SHARE (classified from the blocked-process report's lock + // modes in CollectRcsiInactionFigures), using the SAME threshold the consent disclosure + // uses to say "RCSI eliminates this". An rcsi-off db whose contention is writer/writer- + // dominant (pct below the threshold) or where no reader/writer blocking was captured + // (pct null) gets NO card. The alert path and the cards share this gate (BuildRcsiAction + // builds from this same list). The raw blocking/deadlock counts are still carried for + // the consent dialog's magnitude context. + var readerWriterPct = GetNullableInt(row, "rcsi_reader_writer_pct"); + if (readerWriterPct is not int pct || pct < FactRiskDisclosure.ReaderWriterMeaningfulPct) + continue; - return new RemediationAction("RCSI", "set", Array.Empty(), new[] { target }, figures); + targets.Add(new RcsiTarget( + database, + new RcsiInactionFigures( + BlockingEvents: GetInt(row, "rcsi_blocking_events"), + Deadlocks: GetInt(row, "rcsi_deadlocks"), + ReaderWriterPct: pct))); } - return null; + return targets; } /// diff --git a/PerformanceMonitor.Analysis/RemediationAction.cs b/PerformanceMonitor.Analysis/RemediationAction.cs index 0aaf63c0..bfbb3d47 100644 --- a/PerformanceMonitor.Analysis/RemediationAction.cs +++ b/PerformanceMonitor.Analysis/RemediationAction.cs @@ -33,7 +33,28 @@ public sealed record RemediationAction( RcsiInactionFigures? RcsiFigures = null, // B3 Phase 3: RCSI risk-of-not-changing figures carried on the persisted action IReadOnlyList? ClearPlanTargets = null, // clear-cached-plan targets (null for the other fact keys) ClearPlanFigures? ClearPlanFigures = null, // clear-cached-plan risk-of-not-changing figures carried on the persisted action - IReadOnlyList? FileGrowthTargets = null); // WS3: percent-autogrowth files — Apply-able (FileAutogrowthHandler: MODIFY FILE FILEGROWTH) + copy-paste + IReadOnlyList? FileGrowthTargets = null, // WS3: percent-autogrowth files — Apply-able (FileAutogrowthHandler: MODIFY FILE FILEGROWTH) + copy-paste + IReadOnlyList? RcsiTargets = null); // per-DB RCSI targets CARRIED on a DB_CONFIG action so the reader can fan per-db RCSI cards on read — NEVER executed from here (see RcsiTarget) + +/// +/// One per-database RCSI target — a (database, inaction-figures) pair carried on the +/// DB_CONFIG list PURELY so the Recommendations +/// reader can fan one per-database RCSI card on read (the config_issues drill-down they +/// were collected from is ephemeral and is NOT returned by the store read-back). +/// +/// +/// These are NEVER executed from the DB_CONFIG action: DbConfigHandler only ever +/// executes the always-safe and ignores this +/// list entirely, so no destructive RCSI change can ride the safe action. For each target the +/// reader reconstructs a DISTINCT FactKey="RCSI" (a single +/// target + these ), +/// which is what runs through RcsiHandler (IsDestructive == true) behind the two-sided +/// informed-consent gate — identical to the action +/// produces for the alert path. is carried so that reconstructed action's +/// consent dialog renders the REAL blocking/deadlock/reader-writer numbers. +/// +/// +public sealed record RcsiTarget(string Database, RcsiInactionFigures Figures); /// /// The risk-of-NOT-changing monitoring figures for a destructive CLEAR_PLAN action diff --git a/PerformanceMonitor.Notifications/AlertContext.cs b/PerformanceMonitor.Notifications/AlertContext.cs index 8312e459..ca6768a5 100644 --- a/PerformanceMonitor.Notifications/AlertContext.cs +++ b/PerformanceMonitor.Notifications/AlertContext.cs @@ -84,7 +84,20 @@ public record RemediationActionDto( RcsiInactionFiguresDto? RcsiFigures = null, List? ClearPlanTargets = null, ClearPlanFiguresDto? ClearPlanFigures = null, - List? FileGrowthTargets = null); + List? FileGrowthTargets = null, + List? RcsiTargets = null); + +/// +/// JSON mirror of . The per-database RCSI targets are carried on a +/// DB_CONFIG action PURELY so the Recommendations reader can fan per-db RCSI cards on read +/// (the drill-down is ephemeral); they are never executed from the DB_CONFIG action itself. +/// The trailing optional RcsiTargets member on keeps +/// the round-trip backward-compatible: legacy/non-DB_CONFIG contextJson without it deserializes +/// to null. +/// +public record RcsiTargetDto( + string Database, + RcsiInactionFiguresDto Figures); /// /// JSON mirror of (clear-cached-plan, PR-B). The @@ -314,8 +327,22 @@ public static bool TryDeserialize(string? json, out AlertContext context) t.Database, t.LogicalFileName, t.CurrentSizeMb, t.CurrentGrowthPercent, t.RecommendedGrowthMb)); } + // Per-db RCSI targets (carried on a DB_CONFIG action for the read-time card fan-out). + // Persist them so the Recommendations reader can fan per-db RCSI cards after the round- + // trip (the drill-down they came from is ephemeral). Null for every other fact key -> + // backward-compatible. Never executed from the DB_CONFIG action. + List? rcsiTargets = null; + if (action.RcsiTargets is not null) + { + rcsiTargets = new List(action.RcsiTargets.Count); + foreach (var t in action.RcsiTargets) + rcsiTargets.Add(new RcsiTargetDto( + t.Database, + new RcsiInactionFiguresDto(t.Figures.BlockingEvents, t.Figures.Deadlocks, t.Figures.ReaderWriterPct))); + } + return new RemediationActionDto(action.FactKey, action.Action, targets, dbConfigTargets, - rcsiFigures, clearPlanTargets, clearPlanFigures, fileGrowthTargets); + rcsiFigures, clearPlanTargets, clearPlanFigures, fileGrowthTargets, rcsiTargets); } private static RemediationAction? FromDto(RemediationActionDto? dto) @@ -392,7 +419,21 @@ public static bool TryDeserialize(string? json, out AlertContext context) t.Database, t.LogicalFileName, t.CurrentSizeMb, t.CurrentGrowthPercent, t.RecommendedGrowthMb)); } + // Per-db RCSI targets: rebuild from the DTO and PASS them to the ctor (the trailing + // RcsiTargets member defaults to null, so a short call would silently drop them on the + // round-trip — the reader would then fan no RCSI cards). Legacy JSON without the field + // deserializes to null → backward-compatible. + List? rcsiTargets = null; + if (dto.RcsiTargets is not null) + { + rcsiTargets = new List(dto.RcsiTargets.Count); + foreach (var t in dto.RcsiTargets) + rcsiTargets.Add(new RcsiTarget( + t.Database, + new RcsiInactionFigures(t.Figures.BlockingEvents, t.Figures.Deadlocks, t.Figures.ReaderWriterPct))); + } + return new RemediationAction(dto.FactKey, dto.Action, targets, dbConfigTargets, rcsiFigures, - clearPlanTargets, clearPlanFigures, fileGrowthTargets); + clearPlanTargets, clearPlanFigures, fileGrowthTargets, rcsiTargets); } } From e5cd3165e3ffa0b7ca0c11c2586768bcf4738966 Mon Sep 17 00:00:00 2001 From: Erik Darling <2136037+erikdarlingdata@users.noreply.github.com> Date: Sat, 6 Jun 2026 20:32:27 -0400 Subject: [PATCH 2/2] Recommendations: RCSI card severity from contention + ignore TRANSACTION_MUTEX Two refinements from testing the RCSI cards: - RCSI card severity now reflects the reader/writer blocking it would relieve (its inaction risk), not the flat config-advisory band: >=100 blocking events or >=10 deadlocks -> Critical, >=10 or >=1 -> Warning, else Info. An RCSI rec was sitting at Info next to a Warning blocking-spike, which read wrong. - TRANSACTION_MUTEX added to config.ignored_wait_types -- it is an internal synchronization wait (MARS / multiple requests sharing one transaction), not a DBA-tunable condition, and belongs with the other *_MUTEX waits already ignored. It was omitted from the seed. Added to install/03 (fresh) + an idempotent upgrade script (existing servers, listed in upgrade.txt); the wait collector filters on the list so it stops being collected/surfaced. 358 Dashboard.Tests pass (incl. RcsiSeverityBand boundary cases). Co-Authored-By: Claude Opus 4.8 (1M context) --- Dashboard.Tests/RecommendationDeduperTests.cs | 18 +++++++ .../Recommendations/RecommendationsReader.cs | 33 ++++++++++++- install/03_create_config_tables.sql | 1 + .../03_add_transaction_mutex_ignored_wait.sql | 47 +++++++++++++++++++ upgrades/2.11.0-to-2.12.0/upgrade.txt | 1 + 5 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 upgrades/2.11.0-to-2.12.0/03_add_transaction_mutex_ignored_wait.sql diff --git a/Dashboard.Tests/RecommendationDeduperTests.cs b/Dashboard.Tests/RecommendationDeduperTests.cs index 1573f7fd..c7b9441e 100644 --- a/Dashboard.Tests/RecommendationDeduperTests.cs +++ b/Dashboard.Tests/RecommendationDeduperTests.cs @@ -241,6 +241,24 @@ public void FromEngineSeverity_BoundaryCases(double severity, CanonicalSeverity Assert.Equal(expected, RecommendationDeduper.FromEngineSeverity(severity)); } + // ---- RCSI card severity scales with the contention it would relieve ---- + + [Theory] + [InlineData(0, 0, CanonicalSeverity.Info)] // gated in (rw>=50) but minimal magnitude + [InlineData(9, 0, CanonicalSeverity.Info)] // just under the Warning blocking threshold + [InlineData(10, 0, CanonicalSeverity.Warning)] // notable reader/writer blocking + [InlineData(0, 1, CanonicalSeverity.Warning)] // a deadlock escalates off Info + [InlineData(99, 9, CanonicalSeverity.Warning)] // just under Critical on both axes + [InlineData(100, 0, CanonicalSeverity.Critical)] // extreme blocking + [InlineData(0, 10, CanonicalSeverity.Critical)] // many deadlocks + public void RcsiSeverityBand_ScalesWithContention(int blocking, int deadlocks, CanonicalSeverity expected) + { + // rwPct doesn't affect the band (the >=50% reader/writer gate already decided the card + // exists); the band reflects magnitude. 80 here is just a representative gated value. + var band = RecommendationsReader.RcsiSeverityBand(new RcsiInactionFigures(blocking, deadlocks, 80)); + Assert.Equal(expected, band); + } + // ---- canonical severity: legacy text --------------------------------- [Theory] diff --git a/Dashboard/Services/Recommendations/RecommendationsReader.cs b/Dashboard/Services/Recommendations/RecommendationsReader.cs index ba4ee9b4..4fd486aa 100644 --- a/Dashboard/Services/Recommendations/RecommendationsReader.cs +++ b/Dashboard/Services/Recommendations/RecommendationsReader.cs @@ -182,11 +182,16 @@ finding.Remediation is { } remediation && // this fan-out. foreach (var t in rcsiTargets) { + // RCSI's value is relieving reader/writer blocking, so escalate the card by the + // contention it would address (its inaction risk) rather than the flat config + // band auto_shrink/page_verify get — an RCSI rec sitting at Info next to a + // Warning blocking-spike reads wrong. + var rcsiBand = RcsiSeverityBand(t.Figures); items.Add(new RecommendationItem { Source = RecommendationSource.Engine, - CanonicalSeverity = band, - RawSeverity = finding.Severity, + CanonicalSeverity = rcsiBand, + RawSeverity = RcsiRawSeverity(rcsiBand), Database = t.Database, Title = $"RCSI is OFF — {t.Database}", ProblemArea = finding.Category, @@ -213,6 +218,30 @@ finding.Remediation is { } remediation && return new[] { MapEngineFinding(finding) }; } + /// + /// The canonical severity for an RCSI card, driven by the reader/writer contention the + /// change would relieve (its inaction risk) — NOT the flat config-advisory band the safe + /// settings (auto_shrink/page_verify) get. Blocking is the primary signal (RCSI directly + /// removes reader S-lock vs writer X-lock waits); deadlocks escalate it. Counts are over + /// the analysis window. + /// + internal static CanonicalSeverity RcsiSeverityBand(RcsiInactionFigures f) + { + if (f.BlockingEvents >= 100 || f.Deadlocks >= 10) + return CanonicalSeverity.Critical; + if (f.BlockingEvents >= 10 || f.Deadlocks >= 1) + return CanonicalSeverity.Warning; + return CanonicalSeverity.Info; + } + + /// A raw-severity stand-in for an RCSI card's within/cross-band sort. + private static double RcsiRawSeverity(CanonicalSeverity band) => band switch + { + CanonicalSeverity.Critical => 2.0, + CanonicalSeverity.Warning => 1.0, + _ => 0.3 + }; + /// /// The per-card title for a fanned-out DB_CONFIG safe-setting target: /// "<SETTING> — <database>" (e.g. "AUTO_SHRINK is ON — MyDb"), so each diff --git a/install/03_create_config_tables.sql b/install/03_create_config_tables.sql index cc6781e8..12f96d34 100644 --- a/install/03_create_config_tables.sql +++ b/install/03_create_config_tables.sql @@ -604,6 +604,7 @@ BEGIN (N'SNI_HTTP_ACCEPT'), (N'SOS_PROCESS_AFFINITY_MUTEX'), (N'SOS_WORK_DISPATCHER'), (N'SP_SERVER_DIAGNOSTICS_SLEEP'), (N'SQLTRACE_BUFFER_FLUSH'), (N'SQLTRACE_FILE_BUFFER'), (N'SQLTRACE_FILE_READ_IO_COMPLETION'), (N'SQLTRACE_FILE_WRITE_IO_COMPLETION'), (N'SQLTRACE_INCREMENTAL_FLUSH_SLEEP'), (N'SQLTRACE_WAIT_ENTRIES'), + (N'TRANSACTION_MUTEX'), (N'UCS_SESSION_REGISTRATION'), (N'VDI_CLIENT_OTHER'), (N'WAIT_FOR_RESULTS'), (N'WAIT_XTP_CKPT_CLOSE'), (N'WAIT_XTP_HOST_WAIT'), (N'WAIT_XTP_OFFLINE_CKPT_NEW_LOG'), (N'WAIT_XTP_RECOVERY'), (N'WAITFOR'), (N'WAITFOR_TASKSHUTDOWN'), (N'WINDOW_AGGREGATES_MULTIPASS'), (N'XE_BUFFERMGR_ALLPROCESSED_EVENT'), diff --git a/upgrades/2.11.0-to-2.12.0/03_add_transaction_mutex_ignored_wait.sql b/upgrades/2.11.0-to-2.12.0/03_add_transaction_mutex_ignored_wait.sql new file mode 100644 index 00000000..43609272 --- /dev/null +++ b/upgrades/2.11.0-to-2.12.0/03_add_transaction_mutex_ignored_wait.sql @@ -0,0 +1,47 @@ +/* +Copyright 2026 Darling Data, LLC +https://www.erikdarling.com/ + +Upgrade from 2.11.0 to 2.12.0 +Add TRANSACTION_MUTEX to config.ignored_wait_types. It is an internal synchronization +wait (multiple requests sharing one transaction / MARS / app pattern), not a DBA-tunable +condition -- it belongs with the other *_MUTEX waits already ignored +(RESOURCE_SEMAPHORE_MUTEX, QUERY_TASK_ENQUEUE_MUTEX, SOS_PROCESS_AFFINITY_MUTEX, ...). It +was simply omitted from the default seed, so existing servers keep collecting it and can +surface noise (e.g. an ANOMALY_WAIT_TRANSACTION_MUTEX finding). The base seed in +install/03 also adds it for fresh installs. Any already-collected rows age out of the +analysis window on their own once collection stops. +*/ + +SET ANSI_NULLS ON; +SET ANSI_PADDING ON; +SET ANSI_WARNINGS ON; +SET ARITHABORT ON; +SET CONCAT_NULL_YIELDS_NULL ON; +SET QUOTED_IDENTIFIER ON; +SET NUMERIC_ROUNDABORT OFF; +SET IMPLICIT_TRANSACTIONS OFF; +SET STATISTICS TIME, IO OFF; +GO + +USE PerformanceMonitor; +GO + +/* Idempotent: only insert when missing. */ +IF NOT EXISTS +( + SELECT + 1/0 + FROM config.ignored_wait_types + WHERE wait_type = N'TRANSACTION_MUTEX' +) +BEGIN + INSERT INTO + config.ignored_wait_types + ( + wait_type + ) + VALUES + (N'TRANSACTION_MUTEX'); +END; +GO diff --git a/upgrades/2.11.0-to-2.12.0/upgrade.txt b/upgrades/2.11.0-to-2.12.0/upgrade.txt index dc2756ff..a2543b93 100644 --- a/upgrades/2.11.0-to-2.12.0/upgrade.txt +++ b/upgrades/2.11.0-to-2.12.0/upgrade.txt @@ -1,2 +1,3 @@ 01_extend_blocked_process_report_columns.sql 02_make_other_process_cpu_nullable.sql +03_add_transaction_mutex_ignored_wait.sql