From 8c054303ab639b42b97d06e1a88b74437280e58c Mon Sep 17 00:00:00 2001 From: Michael Dailey Date: Thu, 4 Jun 2026 13:24:18 -0500 Subject: [PATCH 1/4] PDX-509: feat(mcp): validity bridge, tri-state status, quality threshold RCA: The local MCP validator runs a two-layer model the Quality Hub API does not have. Critical best-practice violations (e.g. unknown apiId) only moved quality_score and never gated is_valid, so a test that will not load in Provar could still be reported as valid. The quality threshold also defaulted to 80 with no env override and collapsed sub-threshold cases to a bare "invalid". Fix: Bridge critical best-practice violations into Layer-1 issues so they gate is_valid (deduped against hand-coded rules; score parity preserved). Add a shared resolveQualityThreshold (arg -> PROVAR_MCP_QUALITY_THRESHOLD -> 90) and a tri-state status (invalid / needs_improvement / valid) with meets_quality_threshold across the testcase and hierarchy validators. --- .../sf.provar.automation.project.validate.md | 2 +- .../provar/automation/project/validate.ts | 19 ++- src/mcp/tools/hierarchyValidate.ts | 10 +- src/mcp/tools/projectValidateFromPath.ts | 5 +- src/mcp/tools/testCaseValidate.ts | 127 +++++++++++++++++- src/mcp/tools/testPlanValidate.ts | 8 +- src/mcp/tools/testSuiteValidate.ts | 8 +- src/mcp/utils/qualityThreshold.ts | 45 +++++++ src/services/projectValidation.ts | 5 +- 9 files changed, 203 insertions(+), 26 deletions(-) create mode 100644 src/mcp/utils/qualityThreshold.ts diff --git a/messages/sf.provar.automation.project.validate.md b/messages/sf.provar.automation.project.validate.md index 2a868d38..c3695df2 100644 --- a/messages/sf.provar.automation.project.validate.md +++ b/messages/sf.provar.automation.project.validate.md @@ -15,7 +15,7 @@ Path to the Provar project root (directory containing the .testproject file). De # flags.quality-threshold.summary -Minimum quality score (0-100) for a test case to pass validation (default: 80). +Minimum quality score (0-100) for a test case to pass validation (default: 90). # flags.save-results.summary diff --git a/src/commands/provar/automation/project/validate.ts b/src/commands/provar/automation/project/validate.ts index 4db13bbf..eb02b173 100644 --- a/src/commands/provar/automation/project/validate.ts +++ b/src/commands/provar/automation/project/validate.ts @@ -8,7 +8,11 @@ /* eslint-disable camelcase */ import { SfCommand, Flags } from '@salesforce/sf-plugins-core'; import { Messages } from '@salesforce/core'; -import { validateProjectFromPath, ProjectValidationError, type ProjectValidationResult } from '../../../../services/projectValidation.js'; +import { + validateProjectFromPath, + ProjectValidationError, + type ProjectValidationResult, +} from '../../../../services/projectValidation.js'; Messages.importMessagesDirectoryFromMetaUrl(import.meta.url); const messages = Messages.loadMessages('@provartesting/provardx-cli', 'sf.provar.automation.project.validate'); @@ -27,7 +31,7 @@ export default class SfProvarAutomationProjectValidate extends SfCommand= quality_threshold; + const status = !isValid ? 'invalid' : meets_quality_threshold ? 'valid' : 'needs_improvement'; + return { status, quality_threshold, meets_quality_threshold }; +} + /** Resolve validation result from QualityHub API or fall back to local. */ async function resolveBaseResult( source: string, @@ -141,7 +164,7 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig { title: 'Validate Test Case', description: desc( - "Validate a Provar XML test case for structural correctness and quality. Checks XML declaration, root element, required attributes (guid UUID v4, testItemId integer), presence, and applies best-practice rules. When a Provar API key is configured (via sf provar auth login or PROVAR_API_KEY env var), calls the Quality Hub API for full 170-rule scoring. Falls back to local validation if no key is set or the API is unavailable. Returns validity_score (schema compliance), quality_score (best practices, 0–100), and validation_source indicating which ruleset was applied. Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved issues. Data-driven note (DATA-001): when file_path is supplied and the project's provardx-properties.json references the test case directly via top-level `testCase` / `testCases` rather than via a `.testinstance` inside a plan, the validator emits DATA-001 warning a declaration will resolve all variables to null in direct testCase-mode — wire the test into a plan via provar_testplan_add-instance to enable data-driven iteration. When structural errors are returned, consult the provar://docs/step-reference MCP resource for correct step attribute schemas.", + "Validate a Provar XML test case for structural correctness and quality. Checks XML declaration, root element, required attributes (guid UUID v4, testItemId integer), presence, and applies best-practice rules. When a Provar API key is configured (via sf provar auth login or PROVAR_API_KEY env var), calls the Quality Hub API for full 170-rule scoring. Falls back to local validation if no key is set or the API is unavailable. Returns validity_score (schema compliance), quality_score (best practices, 0–100), and validation_source indicating which ruleset was applied. Returns a tri-state status: 'invalid' (a critical defect — the test will not load in Provar, is_valid=false), 'needs_improvement' (loads but quality_score is below quality_threshold), or 'valid' (loads and clears the bar); meets_quality_threshold and the effective quality_threshold are also returned. Note: a critical best-practice violation (e.g. an unknown apiId) now gates is_valid the same way a structural error does — it surfaces in issues[] as an ERROR. major/minor/info best-practice violations affect quality_score (and the status verdict) only. Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved issues. Data-driven note (DATA-001): when file_path is supplied and the project's provardx-properties.json references the test case directly via top-level `testCase` / `testCases` rather than via a `.testinstance` inside a plan, the validator emits DATA-001 warning a declaration will resolve all variables to null in direct testCase-mode — wire the test into a plan via provar_testplan_add-instance to enable data-driven iteration. When structural errors are returned, consult the provar://docs/step-reference MCP resource for correct step attribute schemas.", 'Validate a Provar XML test case: structure, UUIDs, steps, quality scoring; run_id for baseline diff.' ), inputSchema: { @@ -164,6 +187,17 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig 'enum summary|standard|full, optional; default standard' ) ), + quality_threshold: z + .number() + .min(0) + .max(100) + .optional() + .describe( + desc( + 'Minimum quality_score for status to be "valid" rather than "needs_improvement". Does NOT affect is_valid (only critical defects do). Precedence: this arg → PROVAR_MCP_QUALITY_THRESHOLD env → 90.', + 'number 0–100, optional; default 90' + ) + ), baseline_run_id: z .string() .optional() @@ -175,7 +209,7 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig ), }, }, - async ({ content, xml, file_path, detail, baseline_run_id }) => { + async ({ content, xml, file_path, detail, baseline_run_id, quality_threshold }) => { const requestId = makeRequestId(); log('info', 'provar_testcase_validate', { requestId, has_content: !!(content ?? xml), file_path }); @@ -208,7 +242,13 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig const context = tcRunContext(file_path, source); const contextHash = computeContextHash('tc', context); const runId = generateRunId(context); - const bpViolations = (baseResult.best_practices_violations ?? []) as unknown as DiffableViolation[]; + // A critical BP violation bridged into issues[] shares its rule_id with the + // surfaced issue (the issue/BP rule_id namespaces are disjoint), so drop it + // from the BP list here to avoid double-counting it in the baseline diff. + const issueRuleIds = new Set(baseResult.issues.map((i) => i.rule_id)); + const bpViolations = (baseResult.best_practices_violations ?? []).filter( + (v) => !issueRuleIds.has(v.rule_id) + ) as unknown as DiffableViolation[]; const currentViolations: DiffableViolation[] = [ ...(baseResult.issues as unknown as DiffableViolation[]), ...bpViolations, @@ -262,12 +302,18 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig const completeness_score = calcCompletenessScore(baseResult.is_valid ? 1 : 0, 1); const recommended_next_action = calcNextAction(completeness_score, hasBaseline, currentViolations.length); + // PDX-509 tri-state verdict. is_valid answers "will it load" (gated by + // criticals only); status layers the quality bar on top so an AI client + // gets an explicit "loads but fix before running" signal. + const verdict = deriveQualityVerdict(baseResult.is_valid, baseResult.quality_score, quality_threshold); + const result = { requestId, run_id: runId, completeness_score, recommended_next_action, ...baseResult, + ...verdict, }; const detailLevel = (detail ?? 'standard') as DetailLevel; @@ -915,6 +961,69 @@ function validateComparisonTypes(tc: Record, issues: Validation } } +/** + * PDX-509 — validity bridge. + * + * Best-practice violations carry a `critical|major|minor|info` severity, but + * historically that severity only moved `quality_score` and never gated + * `is_valid`. Per the canonical taxonomy, `critical` means "Provar will not + * load/render the test case" — exactly the class of defect that MUST flip + * validity. This map records, for each `critical` BP rule, the hand-coded + * Layer-1 rule(s) that already cover the same defect; when one of those issues + * is already present we suppress the bridge so the failure is reported once, + * not twice. + * + * `major`/`minor`/`info` are intentionally NOT bridged — a major is a runtime + * ERROR that still loads, so it influences the quality score (and the + * `needs_improvement` verdict) without flipping `is_valid`. + */ +const BRIDGE_SUPPRESSED_BY: Record = { + 'SCHEMA-ROOT-001': ['TC_003'], + 'SCHEMA-STEPS-001': ['TC_020'], + 'VALID-STEPS-001': ['TC_020'], + 'SCHEMA-ID-001': ['TC_010', 'TC_011', 'TC_012'], + 'VALID-GUID-001': ['TC_010', 'TC_011', 'TC_012'], + 'STEP-ITEMID-001': ['TC_034', 'TC_035'], + // comparisonType enum BP rule (deferred) maps to the hand-coded Layer-1 check. + 'COMPARISON-TYPE-ENUM-001': ['COMPARISON-TYPE-001'], +}; + +/** Map a Best-Practices `appliesTo` token to the issue `applies_to` vocabulary. */ +const BP_APPLIES_TO_ISSUE: Record = { + TestCase: 'testCase', + Step: 'apiCall', + Argument: 'argument', + Document: 'document', +}; + +/** + * Surface every un-suppressed `critical` best-practice violation as an ERROR + * issue so it gates `is_valid`. Mutates `issues` in place. The BP list itself is + * left untouched, so `quality_score` (and its Lambda parity) is unchanged — a + * bridged critical deliberately appears in BOTH `issues[]` (validity) and + * `best_practices_violations[]` (scoring). + */ +function bridgeCriticalViolations( + bpViolations: Array, + issues: ValidationIssue[] +): void { + const present = new Set(issues.map((i) => i.rule_id)); + for (const v of bpViolations) { + if (v.severity !== 'critical') continue; + if (present.has(v.rule_id)) continue; + const suppressors = BRIDGE_SUPPRESSED_BY[v.rule_id]; + if (suppressors && suppressors.some((id) => present.has(id))) continue; + issues.push({ + rule_id: v.rule_id, + severity: 'ERROR', + message: v.message, + applies_to: BP_APPLIES_TO_ISSUE[v.applies_to[0] ?? ''] ?? 'testCase', + suggestion: v.recommendation, + }); + present.add(v.rule_id); + } +} + function finalize( issues: ValidationIssue[], testCaseId: string | null, @@ -923,15 +1032,19 @@ function finalize( xmlContent: string, testName?: string ): TestCaseValidationResult { + // Layer 2: quality score (best practices engine — same rules & formula as Quality Hub API) + const bp = runBestPractices(xmlContent, { testName }); + + // PDX-509: bridge critical BP violations into Layer-1 issues BEFORE counting, + // so a "won't load" best-practice defect flips is_valid like a structural one. + bridgeCriticalViolations(bp.violations, issues); + const errorCount = issues.filter((i) => i.severity === 'ERROR').length; const warningCount = issues.filter((i) => i.severity === 'WARNING').length; - // Layer 1: validity score (schema compliance — existing rules) + // Layer 1: validity score (schema compliance — existing rules + bridged criticals) const validity_score = Math.max(0, 100 - errorCount * 20); - // Layer 2: quality score (best practices engine — same rules & formula as Quality Hub API) - const bp = runBestPractices(xmlContent, { testName }); - return { is_valid: errorCount === 0, validity_score, diff --git a/src/mcp/tools/testPlanValidate.ts b/src/mcp/tools/testPlanValidate.ts index d1e91fc9..c6830627 100644 --- a/src/mcp/tools/testPlanValidate.ts +++ b/src/mcp/tools/testPlanValidate.ts @@ -12,6 +12,7 @@ import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { resolveQualityThreshold } from '../utils/qualityThreshold.js'; import { validatePlan, buildHierarchySummary, @@ -142,7 +143,10 @@ export function registerTestPlanValidate(server: McpServer): void { .max(100) .optional() .describe( - desc('Minimum quality score for a test case to be considered valid (default: 80)', 'number 0–100, optional') + desc( + 'Minimum quality score for a test case to be considered valid. Precedence: this arg → PROVAR_MCP_QUALITY_THRESHOLD env → 90.', + 'number 0–100, optional; default 90' + ) ), detail: z .enum(['summary', 'standard', 'full']) @@ -161,7 +165,7 @@ export function registerTestPlanValidate(server: McpServer): void { log('info', 'provar_testplan_validate', { requestId, plan_name }); try { - const threshold = quality_threshold ?? 80; + const threshold = resolveQualityThreshold(quality_threshold); const input: TestPlanInput = { name: plan_name, test_suites: test_suites ?? [], diff --git a/src/mcp/tools/testSuiteValidate.ts b/src/mcp/tools/testSuiteValidate.ts index 07e96c47..c5be195b 100644 --- a/src/mcp/tools/testSuiteValidate.ts +++ b/src/mcp/tools/testSuiteValidate.ts @@ -12,6 +12,7 @@ import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { resolveQualityThreshold } from '../utils/qualityThreshold.js'; import { generateRunId, saveRun, @@ -114,7 +115,10 @@ export function registerTestSuiteValidate(server: McpServer): void { .max(100) .optional() .describe( - desc('Minimum quality score for a test case to be considered valid (default: 80)', 'number 0–100, optional') + desc( + 'Minimum quality score for a test case to be considered valid. Precedence: this arg → PROVAR_MCP_QUALITY_THRESHOLD env → 90.', + 'number 0–100, optional; default 90' + ) ), detail: z .enum(['summary', 'standard', 'full']) @@ -142,7 +146,7 @@ export function registerTestSuiteValidate(server: McpServer): void { log('info', 'provar_testsuite_validate', { requestId, suite_name }); try { - const threshold = quality_threshold ?? 80; + const threshold = resolveQualityThreshold(quality_threshold); const input: TestSuiteInput = { name: suite_name, test_cases: test_cases ?? [], diff --git a/src/mcp/utils/qualityThreshold.ts b/src/mcp/utils/qualityThreshold.ts new file mode 100644 index 00000000..39f46031 --- /dev/null +++ b/src/mcp/utils/qualityThreshold.ts @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +/* + * Quality-threshold resolution for the validation tools. + * + * A test case "meets quality" when its best-practices `quality_score` is at or + * above this threshold. The bar is resolved with the precedence: + * per-call `quality_threshold` arg → PROVAR_MCP_QUALITY_THRESHOLD env → default (90). + * + * The default was raised from 80 to 90 so that "valid" means genuinely + * production-ready, not merely "loads". The env var lets a team pin a house + * standard (e.g. relax to 80 during a migration) without threading an argument + * through every call. Out-of-range or unparseable values fall through to the + * next source — mirroring the depth-guard convention in server.ts. + */ + +export const DEFAULT_QUALITY_THRESHOLD = 90; + +/** True when `n` is a finite number inside the inclusive 0–100 score range. */ +function inRange(n: number): boolean { + return Number.isFinite(n) && n >= 0 && n <= 100; +} + +/** + * Resolve the effective quality threshold. + * + * @param perCallArg The tool's `quality_threshold` argument, if the caller set one. + * @returns A number in 0–100; never NaN. + */ +export function resolveQualityThreshold(perCallArg?: number): number { + if (typeof perCallArg === 'number' && inRange(perCallArg)) return perCallArg; + + const raw = process.env['PROVAR_MCP_QUALITY_THRESHOLD']; + if (raw !== undefined && raw.trim() !== '') { + const parsed = Number(raw); + if (inRange(parsed)) return parsed; + } + + return DEFAULT_QUALITY_THRESHOLD; +} diff --git a/src/services/projectValidation.ts b/src/services/projectValidation.ts index f4bea14a..5744eeb2 100644 --- a/src/services/projectValidation.ts +++ b/src/services/projectValidation.ts @@ -23,6 +23,7 @@ import { type HierarchyViolation, type HierarchySummary, } from '../mcp/tools/hierarchyValidate.js'; +import { resolveQualityThreshold } from '../mcp/utils/qualityThreshold.js'; // ── Public error type ───────────────────────────────────────────────────────── @@ -39,7 +40,7 @@ export class ProjectValidationError extends Error { export interface ProjectValidationOptions { project_path: string; - quality_threshold?: number; // default 80 + quality_threshold?: number; // resolved via resolveQualityThreshold: arg → PROVAR_MCP_QUALITY_THRESHOLD → 90 save_results?: boolean; // default true (any value !== false means save) results_dir?: string; // default '{project_path}/provardx/validation' } @@ -842,7 +843,7 @@ export function validateProjectFromPath(options: ProjectValidationOptions): Proj ); } - const threshold = quality_threshold ?? 80; + const threshold = resolveQualityThreshold(quality_threshold); // 1. Read project context from .testproject const { projectName, context } = readProjectContext(projectRoot); From a57e5bb016421742fd748ff9cd814501aecf59e9 Mon Sep 17 00:00:00 2001 From: Michael Dailey Date: Thu, 4 Jun 2026 14:01:14 -0500 Subject: [PATCH 2/4] PDX-509: feat(mcp): bridge critical violations, fix date emission, audit severities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RCA: With the validity bridge live, critical best-practice violations now gate is_valid. This surfaced two latent, previously score-only defects: (1) the generator emitted valueClass="date"/"datetime" with raw ISO strings, but the real Provar corpus (651 cases) stores date/dateTime exclusively as epoch milliseconds with camelCase dateTime — an ISO string fails to load (RENDER-DATE- VALUECLASS-001), and the casing rule wrongly lowercase-enforced dateTime; (2) the varStringLiteral back-end exemption for UI-target args is wrong (a bare {Var} there lands in the URL as %7B...%7D and hard-fails). Fix: Suppress Layer-1-owned concepts from the bridge so the authoritative schema rule reports them once (empty still loads). Convert ISO date/datetime to epoch ms (UTC) with canonical dateTime casing in the generator and accept that casing in RENDER-CASE-001. Drop the varStringLiteral UI-target exemption. Add PDX-509 bridge/threshold/tri-state tests and a resolveQualityThreshold suite; update docs/mcp.md. Severity audit: all implemented criticals are load-blocking except VAR-NAMING-001 (its own text says "RUNTIME FAILURES" = major) — flagged for review, not changed, to preserve QH score parity. --- docs/mcp.md | 103 ++++++------ .../rules/provar_best_practices_rules.json | 2 +- src/mcp/tools/bestPracticesEngine.ts | 34 +++- src/mcp/tools/testCaseGenerate.ts | 27 +++- src/mcp/tools/testCaseValidate.ts | 40 +++-- test/unit/mcp/bestPracticesEngine.test.ts | 24 ++- test/unit/mcp/qualityThreshold.test.ts | 55 +++++++ test/unit/mcp/testCaseGenerate.test.ts | 19 ++- test/unit/mcp/testCaseValidate.test.ts | 147 +++++++++++++++++- test/unit/mcp/testPlanValidate.test.ts | 2 +- test/unit/mcp/testSuiteValidate.test.ts | 36 ++++- 11 files changed, 400 insertions(+), 89 deletions(-) create mode 100644 test/unit/mcp/qualityThreshold.test.ts diff --git a/docs/mcp.md b/docs/mcp.md index 55cd297d..9489bf08 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -190,18 +190,19 @@ sf provar mcp start -a /workspace/project-a -a /workspace/project-b The MCP server reads the following environment variables at startup or during tool invocation. Internal/dev-only variables (license bypass, ALGAS dev credentials) are intentionally not documented here — they remain source-only and are not supported for production use. -| Variable | Purpose | Default | -| ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- | -| `PROVAR_HOME` | Provar Automation install root. Used to locate license files (`/.licenses/*.properties`) and resolve home-relative tool defaults. | `~/Provar` (`%USERPROFILE%\Provar` on Windows) | -| `PROVAR_API_KEY` | API key for Quality Hub validation. Takes priority over any stored key in `~/.provar/credentials.json`. Must start with `pv_k_` — any other value is ignored. | None — falls back to stored credentials | -| `PROVAR_QUALITY_HUB_URL` | Override the Quality Hub API base URL. Set when pointing at a non-default Quality Hub environment. | Dev API Gateway URL (`/dev`) | -| `PROVAR_MCP_TOOLS` | Comma-separated list of tool groups to register at startup. Deep-dive: [Tool group filtering](#tool-group-filtering-provar_mcp_tools). | All groups registered | -| `PROVAR_MCP_SCHEMA_MODE` | Set to `compact` to shorten all tool descriptions. Deep-dive: [Compact descriptions](#compact-descriptions-provar_mcp_schema_mode). | Standard (full) descriptions | -| `PROVAR_MCP_MAX_TOOL_DEPTH` | Agentic loop guard — max tool calls per MCP session before further calls return `TOOL_BUDGET_EXCEEDED`. Deep-dive: [Agentic loop guard](#agentic-loop-guard-provar_mcp_max_tool_depth). | `50` | -| `PROVAR_MCP_EMIT_TOKEN_META` | When `true`, appends a `_meta` token-attribution block to every tool response. Deep-dive: [Per-call token attribution](#per-call-token-attribution-provar_mcp_emit_token_meta). | unset (no `_meta` block) | -| `PROVAR_MCP_VALIDATION_DIR` | Override the directory where `provar_testcase_validate` writes validation diff artifacts. | `/.provar-mcp/validation/` | -| `PROVAR_NO_UPDATE_CHECK` | When set (any non-empty value), skips the startup npm-registry update check. Same effect as `--no-update-check`. | unset (check runs) | -| `PROVAR_AUTO_DEFECTS` | When `1`, enables the Quality Hub auto-defect creation flow. Normally set by passing the `--auto-defects` flag rather than directly. | unset (auto-defects disabled) | +| Variable | Purpose | Default | +| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- | +| `PROVAR_HOME` | Provar Automation install root. Used to locate license files (`/.licenses/*.properties`) and resolve home-relative tool defaults. | `~/Provar` (`%USERPROFILE%\Provar` on Windows) | +| `PROVAR_API_KEY` | API key for Quality Hub validation. Takes priority over any stored key in `~/.provar/credentials.json`. Must start with `pv_k_` — any other value is ignored. | None — falls back to stored credentials | +| `PROVAR_QUALITY_HUB_URL` | Override the Quality Hub API base URL. Set when pointing at a non-default Quality Hub environment. | Dev API Gateway URL (`/dev`) | +| `PROVAR_MCP_TOOLS` | Comma-separated list of tool groups to register at startup. Deep-dive: [Tool group filtering](#tool-group-filtering-provar_mcp_tools). | All groups registered | +| `PROVAR_MCP_SCHEMA_MODE` | Set to `compact` to shorten all tool descriptions. Deep-dive: [Compact descriptions](#compact-descriptions-provar_mcp_schema_mode). | Standard (full) descriptions | +| `PROVAR_MCP_MAX_TOOL_DEPTH` | Agentic loop guard — max tool calls per MCP session before further calls return `TOOL_BUDGET_EXCEEDED`. Deep-dive: [Agentic loop guard](#agentic-loop-guard-provar_mcp_max_tool_depth). | `50` | +| `PROVAR_MCP_EMIT_TOKEN_META` | When `true`, appends a `_meta` token-attribution block to every tool response. Deep-dive: [Per-call token attribution](#per-call-token-attribution-provar_mcp_emit_token_meta). | unset (no `_meta` block) | +| `PROVAR_MCP_VALIDATION_DIR` | Override the directory where `provar_testcase_validate` writes validation diff artifacts. | `/.provar-mcp/validation/` | +| `PROVAR_MCP_QUALITY_THRESHOLD` | Minimum `quality_score` for a test case to count as `valid` (vs `needs_improvement`) across all validation tools. A per-call `quality_threshold` argument overrides it. Out-of-range or unparseable values are ignored. | `90` | +| `PROVAR_NO_UPDATE_CHECK` | When set (any non-empty value), skips the startup npm-registry update check. Same effect as `--no-update-check`. | unset (check runs) | +| `PROVAR_AUTO_DEFECTS` | When `1`, enables the Quality Hub auto-defect creation flow. Normally set by passing the `--auto-defects` flag rather than directly. | unset (auto-defects disabled) | ### Setting these in your MCP client config @@ -1011,37 +1012,43 @@ Validates an XML test case for schema correctness (validity score) and best prac **Input** -| Parameter | Type | Required | Description | -| ----------------- | --------------------------------- | ------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `content` | string | one of `content`/`xml`/`file_path` required | XML content to validate (MCP field name) | -| `xml` | string | one of `content`/`xml`/`file_path` required | XML content to validate (API-compatible alias) | -| `file_path` | string | one of `content`/`xml`/`file_path` required | Path to the `.testcase` XML file | -| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: is_valid, scores, and stop signal only. `"standard"`/`"full"`: full issues list (default). | -| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved issues since that run (`{ added, resolved, unchanged_count, run_id }`). Returns `BASELINE_NOT_FOUND` if the run ID is unknown. | +| Parameter | Type | Required | Description | +| ------------------- | --------------------------------- | ------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `content` | string | one of `content`/`xml`/`file_path` required | XML content to validate (MCP field name) | +| `xml` | string | one of `content`/`xml`/`file_path` required | XML content to validate (API-compatible alias) | +| `file_path` | string | one of `content`/`xml`/`file_path` required | Path to the `.testcase` XML file | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: is_valid, scores, and stop signal only. `"standard"`/`"full"`: full issues list (default). | +| `quality_threshold` | number (0–100) | no | Minimum `quality_score` for `status` to be `"valid"` rather than `"needs_improvement"`. Does **not** affect `is_valid` (only critical defects do). Precedence: this arg → `PROVAR_MCP_QUALITY_THRESHOLD` env → `90`. | +| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved issues since that run (`{ added, resolved, unchanged_count, run_id }`). Returns `BASELINE_NOT_FOUND` if the run ID is unknown. | **Output** -| Field | Type | Description | -| -------------------------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------ | -| `run_id` | string | Stable identifier for this validation run. Pass as `baseline_run_id` in the next call to receive only new/resolved issues. | -| `completeness_score` | number (0–1) | Ratio of valid test cases to total test cases validated (`0.0`–`1.0`). | -| `recommended_next_action` | string | `"stop"` (all passing), `"continue"` (issues remain), or `"escalate"` (no baseline yet — run without `baseline_run_id` first). | -| `is_valid` | boolean | `true` if zero ERROR-level schema violations | -| `validity_score` | number (0–100) | Schema compliance score (100 − errorCount × 20) | -| `quality_score` | number (0–100) | Best-practices score (weighted deduction formula) | -| `error_count` | integer | Schema error count | -| `warning_count` | integer | Schema warning count | -| `step_count` | integer | Number of `` steps | -| `test_case_id` | string | Value of the `id` attribute | -| `test_case_name` | string | Value of the `name` attribute | -| `issues` | array | Schema issues with `rule_id`, `severity`, `message` | -| `best_practices_violations` | array | Best-practices violations with `rule_id`, `severity`, `weight`, `message` | -| `best_practices_rules_evaluated` | integer | How many best-practices rules were checked | -| `validation_source` | string | `quality_hub`, `local`, or `local_fallback` — see Authentication section | -| `validation_warning` | string | Present when `validation_source` is `local` (onboarding) or `local_fallback` (explains why API failed) | +| Field | Type | Description | +| -------------------------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `run_id` | string | Stable identifier for this validation run. Pass as `baseline_run_id` in the next call to receive only new/resolved issues. | +| `completeness_score` | number (0–1) | Ratio of valid test cases to total test cases validated (`0.0`–`1.0`). | +| `recommended_next_action` | string | `"stop"` (all passing), `"continue"` (issues remain), or `"escalate"` (no baseline yet — run without `baseline_run_id` first). | +| `is_valid` | boolean | `true` if the test case will load in Provar — i.e. zero ERROR-level schema violations **and** zero `critical` best-practice violations (the latter are bridged into `issues[]`; see note below) | +| `status` | string | Tri-state verdict: `"invalid"` (a critical defect — will not load), `"needs_improvement"` (loads but `quality_score` is below `quality_threshold`), or `"valid"` (loads and clears the bar) | +| `quality_threshold` | number (0–100) | The effective threshold applied (resolved from the `quality_threshold` arg, the `PROVAR_MCP_QUALITY_THRESHOLD` env var, or the default `90`) | +| `meets_quality_threshold` | boolean | `true` when `quality_score >= quality_threshold` | +| `validity_score` | number (0–100) | Schema compliance score (100 − errorCount × 20) | +| `quality_score` | number (0–100) | Best-practices score (weighted deduction formula) | +| `error_count` | integer | Schema error count | +| `warning_count` | integer | Schema warning count | +| `step_count` | integer | Number of `` steps | +| `test_case_id` | string | Value of the `id` attribute | +| `test_case_name` | string | Value of the `name` attribute | +| `issues` | array | Schema issues with `rule_id`, `severity`, `message` | +| `best_practices_violations` | array | Best-practices violations with `rule_id`, `severity`, `weight`, `message` | +| `best_practices_rules_evaluated` | integer | How many best-practices rules were checked | +| `validation_source` | string | `quality_hub`, `local`, or `local_fallback` — see Authentication section | +| `validation_warning` | string | Present when `validation_source` is `local` (onboarding) or `local_fallback` (explains why API failed) | **Key schema rules:** TC_001 (missing XML declaration), TC_002 (malformed XML), TC_003 (wrong root element), TC_010/011/012 (missing/invalid id/guid), TC_031 (invalid apiCall guid), TC_034/035 (non-integer testItemId). +**Validity bridge.** A `critical` best-practice violation means "Provar will not load/render the test case" (e.g. an unknown/hallucinated `apiId`, a missing required control or connection argument, an invalid render value-class). These now gate `is_valid` the same way a structural schema error does — each is surfaced into `issues[]` as an `ERROR` and flips `is_valid` to `false`. `major` (runtime ERROR), `minor` (warning), and `info` violations do **not** flip `is_valid`; they affect `quality_score` and therefore the `needs_improvement` verdict. Best-practice rules covering concepts the schema rules already own (root element, identifier, steps presence, `testItemId` integers, comparisonType enums) are intentionally **not** double-reported — the hand-coded schema rule is authoritative there. + **Warning rules:** - **DATA-001** — `testCase` declares a `` element. When the validator is called with `file_path` and the project's `provardx-properties.json` references that test case directly via top-level `testCase` or `testCases` (rather than via a `.testinstance` inside a plan), the warning carries the `WARNING [DATA-001]:` prefix and recommends wiring the test into a plan via `provar_testplan_add-instance`. When `file_path` is not supplied (or the project context cannot be resolved), the warning falls back to a structural advisory recommending `SetValues` (Test scope) steps. The warning is suppressed entirely when a `.testinstance` references the test case, because data-driven iteration works in that mode. See also [Data-driven execution](#data-driven-execution). @@ -1110,15 +1117,15 @@ Validates a Provar test suite — checks for empty suites, duplicate names (with **Input** -| Parameter | Type | Required | Description | -| ------------------- | --------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -| `suite_name` | string | yes | Name of the test suite | -| `test_cases` | array | no | Test cases directly in this suite. Each item: `{ name, xml_content \| xml }` | -| `child_suites` | array | no | Child suites (up to 2 levels of nesting). Each item: `{ name, test_cases?, test_suites?, test_case_count? }` | -| `test_case_count` | integer | no | Override total count for the size check (useful when not sending full XML) | -| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be "valid" (default: 80) | -| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: name, scores, and stop signal only. `"standard"`/`"full"`: full violations and per-test-case results (default). | -| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved violations since that run. Returns `BASELINE_NOT_FOUND` if the run ID is unknown. | +| Parameter | Type | Required | Description | +| ------------------- | --------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `suite_name` | string | yes | Name of the test suite | +| `test_cases` | array | no | Test cases directly in this suite. Each item: `{ name, xml_content \| xml }` | +| `child_suites` | array | no | Child suites (up to 2 levels of nesting). Each item: `{ name, test_cases?, test_suites?, test_case_count? }` | +| `test_case_count` | integer | no | Override total count for the size check (useful when not sending full XML) | +| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be `valid` rather than `needs_improvement`. Precedence: this arg → `PROVAR_MCP_QUALITY_THRESHOLD` env → `90`. | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: name, scores, and stop signal only. `"standard"`/`"full"`: full violations and per-test-case results (default). | +| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved violations since that run. Returns `BASELINE_NOT_FOUND` if the run ID is unknown. | **Output** — `{ run_id, completeness_score, recommended_next_action, name, level: "suite", quality_score, violations[], test_cases[], test_suites[], summary }` @@ -1145,7 +1152,7 @@ Validates a Provar test plan — checks for empty plans, duplicate suite names, | `test_cases` | array | no | Test cases directly in this plan | | `test_suite_count` | integer | no | Override suite count for the size check | | `metadata` | object | no | Plan completeness metadata (see below) | -| `quality_threshold` | number (0–100) | no | Minimum quality score (default: 80) | +| `quality_threshold` | number (0–100) | no | Minimum quality score for `valid` vs `needs_improvement`. Precedence: this arg → `PROVAR_MCP_QUALITY_THRESHOLD` env → `90`. | | `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: name, scores, and stop signal only. `"standard"`/`"full"`: full violations and hierarchy results (default). | **`metadata` fields** @@ -1183,7 +1190,7 @@ Validates a Provar project directly from its directory on disk. Reads the plan/s | Parameter | Type | Required | Description | | ---------------------- | --------------------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `project_path` | string | yes | Absolute path to the Provar project root (directory containing `.testproject`) | -| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be considered valid (default: 80) | +| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be considered valid. Precedence: this arg → `PROVAR_MCP_QUALITY_THRESHOLD` env → `90`. | | `save_results` | boolean | no | Write a QH-compatible JSON report to `{project_path}/provardx/validation/` (default: true) | | `results_dir` | string | no | Override the output directory for the saved report (must be within `allowed-paths`) | | `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: key scores and stop signal only. `"standard"`: slim violation summary (default). `"full"`: full per-suite and per-test-case data. | diff --git a/src/mcp/rules/provar_best_practices_rules.json b/src/mcp/rules/provar_best_practices_rules.json index c8b79897..228d3239 100644 --- a/src/mcp/rules/provar_best_practices_rules.json +++ b/src/mcp/rules/provar_best_practices_rules.json @@ -2963,7 +2963,7 @@ "id": "VAR-STRING-LITERAL-001", "category": "TestCaseDesign", "name": "Variable reference stored as plain string", - "description": "An argument value contains a {VarName} or {Obj.Field} pattern stored as class=\"value\" valueClass=\"string\". Provar passes the literal string to the API instead of resolving the variable — the step will silently receive the wrong input.", + "description": "An argument value contains a {VarName} or {Obj.Field} pattern stored as class=\"value\" valueClass=\"string\". Provar passes the literal string to the API instead of resolving the variable. In data arguments the step silently receives the wrong input; in UI target arguments (sfUiTargetObjectId, sfUiTargetResultName) the literal {VarName} lands in the URL as %7BVarName%7D and the step fails at runtime (record 'no longer available'). The test case still loads, so this is a runtime execution error (major), not a load failure (critical) or a style warning (minor).", "appliesTo": ["Step"], "severity": "major", "weight": 5, diff --git a/src/mcp/tools/bestPracticesEngine.ts b/src/mcp/tools/bestPracticesEngine.ts index 0829f5b1..8a7c0a6e 100644 --- a/src/mcp/tools/bestPracticesEngine.ts +++ b/src/mcp/tools/bestPracticesEngine.ts @@ -1327,6 +1327,12 @@ const VALUE_CLASS_CASING_VALID: ReadonlySet = new Set([ 'not', ]); +// Canonical Provar spelling for valueClasses whose correct form is NOT all-lowercase. +// The corpus and RENDER-DATE-VALUECLASS-001 / VALUE-CLASS-001 all use camelCase `dateTime`, +// so the lowercase-enforcement below must expect `dateTime`, not `datetime`. (The QH back-end +// lowercases all classes — flag a matching correction there; PDX-509.) +const VALUE_CLASS_CANONICAL_CASE: Record = { datetime: 'dateTime' }; + /** RENDER-CASE-001 — a known valueClass spelled with wrong case (e.g. `Boolean` → `boolean`). */ function validateValueClassCasing(tc: XmlNode, rule: BPRule): BPViolation | null { const offenders: Array<{ valueClass: string; expected: string }> = []; @@ -1334,7 +1340,9 @@ function validateValueClassCasing(tc: XmlNode, rule: BPRule): BPViolation | null const vc = v['@_valueClass'] as string | undefined; if (!vc) continue; const lower = vc.toLowerCase(); - if (VALUE_CLASS_CASING_VALID.has(lower) && vc !== lower) offenders.push({ valueClass: vc, expected: lower }); + if (!VALUE_CLASS_CASING_VALID.has(lower)) continue; + const expected = VALUE_CLASS_CANONICAL_CASE[lower] ?? lower; + if (vc !== expected) offenders.push({ valueClass: vc, expected }); } if (!offenders.length) return null; const MAX = 5; @@ -1668,23 +1676,35 @@ function resolvedArgText(call: XmlNode, argId: string): string { return nodeText(v); } +// Matches a bare variable token only: `{Name}` or `{Obj.Field}`. The character +// class `[\w.]` excludes `:`, so binding-style expressions such as +// `{targetUrl:object}` never match — they are inherently safe and need no +// argument-name exemption. const VAR_LITERAL_PATTERN = /^\{[\w.]+\}$/; -const VAR_LITERAL_TOLERATED_ARGS: ReadonlySet = new Set(['sfUiTargetObjectId', 'sfUiTargetResultName']); /** * VAR-STRING-LITERAL-001 — a `{Var}`/`{Obj.Field}` token stored as - * `class="value" valueClass="string"` instead of `class="variable"`. Emits ONE - * violation per offending value (the back-end returns a list), so score parity - * is preserved; the two interpolation-tolerant target args are exempt. + * `class="value" valueClass="string"` instead of `class="variable"`. Provar does + * not resolve it at runtime, so the API silently receives the literal text. Emits + * ONE violation per offending value (the back-end returns a list). + * + * Local correction (PDX-508): the back-end exempts `sfUiTargetObjectId` / + * `sfUiTargetResultName` from this check, but field evidence shows a bare + * `{Variable}` in those UI-target args is NOT interpolated — it lands in the URL + * as `%7B…%7D` and the step hard-fails (a load/exec stopper, not a warning). We + * therefore do NOT exempt those args. Binding-style `{ns:key}` expressions stay + * safe because {@link VAR_LITERAL_PATTERN} already excludes them (the colon). A + * matching change is queued for the Quality Hub back-end so the score parity is + * restored once both ship. */ function validateVarStringLiteral(tc: XmlNode, rule: BPRule): BPViolation[] { const violations: BPViolation[] = []; for (const call of getAllApiCalls(tc)) { const ctx = stepContext(call); - for (const { value, argId } of getStepValueElements(call)) { + for (const { value } of getStepValueElements(call)) { if (value['@_class'] !== 'value' || value['@_valueClass'] !== 'string') continue; const text = nodeText(value); - if (!VAR_LITERAL_PATTERN.test(text) || VAR_LITERAL_TOLERATED_ARGS.has(argId)) continue; + if (!VAR_LITERAL_PATTERN.test(text)) continue; violations.push( makeViolation( rule, diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts index 49517d90..b61afe93 100644 --- a/src/mcp/tools/testCaseGenerate.ts +++ b/src/mcp/tools/testCaseGenerate.ts @@ -226,9 +226,9 @@ const TOOL_DESCRIPTION = [ 'target argument (UiWithScreen/UiWithRow): pass the URI value; emitted as class="uiTarget" uri="...".', 'locator argument (UiDoAction/UiAssert/UiRead/UiFill): pass the URI value; emitted as class="uiLocator" uri="...".', 'valueClass auto-detection: argument values are typed automatically before XML emission. ' + - 'ISO-8601 date "YYYY-MM-DD" → valueClass="date"; ISO-8601 datetime "YYYY-MM-DDTHH:MM:SS" (optional fractional seconds + timezone) → "datetime"; ' + + 'ISO-8601 date "YYYY-MM-DD" → valueClass="date"; ISO-8601 datetime "YYYY-MM-DDTHH:MM:SS" (optional fractional seconds + timezone) → "dateTime"; ' + '"true"/"false" → "boolean"; numeric string (e.g. "42", "-5", "3.14") → "decimal"; otherwise "string". ' + - 'Pass dates / booleans / numbers in those formats — Provar runtime silently discards date fields emitted as valueClass="string". ' + + 'Pass dates in ISO-8601 — the generator converts them to the epoch-millisecond value Provar requires (a date/dateTime field emitted as an ISO string, or as valueClass="string", fails to load in the IDE). A bare date is treated as UTC midnight; a datetime without a timezone is treated as UTC. ' + 'Note: numbers always emit valueClass="decimal" per the canonical Provar reference (there is no separate "integer" valueClass).', 'Edit page objects: action=Edit targets require a compiled page object for the SF object. ' + 'If none exists in the project page-objects directory, the locator binding will fail at runtime. ' + @@ -569,9 +569,32 @@ function buildArgumentValue(key: string, val: string, indent: string, inNamedVal // `fieldTypeHint` parameter on `inferSalesforceValueClass` is intentionally not threaded // through here yet — it lands in PDX-492 (H2b) along with the `field_type_hints` tool input. const inferred = inferSalesforceValueClass(key, val); + if (inferred === 'date' || inferred === 'datetime') { + // PDX-509: Provar stores date/dateTime as epoch MILLISECONDS, never an ISO string — + // an ISO string fails to load in the IDE (RENDER-DATE-VALUECLASS-001), and the real + // corpus uses epoch ms exclusively with camelCase `dateTime`. Convert here. + const ms = isoToEpochMs(val, inferred); + const valueClass = inferred === 'datetime' ? 'dateTime' : 'date'; + const emitted = ms !== null ? String(ms) : escapeXmlContent(val); + return `${indent}${emitted}`; + } return `${indent}${escapeXmlContent(val)}`; } +/** + * Convert a validated ISO-8601 date / datetime string to epoch milliseconds. + * A date-only string ("YYYY-MM-DD") is parsed as UTC midnight per the ES spec. A + * datetime WITHOUT an explicit timezone would otherwise parse as machine-local + * time (non-deterministic), so we pin it to UTC by appending 'Z' — matching the + * Provar corpus, where date/dateTime values are stored as UTC epoch ms. + * Returns null if the value cannot be parsed (caller falls back to the raw text). + */ +function isoToEpochMs(val: string, kind: 'date' | 'datetime'): number | null { + const needsUtc = kind === 'datetime' && !/(Z|[+-]\d{2}:?\d{2})$/.test(val); + const ms = Date.parse(needsUtc ? `${val}Z` : val); + return Number.isNaN(ms) ? null : ms; +} + function buildArgumentsXml(attributes: Record, baseIndent = ' ', apiId = ''): string { const entries = Object.entries(attributes); if (entries.length === 0) return ''; diff --git a/src/mcp/tools/testCaseValidate.ts b/src/mcp/tools/testCaseValidate.ts index 189e17ac..94f8336d 100644 --- a/src/mcp/tools/testCaseValidate.ts +++ b/src/mcp/tools/testCaseValidate.ts @@ -968,25 +968,34 @@ function validateComparisonTypes(tc: Record, issues: Validation * historically that severity only moved `quality_score` and never gated * `is_valid`. Per the canonical taxonomy, `critical` means "Provar will not * load/render the test case" — exactly the class of defect that MUST flip - * validity. This map records, for each `critical` BP rule, the hand-coded - * Layer-1 rule(s) that already cover the same defect; when one of those issues - * is already present we suppress the bridge so the failure is reported once, - * not twice. + * validity. The bridge surfaces every un-suppressed `critical` BP violation as + * an ERROR issue so it gates `is_valid`. * * `major`/`minor`/`info` are intentionally NOT bridged — a major is a runtime * ERROR that still loads, so it influences the quality score (and the * `needs_improvement` verdict) without flipping `is_valid`. + * + * These BP rules cover concepts the hand-coded Layer-1 checks already OWN + * (root element, identifier, steps presence, testItemId integers, comparisonType + * enums). Layer-1 is authoritative for those: it fires only on the genuinely + * load-blocking condition (e.g. TC_020 fires on a MISSING , but an + * empty-but-present still loads — it just does nothing), whereas the + * ported BP rule is coarser (VALID-STEPS-001 also flags an empty , a + * "does nothing" design smell, not a load failure). So we never bridge these — + * Layer-1 already gates the load-blocking case, and the BP version still counts + * toward quality_score. The bridge's real value is the criticals Layer-1 does + * NOT check (unknown apiId, missing required control/connection args, invalid + * render value-class casing, NitroX connect args…). */ -const BRIDGE_SUPPRESSED_BY: Record = { - 'SCHEMA-ROOT-001': ['TC_003'], - 'SCHEMA-STEPS-001': ['TC_020'], - 'VALID-STEPS-001': ['TC_020'], - 'SCHEMA-ID-001': ['TC_010', 'TC_011', 'TC_012'], - 'VALID-GUID-001': ['TC_010', 'TC_011', 'TC_012'], - 'STEP-ITEMID-001': ['TC_034', 'TC_035'], - // comparisonType enum BP rule (deferred) maps to the hand-coded Layer-1 check. - 'COMPARISON-TYPE-ENUM-001': ['COMPARISON-TYPE-001'], -}; +const LAYER1_OWNED_BP_RULES: ReadonlySet = new Set([ + 'SCHEMA-ROOT-001', // TC_003 owns root element + 'SCHEMA-STEPS-001', // TC_020 owns steps presence + 'VALID-STEPS-001', + 'SCHEMA-ID-001', // TC_010/TC_011/TC_012 own identifier + 'VALID-GUID-001', + 'STEP-ITEMID-001', // TC_034/TC_035 own testItemId + 'COMPARISON-TYPE-ENUM-001', // COMPARISON-TYPE-001 owns comparisonType enums (deferred BP rule) +]); /** Map a Best-Practices `appliesTo` token to the issue `applies_to` vocabulary. */ const BP_APPLIES_TO_ISSUE: Record = { @@ -1011,8 +1020,7 @@ function bridgeCriticalViolations( for (const v of bpViolations) { if (v.severity !== 'critical') continue; if (present.has(v.rule_id)) continue; - const suppressors = BRIDGE_SUPPRESSED_BY[v.rule_id]; - if (suppressors && suppressors.some((id) => present.has(id))) continue; + if (LAYER1_OWNED_BP_RULES.has(v.rule_id)) continue; // Layer-1 is authoritative for these issues.push({ rule_id: v.rule_id, severity: 'ERROR', diff --git a/test/unit/mcp/bestPracticesEngine.test.ts b/test/unit/mcp/bestPracticesEngine.test.ts index 15eb708c..283a5252 100644 --- a/test/unit/mcp/bestPracticesEngine.test.ts +++ b/test/unit/mcp/bestPracticesEngine.test.ts @@ -1051,7 +1051,11 @@ ${stepsXml} 'VAR-STRING-LITERAL-001' ); assert.ok(v, 'Expected VAR-STRING-LITERAL-001 to fire for {AccountId}'); - assert.equal(v?.severity, 'major'); + assert.equal( + v?.severity, + 'major', + 'a runtime execution error (the test still loads) — major, not minor/critical' + ); }); it('emits one violation per offending value (back-end returns a list)', () => { @@ -1076,14 +1080,28 @@ ${stepsXml} assert.ok(!v, `A class="variable" reference should pass, got: ${v?.message}`); }); - it('does not fire for the interpolation-tolerant args (sfUiTargetObjectId)', () => { + it('fires for UI-target args (sfUiTargetObjectId) — a bare {Var} is NOT interpolated there', () => { + // Field evidence: a literal {AccountId} in sfUiTargetObjectId lands in the + // URL as %7BAccountId%7D and the step hard-fails. The back-end exemption was + // wrong; we removed it locally, so the rule must now fire. const xml = buildTc(` {AccountId} `); const v = find(runBestPractices(xml).violations, 'VAR-STRING-LITERAL-001'); - assert.ok(!v, 'sfUiTargetObjectId tolerates {Var} interpolation'); + assert.ok(v, 'sfUiTargetObjectId must flag a bare {Var} literal — it is not interpolated at runtime'); + assert.equal(v?.severity, 'major'); + }); + + it('still passes for binding-style {ns:key} expressions (the colon is excluded)', () => { + const xml = buildTc(` + + {targetUrl:object} + + `); + const v = find(runBestPractices(xml).violations, 'VAR-STRING-LITERAL-001'); + assert.ok(!v, 'binding-style {ns:key} is safe and must not be flagged'); }); it('does not fire when the surrounding text is more than a bare token', () => { diff --git a/test/unit/mcp/qualityThreshold.test.ts b/test/unit/mcp/qualityThreshold.test.ts new file mode 100644 index 00000000..34008e66 --- /dev/null +++ b/test/unit/mcp/qualityThreshold.test.ts @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024 Provar Limited. + * All rights reserved. + * Licensed under the BSD 3-Clause license. + * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause + */ + +import { strict as assert } from 'node:assert'; +import { describe, it, afterEach } from 'mocha'; +import { resolveQualityThreshold, DEFAULT_QUALITY_THRESHOLD } from '../../../src/mcp/utils/qualityThreshold.js'; + +describe('resolveQualityThreshold (PDX-509)', () => { + const saved = process.env.PROVAR_MCP_QUALITY_THRESHOLD; + + afterEach(() => { + if (saved !== undefined) process.env.PROVAR_MCP_QUALITY_THRESHOLD = saved; + else delete process.env.PROVAR_MCP_QUALITY_THRESHOLD; + }); + + it('defaults to 90', () => { + delete process.env.PROVAR_MCP_QUALITY_THRESHOLD; + assert.equal(DEFAULT_QUALITY_THRESHOLD, 90); + assert.equal(resolveQualityThreshold(), 90); + }); + + it('honours a valid per-call arg over everything', () => { + process.env.PROVAR_MCP_QUALITY_THRESHOLD = '50'; + assert.equal(resolveQualityThreshold(80), 80); + }); + + it('falls back to the env var when no arg is given', () => { + process.env.PROVAR_MCP_QUALITY_THRESHOLD = '70'; + assert.equal(resolveQualityThreshold(), 70); + }); + + it('ignores an out-of-range arg and falls through to env, then default', () => { + process.env.PROVAR_MCP_QUALITY_THRESHOLD = '65'; + assert.equal(resolveQualityThreshold(150), 65, 'arg 150 is out of range → env'); + delete process.env.PROVAR_MCP_QUALITY_THRESHOLD; + assert.equal(resolveQualityThreshold(-5), 90, 'arg -5 is out of range, no env → default'); + }); + + it('ignores an unparseable or out-of-range env var', () => { + process.env.PROVAR_MCP_QUALITY_THRESHOLD = 'not-a-number'; + assert.equal(resolveQualityThreshold(), 90); + process.env.PROVAR_MCP_QUALITY_THRESHOLD = '999'; + assert.equal(resolveQualityThreshold(), 90); + }); + + it('accepts the boundary values 0 and 100', () => { + delete process.env.PROVAR_MCP_QUALITY_THRESHOLD; + assert.equal(resolveQualityThreshold(0), 0); + assert.equal(resolveQualityThreshold(100), 100); + }); +}); diff --git a/test/unit/mcp/testCaseGenerate.test.ts b/test/unit/mcp/testCaseGenerate.test.ts index 98e32a8a..ec262b58 100644 --- a/test/unit/mcp/testCaseGenerate.test.ts +++ b/test/unit/mcp/testCaseGenerate.test.ts @@ -407,17 +407,20 @@ describe('provar_testcase_generate', () => { assert.ok(!xml.includes('Test &'), 'escaped name must not appear in XML'); }); - it('escapes XML special characters in step api_id and name', () => { + it('escapes XML special characters in the step name', () => { + // A real apiId never contains XML metacharacters (and an unknown apiId is now + // a load-blocking error via the bridge), so escaping is exercised through the + // step name — which legitimately can contain &, <, > and ". const result = server.call('provar_testcase_generate', { test_case_name: 'Escape Step Test', - steps: [{ api_id: 'Api', name: 'Step & "Name"', attributes: {} }], + steps: [{ api_id: 'UiConnect', name: 'Step & ', attributes: {} }], dry_run: true, overwrite: false, }); const xml = parseText(result)['xml_content'] as string; - assert.ok(xml.includes('<') && xml.includes('>'), 'Expected < > escaped in apiId'); assert.ok(xml.includes('&'), 'Expected & escaped in step name'); + assert.ok(xml.includes('<') && xml.includes('>'), 'Expected < > escaped in step name'); }); }); @@ -794,9 +797,10 @@ describe('provar_testcase_generate', () => { }); const xml = parseText(result)['xml_content'] as string; + // Provar stores dates as epoch milliseconds (UTC midnight), not an ISO string. assert.ok( - xml.includes('valueClass="date">2026-05-19'), - `Expected valueClass="date" for ISO date; got: ${xml}` + xml.includes('valueClass="date">1779148800000'), + `Expected valueClass="date" with epoch ms for ISO date; got: ${xml}` ); }); @@ -815,9 +819,10 @@ describe('provar_testcase_generate', () => { }); const xml = parseText(result)['xml_content'] as string; + // Provar uses camelCase valueClass="dateTime" with an epoch-ms value (UTC when no tz). assert.ok( - xml.includes('valueClass="datetime">2026-05-19T10:30:00'), - `Expected valueClass="datetime" for ISO datetime; got: ${xml}` + xml.includes('valueClass="dateTime">1779186600000'), + `Expected valueClass="dateTime" with epoch ms for ISO datetime; got: ${xml}` ); }); diff --git a/test/unit/mcp/testCaseValidate.test.ts b/test/unit/mcp/testCaseValidate.test.ts index b568aa02..6adc8b7f 100644 --- a/test/unit/mcp/testCaseValidate.test.ts +++ b/test/unit/mcp/testCaseValidate.test.ts @@ -11,6 +11,7 @@ import { registerTestCaseValidate, validateTestCaseXml, } from '../../../src/mcp/tools/testCaseValidate.js'; +import { runBestPractices } from '../../../src/mcp/tools/bestPracticesEngine.js'; import type { ServerConfig } from '../../../src/mcp/server.js'; import { ASSERT_VALUES_COMPARISON_TYPES, @@ -31,8 +32,8 @@ const VALID_TC = ` - - + + `; @@ -1311,6 +1312,75 @@ describe('validateTestCase', () => { }); }); +// ── PDX-509: validity bridge (critical BP → is_valid) ───────────────────────── + +describe('PDX-509 — validity bridge', () => { + const UNKNOWN_API_TC = ` + + + + +`; + + // Empty-but-present : Layer-1 TC_020 does NOT fire (steps element exists), + // and the critical VALID-STEPS-001 BP rule is Layer-1-owned, so it must NOT be bridged. + const EMPTY_STEPS_TC = ` + + +`; + + // A {Var} stored as a plain string is VAR-STRING-LITERAL-001 (major) — must NOT gate is_valid. + const MAJOR_ONLY_TC = ` + + + + + {AccountId} + + + +`; + + it('a critical best-practice violation (unknown apiId) flips is_valid=false and surfaces in issues[]', () => { + const r = validateTestCase(UNKNOWN_API_TC); + assert.equal(r.is_valid, false, 'unknown apiId is load-blocking → is_valid must be false'); + const issue = r.issues.find((i) => i.rule_id === 'API-UNKNOWN-001'); + assert.ok(issue, 'API-UNKNOWN-001 should be bridged into issues[]'); + assert.equal(issue?.severity, 'ERROR'); + // The bridged critical also remains in best_practices_violations[] for scoring parity. + assert.ok( + (r.best_practices_violations ?? []).some((v) => v.rule_id === 'API-UNKNOWN-001'), + 'bridged critical stays in best_practices_violations[] (score parity)' + ); + }); + + it('a major best-practice violation does NOT flip is_valid', () => { + const r = validateTestCase(MAJOR_ONLY_TC); + assert.equal(r.is_valid, true, 'a major (runtime) violation loads — is_valid stays true'); + assert.ok( + (r.best_practices_violations ?? []).some((v) => v.rule_id === 'VAR-STRING-LITERAL-001'), + 'the major violation is still reported' + ); + }); + + it('a Layer-1-owned critical (empty ) is NOT bridged — Layer-1 is authoritative', () => { + const r = validateTestCase(EMPTY_STEPS_TC); + // TC_020 only fires on a MISSING ; an empty-but-present loads. + assert.equal(r.is_valid, true, 'empty-but-present loads — is_valid stays true'); + assert.equal( + r.issues.find((i) => i.rule_id === 'VALID-STEPS-001'), + undefined, + 'VALID-STEPS-001 must not be surfaced into issues[] (Layer-1 owns steps presence)' + ); + }); + + it('bridging does not perturb quality_score (Lambda parity preserved)', () => { + const r = validateTestCase(UNKNOWN_API_TC); + const bp = runBestPractices(UNKNOWN_API_TC); + assert.equal(r.quality_score, bp.quality_score, 'quality_score is computed from the untouched BP list'); + }); +}); + // ── Handler-level tests (registerTestCaseValidate) ──────────────────────────── describe('registerTestCaseValidate handler', () => { @@ -1372,6 +1442,79 @@ describe('registerTestCaseValidate handler', () => { assert.ok(warning.includes('Quality Hub'), 'Warning must mention Quality Hub'); }); + describe('PDX-509 — tri-state status + quality_threshold', () => { + const BAD_TC = ` + + + + +`; + + // Loadable (is_valid true) but carries a major (VAR-STRING-LITERAL-001) so quality_score < 100. + const NEEDS_IMPROVEMENT_TC = ` + + + + + {AccountId} + + + +`; + + async function validate(args: Record): Promise> { + const res = (await capServer.capturedHandler!(args)) as { content: Array<{ text: string }> }; + return JSON.parse(res.content[0].text) as Record; + } + + it('status="invalid" + default quality_threshold 90 when a critical defect blocks loading', async () => { + const r = await validate({ content: BAD_TC }); + assert.equal(r['is_valid'], false); + assert.equal(r['status'], 'invalid'); + assert.equal(r['quality_threshold'], 90, 'default threshold is 90'); + }); + + it('status="valid" when loadable and meets the threshold', async () => { + const r = await validate({ content: VALID_TC, quality_threshold: 0 }); + assert.equal(r['is_valid'], true); + assert.equal(r['status'], 'valid'); + assert.equal(r['meets_quality_threshold'], true); + }); + + it('status="needs_improvement" when loadable but below the threshold', async () => { + // Loadable, but a major violation keeps quality_score below the strict bar of 100. + const r = await validate({ content: NEEDS_IMPROVEMENT_TC, quality_threshold: 100 }); + assert.equal(r['is_valid'], true); + assert.ok((r['quality_score'] as number) < 100, 'fixture must score below 100'); + assert.equal(r['status'], 'needs_improvement'); + assert.equal(r['meets_quality_threshold'], false); + }); + + it('per-call quality_threshold overrides the PROVAR_MCP_QUALITY_THRESHOLD env var', async () => { + const saved = process.env.PROVAR_MCP_QUALITY_THRESHOLD; + process.env.PROVAR_MCP_QUALITY_THRESHOLD = '50'; + try { + const r = await validate({ content: VALID_TC, quality_threshold: 80 }); + assert.equal(r['quality_threshold'], 80, 'per-call arg wins over the env var'); + } finally { + if (saved !== undefined) process.env.PROVAR_MCP_QUALITY_THRESHOLD = saved; + else delete process.env.PROVAR_MCP_QUALITY_THRESHOLD; + } + }); + + it('PROVAR_MCP_QUALITY_THRESHOLD is used when no per-call arg is given', async () => { + const saved = process.env.PROVAR_MCP_QUALITY_THRESHOLD; + process.env.PROVAR_MCP_QUALITY_THRESHOLD = '75'; + try { + const r = await validate({ content: VALID_TC }); + assert.equal(r['quality_threshold'], 75, 'env var is the effective threshold'); + } finally { + if (saved !== undefined) process.env.PROVAR_MCP_QUALITY_THRESHOLD = saved; + else delete process.env.PROVAR_MCP_QUALITY_THRESHOLD; + } + }); + }); + it('key + API success → validation_source "quality_hub" with local metadata', async () => { process.env.PROVAR_API_KEY = 'pv_k_testkey12345'; apiStub = sinon.stub(qualityHubClient, 'validateTestCaseViaApi').resolves({ diff --git a/test/unit/mcp/testPlanValidate.test.ts b/test/unit/mcp/testPlanValidate.test.ts index 2c48e36d..6deef85f 100644 --- a/test/unit/mcp/testPlanValidate.test.ts +++ b/test/unit/mcp/testPlanValidate.test.ts @@ -62,7 +62,7 @@ function makeXml(tcGuid: string, stepGuid: string, id: string): string { '', ``, ' ', - ` `, + ` `, ' ', '', ].join('\n'); diff --git a/test/unit/mcp/testSuiteValidate.test.ts b/test/unit/mcp/testSuiteValidate.test.ts index 8afeeb3e..77ae1731 100644 --- a/test/unit/mcp/testSuiteValidate.test.ts +++ b/test/unit/mcp/testSuiteValidate.test.ts @@ -60,7 +60,7 @@ function makeXml(tcGuid: string, stepGuid: string, id: string): string { '', ``, ' ', - ` `, + ` `, ' ', '', ].join('\n'); @@ -345,7 +345,7 @@ describe('provar_testsuite_validate', () => { }); describe('quality_threshold', () => { - it('uses default threshold of 80 when not specified', () => { + it('uses default threshold of 90 (PDX-509) when not specified', () => { // Just verify no error and score is present const result = server.call('provar_testsuite_validate', { suite_name: 'ThresholdDefault', @@ -363,6 +363,38 @@ describe('provar_testsuite_validate', () => { }); assert.equal(isError(result), false); }); + + it('PDX-509: a loadable but sub-threshold case is "needs_improvement", not "invalid"', () => { + // A {Var} stored as a plain string is a major (VAR-STRING-LITERAL-001): the case + // still loads (is_valid true) but scores below 100. + const TC_MAJOR = { + name: 'Major.testcase', + xml_content: [ + '', + ``, + ' ', + ` `, + ' ', + ' {AccountId}', + ' ', + ' ', + ' ', + '', + ].join('\n'), + }; + const result = server.call('provar_testsuite_validate', { + suite_name: 'NeedsImprovement', + test_cases: [TC_MAJOR], + quality_threshold: 100, + }); + const body = parseText(result); + const cases = body['test_cases'] as Array>; + assert.equal(cases[0]['is_valid'], true, 'a major violation still loads'); + assert.equal(cases[0]['status'], 'needs_improvement'); + const summary = body['summary'] as Record; + assert.equal(summary['test_cases_needs_improvement'], 1); + assert.equal(summary['test_cases_invalid'], 0, 'sub-threshold is no longer collapsed to invalid'); + }); }); describe('PDX-470 — detail level', () => { From 5144949746a082bec82ffbe19dafa859b9985288 Mon Sep 17 00:00:00 2001 From: Michael Dailey Date: Thu, 4 Jun 2026 14:16:38 -0500 Subject: [PATCH 3/4] PDX-509: fix(mcp): address Copilot review on date fallback + diff comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RCA: Copilot flagged two issues. (1) When a value matches the ISO date/datetime shape but is not a real date (e.g. "2026-99-99"), isoToEpochMs returns null and the generator still emitted valueClass="date"/"dateTime" with the raw text — a load-breaking value. (2) The diff-dedup comment was internally inconsistent: it said the issue/BP rule_id namespaces are disjoint yet that a bridged critical shares its rule_id with the surfaced issue. Fix: Fall back to valueClass="string" when a date/datetime value cannot be converted to epoch ms, with a regression test. Reword the diff-dedup comment to state plainly that hand-coded issue rule_ids never collide with BP rule_ids, so a BP rule_id in issues[] can only be a bridged critical. --- src/mcp/tools/testCaseGenerate.ts | 11 ++++++++--- src/mcp/tools/testCaseValidate.ts | 8 +++++--- test/unit/mcp/testCaseGenerate.test.ts | 22 ++++++++++++++++++++++ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/mcp/tools/testCaseGenerate.ts b/src/mcp/tools/testCaseGenerate.ts index b61afe93..0d76b7fa 100644 --- a/src/mcp/tools/testCaseGenerate.ts +++ b/src/mcp/tools/testCaseGenerate.ts @@ -574,9 +574,14 @@ function buildArgumentValue(key: string, val: string, indent: string, inNamedVal // an ISO string fails to load in the IDE (RENDER-DATE-VALUECLASS-001), and the real // corpus uses epoch ms exclusively with camelCase `dateTime`. Convert here. const ms = isoToEpochMs(val, inferred); - const valueClass = inferred === 'datetime' ? 'dateTime' : 'date'; - const emitted = ms !== null ? String(ms) : escapeXmlContent(val); - return `${indent}${emitted}`; + if (ms !== null) { + const valueClass = inferred === 'datetime' ? 'dateTime' : 'date'; + return `${indent}${ms}`; + } + // A value that matches the ISO shape but is not a real date (e.g. "2026-99-99") + // cannot be converted — fall back to a plain string rather than emit a + // load-breaking date/dateTime with a non-epoch value. + return `${indent}${escapeXmlContent(val)}`; } return `${indent}${escapeXmlContent(val)}`; } diff --git a/src/mcp/tools/testCaseValidate.ts b/src/mcp/tools/testCaseValidate.ts index 94f8336d..c1035664 100644 --- a/src/mcp/tools/testCaseValidate.ts +++ b/src/mcp/tools/testCaseValidate.ts @@ -242,9 +242,11 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig const context = tcRunContext(file_path, source); const contextHash = computeContextHash('tc', context); const runId = generateRunId(context); - // A critical BP violation bridged into issues[] shares its rule_id with the - // surfaced issue (the issue/BP rule_id namespaces are disjoint), so drop it - // from the BP list here to avoid double-counting it in the baseline diff. + // A bridged critical appears twice — as the surfaced issue AND as its original + // BP violation — both carrying the same BP rule_id. Hand-coded issue rule_ids + // (TC_*, UI-*, COMPARISON-TYPE-001) never collide with BP rule_ids, so a BP + // rule_id appearing in issues[] can only mean it was bridged. Drop those from + // the BP list here so the baseline diff counts each finding once. const issueRuleIds = new Set(baseResult.issues.map((i) => i.rule_id)); const bpViolations = (baseResult.best_practices_violations ?? []).filter( (v) => !issueRuleIds.has(v.rule_id) diff --git a/test/unit/mcp/testCaseGenerate.test.ts b/test/unit/mcp/testCaseGenerate.test.ts index ec262b58..2cfde3c1 100644 --- a/test/unit/mcp/testCaseGenerate.test.ts +++ b/test/unit/mcp/testCaseGenerate.test.ts @@ -826,6 +826,28 @@ describe('provar_testcase_generate', () => { ); }); + it('falls back to valueClass="string" for an ISO-shaped but invalid date (no load-breaking date)', () => { + const result = server.call('provar_testcase_generate', { + test_case_name: 'BadDateField', + steps: [ + { + api_id: 'ApexCreateObject', + name: 'Create', + attributes: { CloseDate: '2026-99-99' }, + }, + ], + dry_run: true, + overwrite: false, + }); + + const xml = parseText(result)['xml_content'] as string; + assert.ok( + xml.includes('valueClass="string">2026-99-99'), + `Unparseable date must emit valueClass="string", not a non-epoch date; got: ${xml}` + ); + assert.ok(!xml.includes('valueClass="date"'), 'must not emit a load-breaking valueClass="date"'); + }); + it('emits valueClass="boolean" for "true" / "false" literals', () => { const result = server.call('provar_testcase_generate', { test_case_name: 'BoolField', From a4c51f3d298dcb1234d0f160d991bc059f948212 Mon Sep 17 00:00:00 2001 From: Michael Dailey Date: Thu, 4 Jun 2026 14:30:59 -0500 Subject: [PATCH 4/4] PDX-509: fix(mcp): VAR-NAMING-001 critical -> major to match Quality Hub RCA: VAR-NAMING-001 was tagged critical/weight 8, but a variable name with a space is a RUNTIME failure (the test still loads and fails at execution), which the severity taxonomy classifies as major, not load-blocking critical. As a critical it was being surfaced by the PDX-509 bridge and wrongly flipping is_valid for a runtime defect. The Quality Hub backend is making the same change (major / weight 5) so score parity is preserved. Fix: Set VAR-NAMING-001 severity=major, weight=5 in the local rules JSON, and note in the description that it is a runtime defect, not a load-blocking one. It is no longer bridged, so a bad variable name no longer flips is_valid (it docks quality_score and surfaces as a major best-practice violation). --- src/mcp/rules/provar_best_practices_rules.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mcp/rules/provar_best_practices_rules.json b/src/mcp/rules/provar_best_practices_rules.json index 228d3239..7d778df3 100644 --- a/src/mcp/rules/provar_best_practices_rules.json +++ b/src/mcp/rules/provar_best_practices_rules.json @@ -338,10 +338,10 @@ "id": "VAR-NAMING-001", "category": "DataDrivenTesting", "name": "Variable names must use valid identifiers", - "description": "Variable names, field references, and result names must contain only letters, digits, and underscores. Spaces, hyphens, and special characters cause RUNTIME FAILURES in Provar. Examples: {Account.Name} ✅ | {Account.Account Name} ❌ (space will fail)", + "description": "Variable names, field references, and result names must contain only letters, digits, and underscores. Spaces, hyphens, and special characters cause RUNTIME FAILURES in Provar (the test case still loads, so this is a major runtime defect, not a load-blocking critical). Examples: {Account.Name} ✅ | {Account.Account Name} ❌ (space will fail)", "appliesTo": ["Step"], - "severity": "critical", - "weight": 8, + "severity": "major", + "weight": 5, "recommendation": "Replace spaces and special characters with underscores. Use API field names (e.g., Account.Name) instead of field labels (e.g., Account.Account Name).", "check": { "type": "variableNaming",