diff --git a/docs/content/releases/os_upgrading/2.59.2.md b/docs/content/releases/os_upgrading/2.59.2.md new file mode 100644 index 00000000000..c78a8e557fb --- /dev/null +++ b/docs/content/releases/os_upgrading/2.59.2.md @@ -0,0 +1,21 @@ +--- +title: 'Upgrading to DefectDojo Version 2.59.2' +toc_hide: true +weight: -20260615 +description: Xygeni parser keeps repeated SAST/Secrets occurrences in the same file as distinct findings. +--- + +## Xygeni parser: repeated SAST/Secrets occurrences are now distinct findings + +The Xygeni parser previously deduplicated away legitimate findings when the same secret value or code pattern appeared more than once in a single file, so only the first occurrence survived an import. + +Xygeni reuses one `uniqueHash` across every occurrence of the same value in a file (it hashes the value, not the location) while giving each occurrence a distinct `issueId` that encodes the file path and line. The SAST and Secrets scan types deduplicate on `unique_id_from_tool`, which was set to `uniqueHash`, so occurrences after the first were treated as duplicates and hidden. + +Starting in 2.59.2, for SAST and Secrets findings the parser keys `unique_id_from_tool` on the per-occurrence `issueId` (falling back to `uniqueHash` when `issueId` is absent) and keeps `uniqueHash` as `vuln_id_from_tool`. Each occurrence is now its own finding, and `vuln_id_from_tool` still groups occurrences of the same value. SCA findings are unchanged: there `uniqueHash` is unique per finding while `issueId` collides across packages, so `uniqueHash` remains the correct dedup key. + +### Required actions + +- **No action required for new imports.** Repeated occurrences that were previously collapsed now appear as separate findings. +- **Reimport behavior:** on the first reimport of an existing Xygeni SAST or Secrets test after upgrading, the previously-imported findings carry the old `uniqueHash`-based `unique_id_from_tool` and will not match the new `issueId`-based ids. Those findings are closed as no longer present and a fresh set is created with the corrected ids. This is a one-time effect; subsequent reimports match normally. SCA tests are not affected. + +For more information, check the [Release Notes](https://github.com/DefectDojo/django-DefectDojo/releases/tag/2.59.2). diff --git a/docs/content/supported_tools/parsers/file/xygeni.md b/docs/content/supported_tools/parsers/file/xygeni.md index 1b8ec32f116..54cf2c7f50e 100644 --- a/docs/content/supported_tools/parsers/file/xygeni.md +++ b/docs/content/supported_tools/parsers/file/xygeni.md @@ -64,16 +64,24 @@ Sample Xygeni JSON reports can be found ### Deduplication -Every finding carries `unique_id_from_tool` (set from Xygeni's vendor-stable -`uniqueHash`) and `vuln_id_from_tool` (set from `issueId`). The deduplication -algorithm is configured per scan type: +Every finding carries both `unique_id_from_tool` and `vuln_id_from_tool`, and +the deduplication algorithm is configured per scan type: -| Scan type | Algorithm | Hash-code fields (fallback) | -| -------------------- | ---------------------------------- | -------------------------------------------------------------- | -| Xygeni SAST Scan | `unique_id_from_tool` | n/a | -| Xygeni SCA Scan | `unique_id_from_tool_or_hash_code` | `vulnerability_ids`, `component_name`, `component_version` | -| Xygeni Secrets Scan | `unique_id_from_tool` | n/a | +| Scan type | Algorithm | `unique_id_from_tool` | `vuln_id_from_tool` | Hash-code fields (fallback) | +| -------------------- | ---------------------------------- | --------------------- | ------------------- | --------------------------------------------------------- | +| Xygeni SAST Scan | `unique_id_from_tool` | `issueId` | `uniqueHash` | n/a | +| Xygeni SCA Scan | `unique_id_from_tool_or_hash_code` | `uniqueHash` | `issueId` | `vulnerability_ids`, `component_name`, `component_version` | +| Xygeni Secrets Scan | `unique_id_from_tool` | `issueId` | `uniqueHash` | n/a | -For SCA the hash-code fallback enables cross-tool deduplication: the same -CVE on the same package@version reported by Xygeni and another SCA scanner -(Snyk, Trivy, etc.) collapse into a single Finding. +For SAST and Secrets the dedup key is the per-occurrence `issueId` (which +encodes the file path and line). The same secret value or code pattern can +appear several times in one file; Xygeni reuses a single `uniqueHash` across +those occurrences, so keying dedup on `uniqueHash` would collapse them into one +Finding and underreport the occurrences. Keying on `issueId` keeps each +occurrence as its own Finding, while `uniqueHash` is retained as the +`vuln_id_from_tool` that groups occurrences of the same value. + +For SCA the dedup key stays `uniqueHash` (it encodes CVE + package + version, +unique per finding) and the hash-code fallback enables cross-tool +deduplication: the same CVE on the same package@version reported by Xygeni and +another SCA scanner (Snyk, Trivy, etc.) collapse into a single Finding. diff --git a/dojo/tools/xygeni/sast.py b/dojo/tools/xygeni/sast.py index 1e7782e5100..62b0048cb1b 100644 --- a/dojo/tools/xygeni/sast.py +++ b/dojo/tools/xygeni/sast.py @@ -35,8 +35,13 @@ def _build_finding(vuln, test): cwe=parse_cwe(cwes=vuln.get("cwes"), cwe=vuln.get("cwe"), tags=vuln.get("tags")), static_finding=True, dynamic_finding=False, - unique_id_from_tool=vuln.get("uniqueHash"), - vuln_id_from_tool=vuln.get("issueId"), + # One detector can flag the same code pattern at several locations. Xygeni reuses a + # single ``uniqueHash`` across those occurrences but gives each a distinct ``issueId`` + # (which encodes filepath + line). Dedup is keyed on ``unique_id_from_tool``, so use + # the per-occurrence ``issueId`` to keep each occurrence as its own Finding; + # ``uniqueHash`` groups them as the vuln id. + unique_id_from_tool=vuln.get("issueId") or vuln.get("uniqueHash"), + vuln_id_from_tool=vuln.get("uniqueHash"), ) _apply_code_flow_fields(finding, vuln.get("codeFlows") or []) diff --git a/dojo/tools/xygeni/secrets.py b/dojo/tools/xygeni/secrets.py index ac5b6584b71..c35a5aecb67 100644 --- a/dojo/tools/xygeni/secrets.py +++ b/dojo/tools/xygeni/secrets.py @@ -44,6 +44,11 @@ def _build_finding(secret, test): mitigation=f"Rotate this {secret_type} secret immediately and remove it from version-control history.", static_finding=True, dynamic_finding=False, - unique_id_from_tool=secret.get("uniqueHash"), - vuln_id_from_tool=secret.get("issueId"), + # The same secret value can appear several times in one file. Xygeni assigns every + # occurrence the same ``uniqueHash`` (it hashes the secret value, not the location) + # but a distinct ``issueId`` (which encodes filepath + line). Dedup is keyed on + # ``unique_id_from_tool``, so use the per-occurrence ``issueId`` to keep each + # occurrence as its own Finding; ``uniqueHash`` groups them as the vuln id. + unique_id_from_tool=secret.get("issueId") or secret.get("uniqueHash"), + vuln_id_from_tool=secret.get("uniqueHash"), ) diff --git a/unittests/tools/test_xygeni_parser.py b/unittests/tools/test_xygeni_parser.py index ea161a096bd..e1caf9a0f44 100644 --- a/unittests/tools/test_xygeni_parser.py +++ b/unittests/tools/test_xygeni_parser.py @@ -42,10 +42,15 @@ def test_sast_many_findings(self): self.assertIsNotNone(finding.unique_id_from_tool) match = next( - (f for f in findings if f.unique_id_from_tool == "nsXRi+PTLom/sG8m6weOXw"), + ( + f for f in findings + if f.unique_id_from_tool == "SAS.injection.python.code_injection_deserialization.dockerized_labs/insec_des_lab/main.py.36" + ), None, ) - self.assertIsNotNone(match, "expected the deserialization SAST finding by uniqueHash") + self.assertIsNotNone(match, "expected the deserialization SAST finding by issueId") + # uniqueHash is kept as the vuln id; the per-occurrence issueId drives dedup + self.assertEqual("nsXRi+PTLom/sG8m6weOXw", match.vuln_id_from_tool) self.assertEqual("python.code_injection_deserialization", match.title) self.assertEqual("Critical", match.severity) self.assertEqual(502, match.cwe) @@ -57,6 +62,20 @@ def test_sast_many_findings(self): self.assertEqual("serialized_data", match.sast_source_object) self.assertIn("Data flow", match.description) + def test_sast_repeated_detector_in_same_file_stays_distinct(self): + # One detector flagging the same pattern at several lines shares a uniqueHash but + # carries a distinct issueId per line. Each occurrence must remain its own Finding. + with self._load("sast_many_findings.json") as testfile: + findings = XygeniParser().get_findings(testfile, Test()) + + occurrences = [ + f for f in findings + if f.vuln_id_from_tool == "VsqoC9U6q8EYG0QZ5UqxXw" # forms_without_csrf_protection, 4 lines + ] + self.assertEqual(4, len(occurrences), "all occurrences of the repeated detector must be kept") + unique_ids = {f.unique_id_from_tool for f in occurrences} + self.assertEqual(4, len(unique_ids), "each occurrence needs a distinct unique_id_from_tool") + def test_sca_many_findings(self): with self._load("sca_many_findings.json") as testfile: findings = XygeniParser().get_findings(testfile, Test()) @@ -97,16 +116,34 @@ def test_secrets_many_findings(self): self.assertIsNotNone(finding.cwe) match = next( - (f for f in findings if f.unique_id_from_tool == "LVAjuA4Z40VxktixjtztXg"), + (f for f in findings if f.unique_id_from_tool == "SEC.private_key.private_key..ssh/id_rsa.1"), None, ) self.assertIsNotNone(match, "expected the .ssh/id_rsa private-key secret finding") + # uniqueHash is kept as the vuln id; the per-occurrence issueId drives dedup + self.assertEqual("LVAjuA4Z40VxktixjtztXg", match.vuln_id_from_tool) self.assertEqual("Critical", match.severity) self.assertEqual(".ssh/id_rsa", match.file_path) self.assertEqual(1, match.line) self.assertIn("private_key", match.title) self.assertIn("Rotate", match.mitigation) + def test_secrets_repeated_in_same_file_stay_distinct(self): + # A secret value repeated in one file shares a uniqueHash across occurrences but + # carries a distinct issueId per line. The parser must surface each occurrence as + # its own Finding (distinct unique_id_from_tool) so dedup does not collapse them. + with self._load("secrets_many_findings.json") as testfile: + findings = XygeniParser().get_findings(testfile, Test()) + + occurrences = [ + f for f in findings + if f.vuln_id_from_tool == "1yvAV2ndtW4yYG+TJQhhXg" # .docker/.dockercfg, lines 9 and 29 + ] + self.assertEqual(2, len(occurrences), "both occurrences of the repeated secret must be kept") + unique_ids = {f.unique_id_from_tool for f in occurrences} + self.assertEqual(2, len(unique_ids), "each occurrence needs a distinct unique_id_from_tool") + self.assertEqual({9, 29}, {f.line for f in occurrences}) + # ----- dispatch + error cases ----- def test_dispatches_on_metadata_scan_type(self): @@ -127,7 +164,8 @@ def test_dispatches_on_metadata_scan_type(self): } findings = XygeniParser().get_findings(io.StringIO(json.dumps(report)), Test()) self.assertEqual(1, len(findings)) - self.assertEqual("abc123", findings[0].unique_id_from_tool) + self.assertEqual("SECRETS.aws-access-key.config.ini:12", findings[0].unique_id_from_tool) + self.assertEqual("abc123", findings[0].vuln_id_from_tool) self.assertEqual(798, findings[0].cwe) def test_raises_on_missing_scan_type(self):