Skip to content

Commit 5262c1f

Browse files
damienbarkerjenkins
authored andcommitted
QPR-12225 -- Extend regression test JSON comparison to allow for differently ordered results
1 parent 18baa2d commit 5262c1f

3 files changed

Lines changed: 241 additions & 9 deletions

File tree

Tools/PythonTools/Readme.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ Each object under a given key, `file_name`, has the following format (note that
6464
"key2",
6565
"key3/subkey1"
6666
],
67+
"keys": {
68+
"key1/": [
69+
"subkey1",
70+
"subkey2"
71+
],
72+
"": [
73+
"subkey1",
74+
"subkey2"
75+
]
76+
},
6777
"settings": [
6878
{
6979
"names": [
@@ -100,6 +110,7 @@ For `csv_settings`:
100110
For `json_settings`:
101111
- Any key value (i.e. in `ignore_keys`, `settings.names`, etc.) must include the parent, if any. Using the sample comparison_config.json template above, we would ignore "key1" and "key2" at the top level in a JSON file comparison, and "subkey1" only if it appears inside of "key3".
102112
- The `ignore_keys` is an array of strings, each string being a key in the JSON file. If the key is found in one or both of the files, any diffs will be ignored for this key and its children (i.e. if the value is itself a nested object).
113+
- The `keys` is a dictionary whose keys consist of paths, and the values **must consist of** arrays. This will allow for a fallback comparison using `datacompy.core.Compare()` (which is already used for CSV comparisons) and will allow a JSON diff to still pass if the only reason for the diff is that the results are not given in the same order. A blank key ("") means that we expect the whole JSON file consists of an array of unnested JSON objects (and is usually a CSV report that was converted directly to a JSON report). Hence, it would never make sense to have a blank ("") key along with other non-blank keys for the same file comparison config. **NOTE:** A non-blank key must end in a "/".
103114
- The `settings` object works the same as the `column_settings` file in `csv_settings`, except that the keys must include the parent in the JSON `settings`.
104115
- **NOTE:** In order for a JSON check to be applied, a comp config must be provided for the filename, even if the config is empty (see e.g. `all_string_filename` in the template above). Otherwise a direct file comparison will be done.
105116
- **NOTE:** String diffs are automatically processed (i.e. unless they are in `ignore_keys`, then a string diff will be a failing diff) (see e.g. `all_string_filename` in the template above). Only numerical differences need to be handled in `settings`.

Tools/PythonTools/compare_files.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Function for comparing two files.
2+
from pprint import pprint
23
import os
34
import argparse
45
import collections
@@ -414,7 +415,7 @@ def compare_files_df(name, file_1, file_2, config):
414415
logger.warning('Failing test, because require_equal_optional_cols is true')
415416
return False
416417
else:
417-
logger.warning('Ignore unequal optional cols, because because require_equal_optional_cols is false')
418+
logger.warning('Ignore unequal optional cols, because require_equal_optional_cols is false')
418419
else:
419420
logger.warning('Failing test, because require_equal_optional_cols is not given, defaults to true')
420421
return False
@@ -594,7 +595,7 @@ def compare_files_json(name, file_1, file_2, config) -> bool:
594595
json_diff = jsondiff.diff(json_1, json_2, syntax='symmetric', marshal=True)
595596

596597
# Filter out differences that can be ignored, or are within tolerances
597-
validate_json_diff(json_diff, config, '')
598+
validate_json_diff(json_1, json_2, json_diff, config, '')
598599

599600
if json_diff:
600601
if isinstance(json_diff, dict):
@@ -607,7 +608,7 @@ def compare_files_json(name, file_1, file_2, config) -> bool:
607608
return True
608609

609610
# Modifies jsondif.diff output so that 'ignoreable' diffs and diffs within tolerance/s are removed
610-
def validate_json_diff(json_diff: dict, config: dict, path: str) -> None:
611+
def validate_json_diff(json_1, json_2, json_diff: dict, config: dict, path: str) -> None:
611612
logger = logging.getLogger(__name__)
612613

613614
if not json_diff:
@@ -620,7 +621,7 @@ def validate_json_diff(json_diff: dict, config: dict, path: str) -> None:
620621
# If the diff obj is a set of diffs between two arrays (denoted by dict with int keys)
621622
if all([isinstance(k, int) for k in json_diff.keys()]):
622623
for diff in json_diff.values():
623-
validate_json_diff(diff, config, path)
624+
validate_json_diff(json_1, json_2, diff, config, path)
624625

625626
# If the diff obj is a dict of diffs between two objects
626627
else:
@@ -634,18 +635,19 @@ def validate_json_diff(json_diff: dict, config: dict, path: str) -> None:
634635
del json_diff[key_to_check]
635636

636637
for key, diff in json_diff.items():
638+
# Insertions (key="$insert") and deletions (key="$delete") should count as a test failure
639+
if "$" in key:
640+
continue
641+
637642
if isinstance(diff, dict):
638643
new_path = ((path + str(key)) if path else str(key)) + '/'
639-
validate_json_diff(diff, config, new_path)
644+
validate_json_diff(json_1, json_2, diff, config, new_path)
640645

641646
else:
642-
# Insertions (key="$insert") and deletions (key="$delete") should count as a test failure
643-
if "$" in key:
644-
continue
645-
646647
# At this point, we expect diff to be a list
647648
if not isinstance(diff, list):
648649
logger.warning(f"diff has invalid type: {type(diff)=}")
650+
logger.warning(f"{diff=}")
649651
continue
650652
# with 2 elements
651653
if not (len(diff) == 2):
@@ -679,6 +681,46 @@ def validate_json_diff(json_diff: dict, config: dict, path: str) -> None:
679681
if abs_check or rel_check:
680682
diff.clear()
681683

684+
# Fallback comparison in the case of JSON array diffs due to differently ordering.
685+
if diff:
686+
keys = config.get("keys")
687+
if keys is not None and path in keys:
688+
# Only compare on columns defined in "settings"
689+
names_to_check = [n.replace(path, '') for n in names if n.startswith(path)]
690+
691+
logger.warning(f"jsondiff list comparison failed for {path=}. Falling back to datacompy.core.Compare()")
692+
path_tokens = [tok for tok in path.split('/') if tok]
693+
694+
# Get the original JSON objs
695+
json_1_ptr = json_1
696+
json_2_ptr = json_2
697+
698+
# Get the list/array within the JSON objs that failed the comparison
699+
if path_tokens:
700+
json_1_ptr = json_1_ptr[0]
701+
json_2_ptr = json_2_ptr[0]
702+
for pt in path_tokens:
703+
json_1_ptr = json_1_ptr[pt]
704+
json_2_ptr = json_2_ptr[pt]
705+
706+
# Convert these lists (we are implicitly assuming they are lists) to DataFrame for datacompy comparison, similar to the CSV reports
707+
json_1_df = pd.DataFrame(json_1_ptr)
708+
# json_1_df = json_1_df[[header for header in json_1_df.columns if header in names_to_check + keys[path]]]
709+
json_2_df = pd.DataFrame(json_2_ptr)
710+
# json_2_df = json_2_df[[header for header in json_2_df.columns if header in names_to_check + keys[path]]]
711+
712+
# Run comparison
713+
comp = Compare(json_1_df, json_2_df, join_columns=keys[path], abs_tol=abs_tol, rel_tol=rel_tol,
714+
df1_name='expected', df2_name='calculated')
715+
716+
if comp.matches():
717+
diff.clear()
718+
else:
719+
print(json_1_df)
720+
print(json_2_df)
721+
logger.warning("Fallback comparison failed.")
722+
logger.warning(comp.report())
723+
682724
# For the diffs that are now empty, we can remove them from the dict
683725
diffs_to_ignore = []
684726
for key, diff in json_diff.items():

Tools/PythonTools/comparison_config.json

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,6 +1424,11 @@
14241424
]
14251425
},
14261426
"curves\\.json": {
1427+
"keys": {
1428+
"data/": [
1429+
"dates"
1430+
]
1431+
},
14271432
"settings": [
14281433
{
14291434
"names": [
@@ -1440,6 +1445,16 @@
14401445
"ignore_keys": [
14411446
"simmResultsPath"
14421447
],
1448+
"keys": {
1449+
"": [
1450+
"portfolio",
1451+
"product_class",
1452+
"risk_class",
1453+
"margin_type",
1454+
"bucket",
1455+
"side"
1456+
]
1457+
},
14431458
"settings": [
14441459
{
14451460
"names": [
@@ -1454,6 +1469,66 @@
14541469
]
14551470
},
14561471
"im_impact\\.json": {
1472+
"keys": {
1473+
"impact/schedule_impact_report/": [
1474+
"portfolio",
1475+
"agreement_type",
1476+
"product_class",
1477+
"call_type",
1478+
"initial_margin_type",
1479+
"legal_entity_id",
1480+
"side"
1481+
],
1482+
"impact/simm_impact_report/": [
1483+
"portfolio",
1484+
"agreement_type",
1485+
"call_type",
1486+
"initial_margin_type",
1487+
"legal_entity_id",
1488+
"product_class",
1489+
"risk_class",
1490+
"margin_type",
1491+
"bucket",
1492+
"side"
1493+
],
1494+
"impact/total_impact_report/": [
1495+
"portfolio",
1496+
"agreement_type",
1497+
"call_type",
1498+
"initial_margin_type",
1499+
"legal_entity_id",
1500+
"side"
1501+
],
1502+
"impact/standalone_im_schedule_report/": [
1503+
"portfolio",
1504+
"agreement_type",
1505+
"call_type",
1506+
"initial_margin_type",
1507+
"legal_entity_id",
1508+
"product_class",
1509+
"side"
1510+
],
1511+
"impact/standalone_simm_report/": [
1512+
"portfolio",
1513+
"agreement_type",
1514+
"call_type",
1515+
"initial_margin_type",
1516+
"legal_entity_id",
1517+
"product_class",
1518+
"risk_class",
1519+
"margin_type",
1520+
"bucket",
1521+
"side"
1522+
],
1523+
"impact/standalone_total_im_report/": [
1524+
"portfolio",
1525+
"agreement_type",
1526+
"call_type",
1527+
"initial_margin_type",
1528+
"legal_entity_id",
1529+
"side"
1530+
]
1531+
},
14571532
"settings": [
14581533
{
14591534
"names": [
@@ -1465,6 +1540,7 @@
14651540
"impact/simm_impact_report/initial_margin_impact",
14661541
"impact/standalone_im_schedule_report/gross_im",
14671542
"impact/standalone_im_schedule_report/schedule_im",
1543+
"impact/standalone_im_schedule_report/gross_current_rc",
14681544
"impact/standalone_im_schedule_report/net_current_rc",
14691545
"impact/standalone_im_schedule_report/net_to_gross_ratio",
14701546
"impact/standalone_simm_report/initial_margin",
@@ -1491,6 +1567,14 @@
14911567
]
14921568
},
14931569
"frtb\\.json": {
1570+
"keys": {
1571+
"": [
1572+
"bucket",
1573+
"capitalRequirement",
1574+
"risk_class",
1575+
"correlationScenario"
1576+
]
1577+
},
14941578
"settings": [
14951579
{
14961580
"names": [
@@ -1502,6 +1586,12 @@
15021586
]
15031587
},
15041588
"total_im\\.json": {
1589+
"keys": {
1590+
"": [
1591+
"side",
1592+
"portfolio"
1593+
]
1594+
},
15051595
"settings": [
15061596
{
15071597
"names": [
@@ -1517,9 +1607,98 @@
15171607
"ignore_keys": [
15181608
"simmResultsPath"
15191609
],
1610+
"keys": {
1611+
"additionalResults/": [
1612+
"tradeId",
1613+
"resultId",
1614+
"resultType"
1615+
],
1616+
"bacva/": [
1617+
"analytic",
1618+
"counterparty",
1619+
"nettingSetId"
1620+
],
1621+
"frtb/": [
1622+
"bucket",
1623+
"capitalRequirement",
1624+
"risk_class",
1625+
"correlationScenario"
1626+
],
1627+
"impact/schedule_impact_report/": [
1628+
"portfolio",
1629+
"agreement_type",
1630+
"product_class",
1631+
"call_type",
1632+
"initial_margin_type",
1633+
"legal_entity_id",
1634+
"side"
1635+
],
1636+
"impact/simm_impact_report/": [
1637+
"portfolio",
1638+
"agreement_type",
1639+
"call_type",
1640+
"initial_margin_type",
1641+
"legal_entity_id",
1642+
"product_class",
1643+
"risk_class",
1644+
"margin_type",
1645+
"bucket",
1646+
"side"
1647+
],
1648+
"impact/total_impact_report/": [
1649+
"portfolio",
1650+
"agreement_type",
1651+
"call_type",
1652+
"initial_margin_type",
1653+
"legal_entity_id",
1654+
"side"
1655+
],
1656+
"impact/standalone_im_schedule_report/": [
1657+
"portfolio",
1658+
"agreement_type",
1659+
"call_type",
1660+
"initial_margin_type",
1661+
"legal_entity_id",
1662+
"product_class",
1663+
"side"
1664+
],
1665+
"impact/standalone_simm_report/": [
1666+
"portfolio",
1667+
"agreement_type",
1668+
"call_type",
1669+
"initial_margin_type",
1670+
"legal_entity_id",
1671+
"product_class",
1672+
"risk_class",
1673+
"margin_type",
1674+
"bucket",
1675+
"side"
1676+
],
1677+
"impact/standalone_total_im_report/": [
1678+
"portfolio",
1679+
"agreement_type",
1680+
"call_type",
1681+
"initial_margin_type",
1682+
"legal_entity_id",
1683+
"side"
1684+
],
1685+
"simmReport/": [
1686+
"portfolio",
1687+
"product_class",
1688+
"risk_class",
1689+
"margin_type",
1690+
"bucket",
1691+
"side"
1692+
],
1693+
"totalIMReport/": [
1694+
"side",
1695+
"portfolio"
1696+
]
1697+
},
15201698
"settings": [
15211699
{
15221700
"names": [
1701+
"additionalResults/resultValue",
15231702
"bacva/value",
15241703
"cashflow/accrual",
15251704
"cashflow/accruedAmount",

0 commit comments

Comments
 (0)