From c7e170ec461807c42f6eaa1fb0064b9e5a1ce95e Mon Sep 17 00:00:00 2001 From: kai lin Date: Tue, 12 May 2026 12:09:15 -0400 Subject: [PATCH 1/3] [Bugfix] - Auto-update smithy2c2j_service_map.json --- tools/scripts/codegen/legacy_c2j_cpp_gen.py | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/scripts/codegen/legacy_c2j_cpp_gen.py b/tools/scripts/codegen/legacy_c2j_cpp_gen.py index a81d045bb99..df6621b79f2 100644 --- a/tools/scripts/codegen/legacy_c2j_cpp_gen.py +++ b/tools/scripts/codegen/legacy_c2j_cpp_gen.py @@ -151,6 +151,7 @@ def generate(self, executor: ProcessPoolExecutor, max_workers: int, args: dict) return -1 self._create_smithy_namespace_mapping(self.c2j_models) + self._update_smithy2c2j_service_map(self.c2j_models) print(f"Code generation done, (re)generated {len(done)} packages.") # Including defaults and partitions @@ -380,3 +381,36 @@ def _create_smithy_namespace_mapping(self, c2j_models: dict): with open(tmp_path, 'w') as f: json.dump(mapping, f, indent=2) os.replace(tmp_path, output_path) + + def _update_smithy2c2j_service_map(self, c2j_models: dict): + """Update smithy2c2j_service_map.json with entries for services where the + Smithy sdkId-derived name differs from the C2J filename-derived name. + Only processes the services being generated; merges with existing entries.""" + output_paths = [ + f"{self.path_to_api_definitions}/../smithy/cpp-codegen/smithy2c2j_service_map.json", + f"{self.path_to_api_definitions}/../smithy/codegen/smithy2c2j_service_map.json", + ] + + for output_path in output_paths: + if os.path.exists(output_path): + with open(output_path, 'r') as f: + mapping = json.load(f) + else: + mapping = {} + + for service, model_files in c2j_models.items(): + full_model_file_path = f"{self.path_to_api_definitions}/{model_files.c2j_model}" + with open(full_model_file_path, 'r') as f: + model = json.load(f) + service_id = model.get("metadata", {}).get("serviceId", "") + if not service_id: + continue + smithy_name = service_id.lower().replace(" ", "-").replace("_", "-") + if smithy_name != service: + mapping[smithy_name] = service + + tmp_path = output_path + ".tmp" + with open(tmp_path, 'w') as f: + json.dump(mapping, f, indent=4, sort_keys=True) + f.write("\n") + os.replace(tmp_path, output_path) From 3bd9ad042fc764e9c3bc9360891e5f78944995e9 Mon Sep 17 00:00:00 2001 From: kai lin Date: Tue, 12 May 2026 13:42:31 -0400 Subject: [PATCH 2/3] fix for s3 --- tools/scripts/codegen/legacy_c2j_cpp_gen.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/scripts/codegen/legacy_c2j_cpp_gen.py b/tools/scripts/codegen/legacy_c2j_cpp_gen.py index df6621b79f2..383baa57c5c 100644 --- a/tools/scripts/codegen/legacy_c2j_cpp_gen.py +++ b/tools/scripts/codegen/legacy_c2j_cpp_gen.py @@ -398,7 +398,13 @@ def _update_smithy2c2j_service_map(self, c2j_models: dict): else: mapping = {} + seen_models = set() for service, model_files in c2j_models.items(): + # Skip synthetic entries that share a model file with another service (e.g. s3-crt uses s3's model) + if model_files.c2j_model in seen_models: + continue + seen_models.add(model_files.c2j_model) + full_model_file_path = f"{self.path_to_api_definitions}/{model_files.c2j_model}" with open(full_model_file_path, 'r') as f: model = json.load(f) From 9c3ffd09af72ab36c46a49ce59a1eab9d42f296a Mon Sep 17 00:00:00 2001 From: kai lin Date: Tue, 12 May 2026 14:46:46 -0400 Subject: [PATCH 3/3] fix c2j codegen file to use sdkid --- tools/scripts/codegen/legacy_c2j_cpp_gen.py | 40 ---------------- tools/scripts/codegen/model_utils.py | 53 +++++++++++++++++++-- 2 files changed, 48 insertions(+), 45 deletions(-) diff --git a/tools/scripts/codegen/legacy_c2j_cpp_gen.py b/tools/scripts/codegen/legacy_c2j_cpp_gen.py index 383baa57c5c..a81d045bb99 100644 --- a/tools/scripts/codegen/legacy_c2j_cpp_gen.py +++ b/tools/scripts/codegen/legacy_c2j_cpp_gen.py @@ -151,7 +151,6 @@ def generate(self, executor: ProcessPoolExecutor, max_workers: int, args: dict) return -1 self._create_smithy_namespace_mapping(self.c2j_models) - self._update_smithy2c2j_service_map(self.c2j_models) print(f"Code generation done, (re)generated {len(done)} packages.") # Including defaults and partitions @@ -381,42 +380,3 @@ def _create_smithy_namespace_mapping(self, c2j_models: dict): with open(tmp_path, 'w') as f: json.dump(mapping, f, indent=2) os.replace(tmp_path, output_path) - - def _update_smithy2c2j_service_map(self, c2j_models: dict): - """Update smithy2c2j_service_map.json with entries for services where the - Smithy sdkId-derived name differs from the C2J filename-derived name. - Only processes the services being generated; merges with existing entries.""" - output_paths = [ - f"{self.path_to_api_definitions}/../smithy/cpp-codegen/smithy2c2j_service_map.json", - f"{self.path_to_api_definitions}/../smithy/codegen/smithy2c2j_service_map.json", - ] - - for output_path in output_paths: - if os.path.exists(output_path): - with open(output_path, 'r') as f: - mapping = json.load(f) - else: - mapping = {} - - seen_models = set() - for service, model_files in c2j_models.items(): - # Skip synthetic entries that share a model file with another service (e.g. s3-crt uses s3's model) - if model_files.c2j_model in seen_models: - continue - seen_models.add(model_files.c2j_model) - - full_model_file_path = f"{self.path_to_api_definitions}/{model_files.c2j_model}" - with open(full_model_file_path, 'r') as f: - model = json.load(f) - service_id = model.get("metadata", {}).get("serviceId", "") - if not service_id: - continue - smithy_name = service_id.lower().replace(" ", "-").replace("_", "-") - if smithy_name != service: - mapping[smithy_name] = service - - tmp_path = output_path + ".tmp" - with open(tmp_path, 'w') as f: - json.dump(mapping, f, indent=4, sort_keys=True) - f.write("\n") - os.replace(tmp_path, output_path) diff --git a/tools/scripts/codegen/model_utils.py b/tools/scripts/codegen/model_utils.py index 9e3ce817af4..88a29413c97 100644 --- a/tools/scripts/codegen/model_utils.py +++ b/tools/scripts/codegen/model_utils.py @@ -103,6 +103,39 @@ def _get_models_to_generate(self): return dict((k, self.models_available[k]) for k in clients_to_build if k in clients_to_build) + @staticmethod + def _resolve_service_key(raw_key: str, model_filename: str, models_dir: str, legacy_services: set) -> str: + """Resolve the package directory name for a service. + + For legacy services (in the frozen set), use the filename-based key. + For new services, use the normalized serviceId from the model + so it naturally aligns with what Smithy codegen expects. + """ + key = SERVICE_NAME_REMAPS.get(raw_key, raw_key) + if "." in key: + key = "-".join(reversed(key.split("."))) + if ";" in key: + key = key.replace(";", "-") + + # Legacy service: keep filename-based name + if key in legacy_services: + return key + + # New service: use normalized serviceId if it differs from filename-based key + full_model_path = os.path.join(models_dir, model_filename) + try: + with open(full_model_path, 'r') as f: + model = json.load(f) + service_id = model.get("metadata", {}).get("serviceId", "") + if service_id: + sdk_id_key = service_id.lower().replace(" ", "-").replace("_", "-") + if sdk_id_key != key: + return sdk_id_key + except (json.JSONDecodeError, IOError): + pass + + return key + @staticmethod def _collect_available_models(models_dir: str, endpoint_rules_dir: str) -> dict: """Return a dict of with all available c2j models in a models_dir @@ -111,6 +144,20 @@ def _collect_available_models(models_dir: str, endpoint_rules_dir: str) -> dict: :param endpoint_rules_dir: path to the directory with endpoints dir models :return: dict in models dir """ + # Load the smithy2c2j service map to determine which services are legacy + # (any service whose sdkId-derived name appears as a KEY in this map has a known + # mismatch and uses the filename-based value) + service_map_path = os.path.normpath( + os.path.join(models_dir, "../smithy/cpp-codegen/smithy2c2j_service_map.json")) + legacy_services = set() + if os.path.exists(service_map_path): + try: + with open(service_map_path, 'r') as f: + service_map = json.load(f) + legacy_services = set(service_map.values()) + except (json.JSONDecodeError, IOError): + pass + model_files = os.listdir(models_dir) service_name_to_model_filename_date = dict() @@ -133,11 +180,7 @@ def _collect_available_models(models_dir: str, endpoint_rules_dir: str) -> dict: service_name_to_model_filename = dict() missing = set() for raw_key, model_file_date in service_name_to_model_filename_date.items(): - key = SERVICE_NAME_REMAPS.get(raw_key, raw_key) - if "." in key: - key = "-".join(reversed(key.split("."))) # just replicating existing legacy behavior - if ";" in key: - key = key.replace(";", "-") # just in case... just replicating existing legacy behavior + key = ModelUtils._resolve_service_key(raw_key, model_file_date[0], models_dir, legacy_services) # fetch endpoint-rules filename which is based on ServiceId in c2j models: try: