From 5f79b474ec1bcb2229a5bf2f3011ae56ecea7777 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 08:49:32 +0530 Subject: [PATCH 1/3] fix: decode JSON string and support flat env-var properties for REST auth config Fixes #3422. RestCatalog._create_session() expected auth to be a dict, but environment variables always produce string values. This caused auth initialization to fail for all pluggable auth types (basic, oauth2, google, entra, custom) when configured via PYICEBERG_CATALOG____AUTH. Two complementary fixes: 1. JSON string decode: if the 'auth' property is a string, JSON-parse it before processing. This supports: export PYICEBERG_CATALOG__REST__AUTH='{"type":"oauth2",...}' 2. Flat env-var property support: if 'auth' is absent but 'auth.type' is present, build the auth config from flat 'auth.*' properties. This is the canonical env-var style that avoids JSON-in-env quoting issues: export PYICEBERG_CATALOG__REST__AUTH__TYPE=oauth2 export PYICEBERG_CATALOG__REST__AUTH__OAUTH2__CLIENT_ID=id export PYICEBERG_CATALOG__REST__AUTH__OAUTH2__CLIENT_SECRET=secret export PYICEBERG_CATALOG__REST__AUTH__OAUTH2__TOKEN_URL=https://... Kebab-case keys (e.g. 'client-id') produced by the env-var parser are normalised to snake_case ('client_id') to match AuthManager constructor parameters. Regression tests added for both code paths (basic and oauth2) as well as invalid JSON detection. --- pyiceberg/catalog/rest/__init__.py | 35 +++++++- tests/catalog/test_rest.py | 132 +++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index d085c6fd87..76dea6cd0c 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -16,6 +16,7 @@ # under the License. from __future__ import annotations +import json from collections import deque from enum import Enum from typing import ( @@ -435,7 +436,16 @@ def _create_session(self) -> Session: elif ssl_client_cert := ssl_client.get(CERT): session.cert = ssl_client_cert - if auth_config := self.properties.get(AUTH): + if raw_auth := self.properties.get(AUTH): + # When auth is configured via an environment variable (e.g. PYICEBERG_CATALOG____AUTH), + # the value arrives as a JSON string rather than a dict. Decode it before processing. + if isinstance(raw_auth, str): + try: + auth_config: dict[str, Any] = json.loads(raw_auth) + except json.JSONDecodeError as e: + raise ValueError(f"Failed to parse auth configuration as JSON: {raw_auth!r}") from e + else: + auth_config = raw_auth auth_type = auth_config.get("type") if auth_type is None: raise ValueError("auth.type must be defined") @@ -448,6 +458,29 @@ def _create_session(self) -> Session: if auth_type != CUSTOM and auth_impl: raise ValueError("auth.impl can only be specified when using custom auth.type") + self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) + session.auth = AuthManagerAdapter(self._auth_manager) + elif auth_type := self.properties.get(f"{AUTH}.type"): + # Support flattened env-var style configuration: + # PYICEBERG_CATALOG____AUTH__TYPE=oauth2 + # PYICEBERG_CATALOG____AUTH__OAUTH2__CLIENT_ID=id + # The env-var parser maps these to flat properties like "auth.type" and "auth.oauth2.client-id". + # Key names are converted from kebab-case to snake_case to match AuthManager constructor parameters. + auth_impl = self.properties.get(f"{AUTH}.impl") + + if auth_type == CUSTOM and not auth_impl: + raise ValueError("auth.impl must be specified when using custom auth.type") + + if auth_type != CUSTOM and auth_impl: + raise ValueError("auth.impl can only be specified when using custom auth.type") + + type_prefix = f"{AUTH}.{auth_type}." + auth_type_config = { + k[len(type_prefix):].replace("-", "_"): v + for k, v in self.properties.items() + if k.startswith(type_prefix) + } + self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) session.auth = AuthManagerAdapter(self._auth_manager) else: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 1eb9f26a56..615e579d50 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -3167,3 +3167,135 @@ def test_load_table_without_storage_credentials( ) assert actual.metadata.model_dump() == expected.metadata.model_dump() assert actual == expected + + +# Tests for issue #3422: REST catalog auth cannot be configured via environment +# variables unless auth JSON strings are decoded. + + +def test_rest_catalog_with_basic_auth_as_json_string(rest_mock: Mocker) -> None: + """When auth arrives as a JSON string (e.g. from an environment variable), it should be decoded correctly.""" + import json + + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + auth_dict = { + "type": "basic", + "basic": { + "username": "one", + "password": "two", + }, + } + catalog_properties = { + "uri": TEST_URI, + "auth": json.dumps(auth_dict), + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + encoded_user_pass = base64.b64encode(b"one:two").decode() + expected_auth_header = f"Basic {encoded_user_pass}" + assert rest_mock.last_request.headers["Authorization"] == expected_auth_header + + +def test_rest_catalog_with_oauth2_auth_as_json_string(requests_mock: Mocker) -> None: + """OAuth2 auth configured as a JSON string (e.g. from an environment variable) should work correctly.""" + import json + + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={ + "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", + "token_type": "Bearer", + "expires_in": 3600, + }, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + auth_dict = { + "type": "oauth2", + "oauth2": { + "client_id": "some_client_id", + "client_secret": "some_client_secret", + "token_url": f"{TEST_URI}oauth2/token", + }, + } + catalog_properties = { + "uri": TEST_URI, + "auth": json.dumps(auth_dict), + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + +def test_rest_catalog_with_invalid_json_auth_string() -> None: + """An auth value that is a string but not valid JSON should raise a descriptive ValueError.""" + with pytest.raises(ValueError, match="Failed to parse auth configuration as JSON"): + RestCatalog("rest", uri=TEST_URI, auth="not-valid-json") # type: ignore + + +def test_rest_catalog_with_basic_auth_flat_properties(rest_mock: Mocker) -> None: + """Auth configured via flattened env-var properties (e.g. PYICEBERG_CATALOG____AUTH__TYPE=basic) + should initialise the correct AuthManager. + + The env-var parser converts PYICEBERG_CATALOG____AUTH__TYPE=basic into the flat property + 'auth.type' = 'basic' and PYICEBERG_CATALOG____AUTH__BASIC__USERNAME=one into + 'auth.basic.username' = 'one'. + """ + rest_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog_properties = { + "uri": TEST_URI, + # Flat properties as produced by the env-var config parser + "auth.type": "basic", + "auth.basic.username": "one", + "auth.basic.password": "two", + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI + + encoded_user_pass = base64.b64encode(b"one:two").decode() + expected_auth_header = f"Basic {encoded_user_pass}" + assert rest_mock.last_request.headers["Authorization"] == expected_auth_header + + +def test_rest_catalog_with_oauth2_auth_flat_properties(requests_mock: Mocker) -> None: + """OAuth2 auth configured via flattened env-var properties should work correctly. + + PYICEBERG_CATALOG____AUTH__OAUTH2__CLIENT_ID maps to 'auth.oauth2.client-id'. + The dash is normalised to an underscore ('client_id') when forwarding to OAuth2AuthManager. + """ + requests_mock.post( + f"{TEST_URI}oauth2/token", + json={ + "access_token": "MTQ0NjJkZmQ5OTM2NDE1ZTZjNGZmZjI3", + "token_type": "Bearer", + "expires_in": 3600, + }, + status_code=200, + ) + requests_mock.get( + f"{TEST_URI}v1/config", + json={"defaults": {}, "overrides": {}}, + status_code=200, + ) + catalog_properties = { + "uri": TEST_URI, + # Flat properties as produced by the env-var config parser (note: kebab-case keys) + "auth.type": "oauth2", + "auth.oauth2.client-id": "some_client_id", + "auth.oauth2.client-secret": "some_client_secret", + "auth.oauth2.token-url": f"{TEST_URI}oauth2/token", + } + catalog = RestCatalog("rest", **catalog_properties) # type: ignore + assert catalog.uri == TEST_URI From b2a2e462fee048f4f557ba60b5b423d5e51f8477 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 08:56:42 +0530 Subject: [PATCH 2/3] style: apply ruff format to dict comprehension --- pyiceberg/catalog/rest/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 76dea6cd0c..c2b7b37a17 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -476,9 +476,7 @@ def _create_session(self) -> Session: type_prefix = f"{AUTH}.{auth_type}." auth_type_config = { - k[len(type_prefix):].replace("-", "_"): v - for k, v in self.properties.items() - if k.startswith(type_prefix) + k[len(type_prefix) :].replace("-", "_"): v for k, v in self.properties.items() if k.startswith(type_prefix) } self._auth_manager = AuthManagerFactory.create(auth_impl or auth_type, auth_type_config) From 11a28349ec5427fc94cc47f8298abec34af53830 Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 09:02:38 +0530 Subject: [PATCH 3/3] fix: remove unused type: ignore comments flagged by mypy --- tests/catalog/test_rest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 615e579d50..f642c51bd3 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -3193,7 +3193,7 @@ def test_rest_catalog_with_basic_auth_as_json_string(rest_mock: Mocker) -> None: "uri": TEST_URI, "auth": json.dumps(auth_dict), } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI encoded_user_pass = base64.b64encode(b"one:two").decode() @@ -3231,14 +3231,14 @@ def test_rest_catalog_with_oauth2_auth_as_json_string(requests_mock: Mocker) -> "uri": TEST_URI, "auth": json.dumps(auth_dict), } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI def test_rest_catalog_with_invalid_json_auth_string() -> None: """An auth value that is a string but not valid JSON should raise a descriptive ValueError.""" with pytest.raises(ValueError, match="Failed to parse auth configuration as JSON"): - RestCatalog("rest", uri=TEST_URI, auth="not-valid-json") # type: ignore + RestCatalog("rest", uri=TEST_URI, auth="not-valid-json") def test_rest_catalog_with_basic_auth_flat_properties(rest_mock: Mocker) -> None: @@ -3261,7 +3261,7 @@ def test_rest_catalog_with_basic_auth_flat_properties(rest_mock: Mocker) -> None "auth.basic.username": "one", "auth.basic.password": "two", } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI encoded_user_pass = base64.b64encode(b"one:two").decode() @@ -3297,5 +3297,5 @@ def test_rest_catalog_with_oauth2_auth_flat_properties(requests_mock: Mocker) -> "auth.oauth2.client-secret": "some_client_secret", "auth.oauth2.token-url": f"{TEST_URI}oauth2/token", } - catalog = RestCatalog("rest", **catalog_properties) # type: ignore + catalog = RestCatalog("rest", **catalog_properties) assert catalog.uri == TEST_URI