From 147a0af5e8bb25bb7ed8de12f5348cb13d8cccf6 Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Sat, 4 Apr 2026 07:51:58 +0500 Subject: [PATCH 1/9] feat(extensions): authenticate GitHub-hosted catalog and download requests with GITHUB_TOKEN/GH_TOKEN --- extensions/EXTENSION-USER-GUIDE.md | 17 +++- src/specify_cli/extensions.py | 27 ++++++- tests/test_extensions.py | 126 +++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+), 4 deletions(-) diff --git a/extensions/EXTENSION-USER-GUIDE.md b/extensions/EXTENSION-USER-GUIDE.md index 190e263af2..ca8adfbaab 100644 --- a/extensions/EXTENSION-USER-GUIDE.md +++ b/extensions/EXTENSION-USER-GUIDE.md @@ -421,7 +421,7 @@ In addition to extension-specific environment variables (`SPECKIT_{EXT_ID}_*`), | Variable | Description | Default | |----------|-------------|---------| | `SPECKIT_CATALOG_URL` | Override the full catalog stack with a single URL (backward compat) | Built-in default stack | -| `GH_TOKEN` / `GITHUB_TOKEN` | GitHub API token for downloads | None | +| `GH_TOKEN` / `GITHUB_TOKEN` | GitHub token for authenticated requests to GitHub-hosted URLs (`raw.githubusercontent.com`, `github.com`, `api.github.com`). Required when your catalog JSON or extension ZIPs are hosted in a private GitHub repository. | None | #### Example: Using a custom catalog for testing @@ -433,6 +433,21 @@ export SPECKIT_CATALOG_URL="http://localhost:8000/catalog.json" export SPECKIT_CATALOG_URL="https://example.com/staging/catalog.json" ``` +#### Example: Using a private GitHub-hosted catalog + +```bash +# Authenticate with a token (gh CLI, PAT, or GITHUB_TOKEN in CI) +export GITHUB_TOKEN=$(gh auth token) + +# Search a private catalog added via `specify extension catalog add` +specify extension search jira + +# Install from a private catalog +specify extension add jira-sync +``` + +The token is attached automatically to requests targeting GitHub domains. Non-GitHub catalog URLs are always fetched without credentials. 
+ --- ## Extension Catalogs diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 3420a7651b..096721ddae 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1411,6 +1411,27 @@ def _validate_catalog_url(self, url: str) -> None: if not parsed.netloc: raise ValidationError("Catalog URL must be a valid URL with a host.") + def _make_request(self, url: str) -> "urllib.request.Request": + """Build a urllib Request, adding a GitHub auth header when available. + + Reads GITHUB_TOKEN or GH_TOKEN from the environment and attaches an + ``Authorization: token <token>`` header for requests to GitHub-hosted + domains (``raw.githubusercontent.com``, ``github.com``, + ``api.github.com``). Non-GitHub URLs are returned as plain requests + so credentials are never leaked to third-party hosts. + """ + import os + import urllib.request + + headers: Dict[str, str] = {} + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + if token and any( + host in url + for host in ("raw.githubusercontent.com", "github.com", "api.github.com") + ): + headers["Authorization"] = f"token {token}" + return urllib.request.Request(url, headers=headers) + def _load_catalog_config(self, config_path: Path) -> Optional[List[CatalogEntry]]: """Load catalog stack configuration from a YAML file. 
@@ -1601,7 +1622,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False # Fetch from network try: - with urllib.request.urlopen(entry.url, timeout=10) as response: + with urllib.request.urlopen(self._make_request(entry.url), timeout=10) as response: catalog_data = json.loads(response.read()) if "schema_version" not in catalog_data or "extensions" not in catalog_data: @@ -1718,7 +1739,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: import urllib.request import urllib.error - with urllib.request.urlopen(catalog_url, timeout=10) as response: + with urllib.request.urlopen(self._make_request(catalog_url), timeout=10) as response: catalog_data = json.loads(response.read()) # Validate catalog structure @@ -1861,7 +1882,7 @@ def download_extension(self, extension_id: str, target_dir: Optional[Path] = Non # Download the ZIP file try: - with urllib.request.urlopen(download_url, timeout=60) as response: + with urllib.request.urlopen(self._make_request(download_url), timeout=60) as response: zip_data = response.read() zip_path.write_bytes(zip_data) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 350b368eac..039dfc1f29 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2142,6 +2142,132 @@ def test_clear_cache(self, temp_dir): assert not catalog.cache_file.exists() assert not catalog.cache_metadata_file.exists() + # --- _make_request / GitHub auth --- + + def _make_catalog(self, temp_dir): + project_dir = temp_dir / "project" + project_dir.mkdir() + (project_dir / ".specify").mkdir() + return ExtensionCatalog(project_dir) + + def test_make_request_no_token_no_auth_header(self, temp_dir, monkeypatch): + """Without a token, requests carry no Authorization header.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GH_TOKEN", raising=False) + catalog = self._make_catalog(temp_dir) + req = 
catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert "Authorization" not in req.headers + + def test_make_request_github_token_added_for_raw_githubusercontent(self, temp_dir, monkeypatch): + """GITHUB_TOKEN is attached for raw.githubusercontent.com URLs.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + monkeypatch.delenv("GH_TOKEN", raising=False) + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert req.get_header("Authorization") == "token ghp_testtoken" + + def test_make_request_gh_token_fallback(self, temp_dir, monkeypatch): + """GH_TOKEN is used when GITHUB_TOKEN is absent.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.setenv("GH_TOKEN", "ghp_ghtoken") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://github.com/org/repo/releases/download/v1/ext.zip") + assert req.get_header("Authorization") == "token ghp_ghtoken" + + def test_make_request_github_token_takes_precedence_over_gh_token(self, temp_dir, monkeypatch): + """GITHUB_TOKEN takes precedence over GH_TOKEN when both are set.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_primary") + monkeypatch.setenv("GH_TOKEN", "ghp_secondary") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://api.github.com/repos/org/repo") + assert req.get_header("Authorization") == "token ghp_primary" + + def test_make_request_token_not_added_for_non_github_url(self, temp_dir, monkeypatch): + """Auth header is never attached to non-GitHub URLs to prevent credential leakage.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://internal.example.com/catalog.json") + assert "Authorization" not in req.headers + + def test_make_request_token_added_for_api_github_com(self, temp_dir, monkeypatch): + """GITHUB_TOKEN is attached for 
api.github.com URLs.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://api.github.com/repos/org/repo/releases/assets/1") + assert req.get_header("Authorization") == "token ghp_testtoken" + + def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): + """_fetch_single_catalog passes Authorization header to urlopen for GitHub URLs.""" + from unittest.mock import patch, MagicMock + import io + + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + + catalog_data = {"schema_version": "1.0", "extensions": {}} + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(catalog_data).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + captured = {} + + def fake_urlopen(req, timeout=None): + captured["req"] = req + return mock_response + + entry = CatalogEntry( + url="https://raw.githubusercontent.com/org/repo/main/catalog.json", + name="private", + priority=1, + install_allowed=True, + ) + + with patch("urllib.request.urlopen", fake_urlopen): + catalog._fetch_single_catalog(entry, force_refresh=True) + + assert "Authorization" in captured["req"].headers + assert captured["req"].headers["Authorization"] == "token ghp_testtoken" + + def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): + """download_extension passes Authorization header to urlopen for GitHub URLs.""" + from unittest.mock import patch, MagicMock + import zipfile, io + + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + + # Build a minimal valid ZIP in memory + zip_buf = io.BytesIO() + with zipfile.ZipFile(zip_buf, "w") as zf: + zf.writestr("extension.yml", "id: test-ext\nname: Test\nversion: 1.0.0\n") + zip_bytes = zip_buf.getvalue() + + mock_response = MagicMock() + mock_response.read.return_value = zip_bytes + 
mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + captured = {} + + def fake_urlopen(req, timeout=None): + captured["req"] = req + return mock_response + + ext_info = { + "id": "test-ext", + "name": "Test Extension", + "version": "1.0.0", + "download_url": "https://github.com/org/repo/releases/download/v1/test-ext.zip", + } + + with patch.object(catalog, "get_extension_info", return_value=ext_info), \ + patch("urllib.request.urlopen", fake_urlopen): + catalog.download_extension("test-ext", target_dir=temp_dir) + + assert "Authorization" in captured["req"].headers + assert captured["req"].headers["Authorization"] == "token ghp_testtoken" + # ===== CatalogEntry Tests ===== From c4ef1d1222fbec67814bebaf9189f4e94467bb12 Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Wed, 8 Apr 2026 06:25:49 +0500 Subject: [PATCH 2/9] feat(extensions): replaced host in url substring check with proper hostname parsing & added 3 new spoofing tests -> Issue # 2037 --- src/specify_cli/extensions.py | 8 ++++---- tests/test_extensions.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 096721ddae..08268cd8ec 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1422,13 +1422,13 @@ def _make_request(self, url: str) -> "urllib.request.Request": """ import os import urllib.request + from urllib.parse import urlparse headers: Dict[str, str] = {} token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") - if token and any( - host in url - for host in ("raw.githubusercontent.com", "github.com", "api.github.com") - ): + hostname = (urlparse(url).hostname or "").lower() + github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} + if token and hostname in github_hosts: headers["Authorization"] = f"token {token}" return urllib.request.Request(url, headers=headers) diff --git 
a/tests/test_extensions.py b/tests/test_extensions.py index 039dfc1f29..7e388dce90 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2189,6 +2189,27 @@ def test_make_request_token_not_added_for_non_github_url(self, temp_dir, monkeyp req = catalog._make_request("https://internal.example.com/catalog.json") assert "Authorization" not in req.headers + def test_make_request_token_not_added_for_github_lookalike_host(self, temp_dir, monkeypatch): + """Auth header is not attached to hosts that include github.com as a suffix.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://github.com.evil.com/org/repo/releases/download/v1/ext.zip") + assert "Authorization" not in req.headers + + def test_make_request_token_not_added_for_github_in_path(self, temp_dir, monkeypatch): + """Auth header is not attached when github.com appears only in the URL path.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://evil.example.com/github.com/org/repo/releases/download/v1/ext.zip") + assert "Authorization" not in req.headers + + def test_make_request_token_not_added_for_github_in_query(self, temp_dir, monkeypatch): + """Auth header is not attached when github.com appears only in the query string.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://evil.example.com/download?source=https://github.com/org/repo/v1/ext.zip") + assert "Authorization" not in req.headers + def test_make_request_token_added_for_api_github_com(self, temp_dir, monkeypatch): """GITHUB_TOKEN is attached for api.github.com URLs.""" monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") From eb4f8948deffec5dc5d50f531389195ca47124b1 Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Wed, 8 Apr 2026 06:34:22 +0500 Subject: [PATCH 3/9] fix(extensions): harden 
GitHub token auth against URL spoofing and redirect leakage -> Issue # 2037 --- src/specify_cli/extensions.py | 36 ++++++++++++++++++++++++++++++++--- tests/test_extensions.py | 25 ++++++++++++++---------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index 08268cd8ec..a4715f31cb 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1432,6 +1432,36 @@ def _make_request(self, url: str) -> "urllib.request.Request": headers["Authorization"] = f"token {token}" return urllib.request.Request(url, headers=headers) + def _open_url(self, url: str, timeout: int = 10): + """Open a URL using _make_request, stripping auth on cross-host redirects. + + When the request carries an Authorization header, a custom redirect + handler is used to drop that header if the redirect target is not a + GitHub-hosted domain, preventing token leakage to CDNs or other + third-party hosts that GitHub may redirect to. + """ + import urllib.request + from urllib.parse import urlparse + + req = self._make_request(url) + + if not req.get_header("Authorization"): + return urllib.request.urlopen(req, timeout=timeout) + + _github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} + + class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): + def redirect_request(_self, req, fp, code, msg, headers, newurl): + new_req = super().redirect_request(req, fp, code, msg, headers, newurl) + if new_req is not None: + hostname = (urlparse(newurl).hostname or "").lower() + if hostname not in _github_hosts: + new_req.headers.pop("Authorization", None) + return new_req + + opener = urllib.request.build_opener(_StripAuthOnRedirect) + return opener.open(req, timeout=timeout) + def _load_catalog_config(self, config_path: Path) -> Optional[List[CatalogEntry]]: """Load catalog stack configuration from a YAML file. 
@@ -1622,7 +1652,7 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False # Fetch from network try: - with urllib.request.urlopen(self._make_request(entry.url), timeout=10) as response: + with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) if "schema_version" not in catalog_data or "extensions" not in catalog_data: @@ -1739,7 +1769,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: import urllib.request import urllib.error - with urllib.request.urlopen(self._make_request(catalog_url), timeout=10) as response: + with self._open_url(catalog_url, timeout=10) as response: catalog_data = json.loads(response.read()) # Validate catalog structure @@ -1882,7 +1912,7 @@ def download_extension(self, extension_id: str, target_dir: Optional[Path] = Non # Download the ZIP file try: - with urllib.request.urlopen(self._make_request(download_url), timeout=60) as response: + with self._open_url(download_url, timeout=60) as response: zip_data = response.read() zip_path.write_bytes(zip_data) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 7e388dce90..1e84cee6a8 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2218,9 +2218,8 @@ def test_make_request_token_added_for_api_github_com(self, temp_dir, monkeypatch assert req.get_header("Authorization") == "token ghp_testtoken" def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): - """_fetch_single_catalog passes Authorization header to urlopen for GitHub URLs.""" + """_fetch_single_catalog passes Authorization header via opener for GitHub URLs.""" from unittest.mock import patch, MagicMock - import io monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") catalog = self._make_catalog(temp_dir) @@ -2232,11 +2231,14 @@ def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): mock_response.__exit__ = MagicMock(return_value=False) captured = {} + mock_opener 
= MagicMock() - def fake_urlopen(req, timeout=None): + def fake_open(req, timeout=None): captured["req"] = req return mock_response + mock_opener.open.side_effect = fake_open + entry = CatalogEntry( url="https://raw.githubusercontent.com/org/repo/main/catalog.json", name="private", @@ -2244,11 +2246,10 @@ def fake_urlopen(req, timeout=None): install_allowed=True, ) - with patch("urllib.request.urlopen", fake_urlopen): + with patch("urllib.request.build_opener", return_value=mock_opener): catalog._fetch_single_catalog(entry, force_refresh=True) - assert "Authorization" in captured["req"].headers - assert captured["req"].headers["Authorization"] == "token ghp_testtoken" + assert captured["req"].get_header("Authorization") == "token ghp_testtoken" def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): """download_extension passes Authorization header to urlopen for GitHub URLs.""" @@ -2271,10 +2272,14 @@ def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): captured = {} - def fake_urlopen(req, timeout=None): + mock_opener = MagicMock() + + def fake_open(req, timeout=None): captured["req"] = req return mock_response + mock_opener.open.side_effect = fake_open + ext_info = { "id": "test-ext", "name": "Test Extension", @@ -2283,11 +2288,11 @@ def fake_urlopen(req, timeout=None): } with patch.object(catalog, "get_extension_info", return_value=ext_info), \ - patch("urllib.request.urlopen", fake_urlopen): + patch("urllib.request.build_opener", return_value=mock_opener): catalog.download_extension("test-ext", target_dir=temp_dir) - assert "Authorization" in captured["req"].headers - assert captured["req"].headers["Authorization"] == "token ghp_testtoken" + assert captured["req"].get_header("Authorization") == "token ghp_testtoken" + # ===== CatalogEntry Tests ===== From f32d0598db04acac2542f9e4241f7f61b3c47d1c Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Thu, 9 Apr 2026 14:54:26 +0500 Subject: [PATCH 4/9] fix(presets): 
authenticate GitHub-hosted catalog and download requests with GITHUB_TOKEN/GH_TOKEN -> Issue # 2037 --- src/specify_cli/presets.py | 56 ++++++++++++++- tests/test_presets.py | 136 +++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+), 3 deletions(-) diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 0c8bba1757..804904faf0 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -1178,6 +1178,56 @@ def _validate_catalog_url(self, url: str) -> None: "Catalog URL must be a valid URL with a host." ) + def _make_request(self, url: str) -> "urllib.request.Request": + """Build a urllib Request, adding a GitHub auth header when available. + + Reads GITHUB_TOKEN or GH_TOKEN from the environment and attaches an + ``Authorization: token <token>`` header for requests to GitHub-hosted + domains (``raw.githubusercontent.com``, ``github.com``, + ``api.github.com``). Non-GitHub URLs are returned as plain requests + so credentials are never leaked to third-party hosts. + """ + import urllib.request + from urllib.parse import urlparse + + headers: Dict[str, str] = {} + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + hostname = (urlparse(url).hostname or "").lower() + github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} + if token and hostname in github_hosts: + headers["Authorization"] = f"token {token}" + return urllib.request.Request(url, headers=headers) + + def _open_url(self, url: str, timeout: int = 10): + """Open a URL using _make_request, stripping auth on cross-host redirects. + + When the request carries an Authorization header, a custom redirect + handler is used to drop that header if the redirect target is not a + GitHub-hosted domain, preventing token leakage to CDNs or other + third-party hosts that GitHub may redirect to. 
+ """ + import urllib.request + from urllib.parse import urlparse + + req = self._make_request(url) + + if not req.get_header("Authorization"): + return urllib.request.urlopen(req, timeout=timeout) + + _github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} + + class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): + def redirect_request(_self, req, fp, code, msg, headers, newurl): + new_req = super().redirect_request(req, fp, code, msg, headers, newurl) + if new_req is not None: + hostname = (urlparse(newurl).hostname or "").lower() + if hostname not in _github_hosts: + new_req.headers.pop("Authorization", None) + return new_req + + opener = urllib.request.build_opener(_StripAuthOnRedirect) + return opener.open(req, timeout=timeout) + def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]: """Load catalog stack configuration from a YAML file. @@ -1363,7 +1413,7 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = import urllib.request import urllib.error - with urllib.request.urlopen(entry.url, timeout=10) as response: + with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) if ( @@ -1459,7 +1509,7 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: import urllib.request import urllib.error - with urllib.request.urlopen(catalog_url, timeout=10) as response: + with self._open_url(catalog_url, timeout=10) as response: catalog_data = json.loads(response.read()) if ( @@ -1620,7 +1670,7 @@ def download_pack( zip_path = target_dir / zip_filename try: - with urllib.request.urlopen(download_url, timeout=60) as response: + with self._open_url(download_url, timeout=60) as response: zip_data = response.read() zip_path.write_bytes(zip_data) diff --git a/tests/test_presets.py b/tests/test_presets.py index d22264f806..ea0c184401 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1276,6 +1276,142 @@ def 
test_env_var_catalog_url(self, project_dir, monkeypatch): catalog = PresetCatalog(project_dir) assert catalog.get_catalog_url() == "https://custom.example.com/catalog.json" + # --- _make_request / GitHub auth --- + + def test_make_request_no_token_no_auth_header(self, project_dir, monkeypatch): + """Without a token, requests carry no Authorization header.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GH_TOKEN", raising=False) + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert "Authorization" not in req.headers + + def test_make_request_github_token_added_for_github_url(self, project_dir, monkeypatch): + """GITHUB_TOKEN is attached for raw.githubusercontent.com URLs.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + monkeypatch.delenv("GH_TOKEN", raising=False) + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert req.get_header("Authorization") == "token ghp_testtoken" + + def test_make_request_gh_token_fallback(self, project_dir, monkeypatch): + """GH_TOKEN is used when GITHUB_TOKEN is absent.""" + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + monkeypatch.setenv("GH_TOKEN", "ghp_ghtoken") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://github.com/org/repo/releases/download/v1/pack.zip") + assert req.get_header("Authorization") == "token ghp_ghtoken" + + def test_make_request_github_token_takes_precedence(self, project_dir, monkeypatch): + """GITHUB_TOKEN takes precedence over GH_TOKEN when both are set.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_primary") + monkeypatch.setenv("GH_TOKEN", "ghp_secondary") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://api.github.com/repos/org/repo") + assert req.get_header("Authorization") == "token ghp_primary" + + def 
test_make_request_token_not_added_for_non_github_url(self, project_dir, monkeypatch): + """Auth header is never attached to non-GitHub URLs to prevent credential leakage.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://internal.example.com/catalog.json") + assert "Authorization" not in req.headers + + def test_make_request_token_not_added_for_github_lookalike_host(self, project_dir, monkeypatch): + """Auth header is not attached to hosts that include github.com as a suffix.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://github.com.evil.com/org/repo/releases/download/v1/pack.zip") + assert "Authorization" not in req.headers + + def test_make_request_token_not_added_for_github_in_path(self, project_dir, monkeypatch): + """Auth header is not attached when github.com appears only in the URL path.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://evil.example.com/github.com/org/repo/releases/download/v1/pack.zip") + assert "Authorization" not in req.headers + + def test_make_request_token_not_added_for_github_in_query(self, project_dir, monkeypatch): + """Auth header is not attached when github.com appears only in the query string.""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://evil.example.com/download?source=https://github.com/org/repo/v1/pack.zip") + assert "Authorization" not in req.headers + + def test_fetch_single_catalog_sends_auth_header(self, project_dir, monkeypatch): + """_fetch_single_catalog passes Authorization header via opener for GitHub URLs.""" + from unittest.mock import patch, MagicMock + + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = PresetCatalog(project_dir) + + catalog_data = 
{"schema_version": "1.0", "presets": {}} + mock_response = MagicMock() + mock_response.read.return_value = json.dumps(catalog_data).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + captured = {} + mock_opener = MagicMock() + + def fake_open(req, timeout=None): + captured["req"] = req + return mock_response + + mock_opener.open.side_effect = fake_open + + entry = PresetCatalogEntry( + url="https://raw.githubusercontent.com/org/repo/main/presets/catalog.json", + name="private", + priority=1, + install_allowed=True, + ) + + with patch("urllib.request.build_opener", return_value=mock_opener): + catalog._fetch_single_catalog(entry, force_refresh=True) + + assert captured["req"].get_header("Authorization") == "token ghp_testtoken" + + def test_download_pack_sends_auth_header(self, project_dir, monkeypatch): + """download_pack passes Authorization header via opener for GitHub URLs.""" + from unittest.mock import patch, MagicMock + + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = PresetCatalog(project_dir) + + zip_buf = __import__("io").BytesIO() + with zipfile.ZipFile(zip_buf, "w") as zf: + zf.writestr("preset.yml", "id: test-pack\nname: Test\nversion: 1.0.0\n") + zip_bytes = zip_buf.getvalue() + + mock_response = MagicMock() + mock_response.read.return_value = zip_bytes + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + captured = {} + mock_opener = MagicMock() + + def fake_open(req, timeout=None): + captured["req"] = req + return mock_response + + mock_opener.open.side_effect = fake_open + + pack_info = { + "id": "test-pack", + "name": "Test Pack", + "version": "1.0.0", + "download_url": "https://github.com/org/repo/releases/download/v1/test-pack.zip", + "_install_allowed": True, + } + + with patch.object(catalog, "get_pack_info", return_value=pack_info), \ + patch("urllib.request.build_opener", return_value=mock_opener): + 
catalog.download_pack("test-pack", target_dir=project_dir) + + assert captured["req"].get_header("Authorization") == "token ghp_testtoken" + # ===== Integration Tests ===== From 2ccd213f5177a6c0e5a9ce51603d1434c28a7acf Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Thu, 9 Apr 2026 16:10:13 +0500 Subject: [PATCH 5/9] fix(presets): Updated presets README.md file -> Issue # 2037 --- presets/README.md | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/presets/README.md b/presets/README.md index dd3997b239..2ccf85f1d0 100644 --- a/presets/README.md +++ b/presets/README.md @@ -93,9 +93,25 @@ See [scaffold/](scaffold/) for a scaffold you can copy to create your own preset ## Environment Variables -| Variable | Description | -|----------|-------------| -| `SPECKIT_PRESET_CATALOG_URL` | Override the catalog URL (replaces all defaults) | +| Variable | Description | Default | +|----------|-------------|---------| +| `SPECKIT_PRESET_CATALOG_URL` | Override the full catalog stack with a single URL (replaces all defaults) | Built-in default stack | +| `GH_TOKEN` / `GITHUB_TOKEN` | GitHub token for authenticated requests to GitHub-hosted URLs (`raw.githubusercontent.com`, `github.com`, `api.github.com`). Required when your catalog JSON or preset ZIPs are hosted in a private GitHub repository. | None | + +#### Example: Using a private GitHub-hosted catalog + +```bash +# Authenticate with a token (gh CLI, PAT, or GITHUB_TOKEN in CI) +export GITHUB_TOKEN=$(gh auth token) + +# Search a private catalog added via `specify preset catalog add` +specify preset search my-template + +# Install from a private catalog +specify preset add my-template +``` + +The token is attached automatically to requests targeting GitHub domains. Non-GitHub catalog URLs are always fetched without credentials. 
## Configuration Files From 8a918e63971d7e075f1e3f6e584cc26323c3971f Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Mon, 13 Apr 2026 14:43:35 +0500 Subject: [PATCH 6/9] =?UTF-8?q?fix(extensions,presets):=20harden=20GitHub?= =?UTF-8?q?=20token=20auth=20=E2=80=94=20codeload=20host,=20redirect=20saf?= =?UTF-8?q?ety,=20shared=20helper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/specify_cli/_github_http.py | 65 +++++++++++++++++++++++++++++++++ src/specify_cli/extensions.py | 49 ++++--------------------- src/specify_cli/presets.py | 48 ++++-------------------- tests/test_extensions.py | 9 ++++- tests/test_presets.py | 7 ++++ 5 files changed, 94 insertions(+), 84 deletions(-) create mode 100644 src/specify_cli/_github_http.py diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py new file mode 100644 index 0000000000..6822c3f665 --- /dev/null +++ b/src/specify_cli/_github_http.py @@ -0,0 +1,65 @@ +"""Shared GitHub-authenticated HTTP helpers. + +Used by both ExtensionCatalog and PresetCatalog to attach +GITHUB_TOKEN / GH_TOKEN credentials to requests targeting +GitHub-hosted domains, while preventing token leakage to +third-party hosts on redirects. +""" + +import os +import urllib.request +from urllib.parse import urlparse +from typing import Dict + +# GitHub-owned hostnames that should receive the Authorization header. +# Includes codeload.github.com because GitHub archive URL downloads +# (e.g. /archive/refs/tags/<tag>.zip) redirect there and require auth +# for private repositories. +GITHUB_HOSTS = frozenset({ + "raw.githubusercontent.com", + "github.com", + "api.github.com", + "codeload.github.com", +}) + + +def build_github_request(url: str) -> urllib.request.Request: + """Build a urllib Request, adding a GitHub auth header when available. 
+ + Reads GITHUB_TOKEN or GH_TOKEN from the environment and attaches an + ``Authorization: token <token>`` header when the target hostname is one + of the known GitHub-owned domains. Non-GitHub URLs are returned as plain + requests so credentials are never leaked to third-party hosts. + """ + headers: Dict[str, str] = {} + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + hostname = (urlparse(url).hostname or "").lower() + if token and hostname in GITHUB_HOSTS: + headers["Authorization"] = f"token {token}" + return urllib.request.Request(url, headers=headers) + + +def open_github_url(url: str, timeout: int = 10): + """Open a URL with GitHub auth, stripping the header on cross-host redirects. + + When the request carries an Authorization header, a custom redirect + handler drops that header if the redirect target is not a GitHub-owned + domain, preventing token leakage to CDNs or other third-party hosts + that GitHub may redirect to (e.g. S3 for release asset downloads). + """ + req = build_github_request(url) + + if not req.get_header("Authorization"): + return urllib.request.urlopen(req, timeout=timeout) + + class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): + def redirect_request(_self, req, fp, code, msg, headers, newurl): + new_req = super().redirect_request(req, fp, code, msg, headers, newurl) + if new_req is not None: + hostname = (urlparse(newurl).hostname or "").lower() + if hostname not in GITHUB_HOSTS: + new_req.headers.pop("Authorization", None) + return new_req + + opener = urllib.request.build_opener(_StripAuthOnRedirect) + return opener.open(req, timeout=timeout) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index a4715f31cb..a7f8fb6619 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1414,53 +1414,18 @@ def _validate_catalog_url(self, url: str) -> None: def _make_request(self, url: str) -> "urllib.request.Request": """Build a urllib Request, adding a GitHub auth 
header when available. - Reads GITHUB_TOKEN or GH_TOKEN from the environment and attaches an - ``Authorization: token `` header for requests to GitHub-hosted - domains (``raw.githubusercontent.com``, ``github.com``, - ``api.github.com``). Non-GitHub URLs are returned as plain requests - so credentials are never leaked to third-party hosts. + Delegates to :func:`specify_cli._github_http.build_github_request`. """ - import os - import urllib.request - from urllib.parse import urlparse - - headers: Dict[str, str] = {} - token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") - hostname = (urlparse(url).hostname or "").lower() - github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} - if token and hostname in github_hosts: - headers["Authorization"] = f"token {token}" - return urllib.request.Request(url, headers=headers) + from specify_cli._github_http import build_github_request + return build_github_request(url) def _open_url(self, url: str, timeout: int = 10): - """Open a URL using _make_request, stripping auth on cross-host redirects. + """Open a URL with GitHub auth, stripping the header on cross-host redirects. - When the request carries an Authorization header, a custom redirect - handler is used to drop that header if the redirect target is not a - GitHub-hosted domain, preventing token leakage to CDNs or other - third-party hosts that GitHub may redirect to. + Delegates to :func:`specify_cli._github_http.open_github_url`. 
""" - import urllib.request - from urllib.parse import urlparse - - req = self._make_request(url) - - if not req.get_header("Authorization"): - return urllib.request.urlopen(req, timeout=timeout) - - _github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} - - class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): - def redirect_request(_self, req, fp, code, msg, headers, newurl): - new_req = super().redirect_request(req, fp, code, msg, headers, newurl) - if new_req is not None: - hostname = (urlparse(newurl).hostname or "").lower() - if hostname not in _github_hosts: - new_req.headers.pop("Authorization", None) - return new_req - - opener = urllib.request.build_opener(_StripAuthOnRedirect) - return opener.open(req, timeout=timeout) + from specify_cli._github_http import open_github_url + return open_github_url(url, timeout) def _load_catalog_config(self, config_path: Path) -> Optional[List[CatalogEntry]]: """Load catalog stack configuration from a YAML file. diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 804904faf0..1c967787e2 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -1181,52 +1181,18 @@ def _validate_catalog_url(self, url: str) -> None: def _make_request(self, url: str) -> "urllib.request.Request": """Build a urllib Request, adding a GitHub auth header when available. - Reads GITHUB_TOKEN or GH_TOKEN from the environment and attaches an - ``Authorization: token `` header for requests to GitHub-hosted - domains (``raw.githubusercontent.com``, ``github.com``, - ``api.github.com``). Non-GitHub URLs are returned as plain requests - so credentials are never leaked to third-party hosts. + Delegates to :func:`specify_cli._github_http.build_github_request`. 
""" - import urllib.request - from urllib.parse import urlparse - - headers: Dict[str, str] = {} - token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") - hostname = (urlparse(url).hostname or "").lower() - github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} - if token and hostname in github_hosts: - headers["Authorization"] = f"token {token}" - return urllib.request.Request(url, headers=headers) + from specify_cli._github_http import build_github_request + return build_github_request(url) def _open_url(self, url: str, timeout: int = 10): - """Open a URL using _make_request, stripping auth on cross-host redirects. + """Open a URL with GitHub auth, stripping the header on cross-host redirects. - When the request carries an Authorization header, a custom redirect - handler is used to drop that header if the redirect target is not a - GitHub-hosted domain, preventing token leakage to CDNs or other - third-party hosts that GitHub may redirect to. + Delegates to :func:`specify_cli._github_http.open_github_url`. 
""" - import urllib.request - from urllib.parse import urlparse - - req = self._make_request(url) - - if not req.get_header("Authorization"): - return urllib.request.urlopen(req, timeout=timeout) - - _github_hosts = {"raw.githubusercontent.com", "github.com", "api.github.com"} - - class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): - def redirect_request(_self, req, fp, code, msg, headers, newurl): - new_req = super().redirect_request(req, fp, code, msg, headers, newurl) - if new_req is not None: - hostname = (urlparse(newurl).hostname or "").lower() - if hostname not in _github_hosts: - new_req.headers.pop("Authorization", None) - return new_req - - opener = urllib.request.build_opener(_StripAuthOnRedirect) - return opener.open(req, timeout=timeout) + from specify_cli._github_http import open_github_url + return open_github_url(url, timeout) def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]: """Load catalog stack configuration from a YAML file. 
diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 1e84cee6a8..0394e2734d 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2217,6 +2217,13 @@ def test_make_request_token_added_for_api_github_com(self, temp_dir, monkeypatch req = catalog._make_request("https://api.github.com/repos/org/repo/releases/assets/1") assert req.get_header("Authorization") == "token ghp_testtoken" + def test_make_request_token_added_for_codeload_github_com(self, temp_dir, monkeypatch): + """GITHUB_TOKEN is attached for codeload.github.com URLs (GitHub archive redirects).""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://codeload.github.com/org/repo/zip/refs/tags/v1.0.0") + assert req.get_header("Authorization") == "token ghp_testtoken" + def test_fetch_single_catalog_sends_auth_header(self, temp_dir, monkeypatch): """_fetch_single_catalog passes Authorization header via opener for GitHub URLs.""" from unittest.mock import patch, MagicMock @@ -2252,7 +2259,7 @@ def fake_open(req, timeout=None): assert captured["req"].get_header("Authorization") == "token ghp_testtoken" def test_download_extension_sends_auth_header(self, temp_dir, monkeypatch): - """download_extension passes Authorization header to urlopen for GitHub URLs.""" + """download_extension passes Authorization header via opener for GitHub URLs.""" from unittest.mock import patch, MagicMock import zipfile, io diff --git a/tests/test_presets.py b/tests/test_presets.py index ea0c184401..2e9fc22d67 100644 --- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1310,6 +1310,13 @@ def test_make_request_github_token_takes_precedence(self, project_dir, monkeypat req = catalog._make_request("https://api.github.com/repos/org/repo") assert req.get_header("Authorization") == "token ghp_primary" + def test_make_request_token_added_for_codeload_github_com(self, project_dir, monkeypatch): + """GITHUB_TOKEN is 
attached for codeload.github.com URLs (GitHub archive redirects).""" + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://codeload.github.com/org/repo/zip/refs/tags/v1.0.0") + assert req.get_header("Authorization") == "token ghp_testtoken" + def test_make_request_token_not_added_for_non_github_url(self, project_dir, monkeypatch): """Auth header is never attached to non-GitHub URLs to prevent credential leakage.""" monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") From 9c0dc2e1e5b8c89d3bf9fc68402565232066979e Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Mon, 13 Apr 2026 14:57:24 +0500 Subject: [PATCH 7/9] fix(auth): strip whitespace from GITHUB_TOKEN/GH_TOKEN and fall back correctly when token is blank --- src/specify_cli/_github_http.py | 4 +++- tests/test_extensions.py | 16 ++++++++++++++++ tests/test_presets.py | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index 6822c3f665..3e26685fa9 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -32,7 +32,9 @@ def build_github_request(url: str) -> urllib.request.Request: requests so credentials are never leaked to third-party hosts. 
""" headers: Dict[str, str] = {} - token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + github_token = (os.environ.get("GITHUB_TOKEN") or "").strip() + gh_token = (os.environ.get("GH_TOKEN") or "").strip() + token = github_token or gh_token or None hostname = (urlparse(url).hostname or "").lower() if token and hostname in GITHUB_HOSTS: headers["Authorization"] = f"token {token}" diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 0394e2734d..b9a16f7567 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -2158,6 +2158,22 @@ def test_make_request_no_token_no_auth_header(self, temp_dir, monkeypatch): req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") assert "Authorization" not in req.headers + def test_make_request_whitespace_only_github_token_ignored(self, temp_dir, monkeypatch): + """A whitespace-only GITHUB_TOKEN is treated as unset.""" + monkeypatch.setenv("GITHUB_TOKEN", " ") + monkeypatch.delenv("GH_TOKEN", raising=False) + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert "Authorization" not in req.headers + + def test_make_request_whitespace_github_token_falls_back_to_gh_token(self, temp_dir, monkeypatch): + """When GITHUB_TOKEN is whitespace-only, GH_TOKEN is used as fallback.""" + monkeypatch.setenv("GITHUB_TOKEN", " ") + monkeypatch.setenv("GH_TOKEN", "ghp_fallback") + catalog = self._make_catalog(temp_dir) + req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert req.get_header("Authorization") == "token ghp_fallback" + def test_make_request_github_token_added_for_raw_githubusercontent(self, temp_dir, monkeypatch): """GITHUB_TOKEN is attached for raw.githubusercontent.com URLs.""" monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") diff --git a/tests/test_presets.py b/tests/test_presets.py index 2e9fc22d67..89d0344215 100644 
--- a/tests/test_presets.py +++ b/tests/test_presets.py @@ -1286,6 +1286,22 @@ def test_make_request_no_token_no_auth_header(self, project_dir, monkeypatch): req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") assert "Authorization" not in req.headers + def test_make_request_whitespace_only_github_token_ignored(self, project_dir, monkeypatch): + """A whitespace-only GITHUB_TOKEN is treated as unset.""" + monkeypatch.setenv("GITHUB_TOKEN", " ") + monkeypatch.delenv("GH_TOKEN", raising=False) + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert "Authorization" not in req.headers + + def test_make_request_whitespace_github_token_falls_back_to_gh_token(self, project_dir, monkeypatch): + """When GITHUB_TOKEN is whitespace-only, GH_TOKEN is used as fallback.""" + monkeypatch.setenv("GITHUB_TOKEN", " ") + monkeypatch.setenv("GH_TOKEN", "ghp_fallback") + catalog = PresetCatalog(project_dir) + req = catalog._make_request("https://raw.githubusercontent.com/org/repo/main/catalog.json") + assert req.get_header("Authorization") == "token ghp_fallback" + def test_make_request_github_token_added_for_github_url(self, project_dir, monkeypatch): """GITHUB_TOKEN is attached for raw.githubusercontent.com URLs.""" monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") From 40539228a7e35a5d4c745265b6396d06aeda2f2b Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Mon, 13 Apr 2026 15:00:57 +0500 Subject: [PATCH 8/9] refactor(auth): lift _StripAuthOnRedirect to module level in _github_http --- src/specify_cli/_github_http.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/specify_cli/_github_http.py b/src/specify_cli/_github_http.py index 3e26685fa9..1d4d3d6b32 100644 --- a/src/specify_cli/_github_http.py +++ b/src/specify_cli/_github_http.py @@ -41,6 +41,23 @@ def build_github_request(url: str) -> 
urllib.request.Request: return urllib.request.Request(url, headers=headers) +class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): + """Redirect handler that drops the Authorization header when leaving GitHub. + + Prevents token leakage to CDNs or other third-party hosts that GitHub + may redirect to (e.g. S3 for release asset downloads, objects.githubusercontent.com). + Auth is preserved as long as the redirect target remains within GITHUB_HOSTS. + """ + + def redirect_request(self, req, fp, code, msg, headers, newurl): + new_req = super().redirect_request(req, fp, code, msg, headers, newurl) + if new_req is not None: + hostname = (urlparse(newurl).hostname or "").lower() + if hostname not in GITHUB_HOSTS: + new_req.headers.pop("Authorization", None) + return new_req + + def open_github_url(url: str, timeout: int = 10): """Open a URL with GitHub auth, stripping the header on cross-host redirects. @@ -54,14 +71,5 @@ def open_github_url(url: str, timeout: int = 10): if not req.get_header("Authorization"): return urllib.request.urlopen(req, timeout=timeout) - class _StripAuthOnRedirect(urllib.request.HTTPRedirectHandler): - def redirect_request(_self, req, fp, code, msg, headers, newurl): - new_req = super().redirect_request(req, fp, code, msg, headers, newurl) - if new_req is not None: - hostname = (urlparse(newurl).hostname or "").lower() - if hostname not in GITHUB_HOSTS: - new_req.headers.pop("Authorization", None) - return new_req - opener = urllib.request.build_opener(_StripAuthOnRedirect) return opener.open(req, timeout=timeout) From 1d63f2db21a376125f2f066f49afa55bcfa72108 Mon Sep 17 00:00:00 2001 From: Anas Seth Date: Mon, 13 Apr 2026 19:13:06 +0500 Subject: [PATCH 9/9] =?UTF-8?q?fix(lint):=20resolve=20ruff=20F821/F401=20e?= =?UTF-8?q?rrors=20=E2=80=94=20drop=20stale=20urllib=20annotations=20and?= =?UTF-8?q?=20unused=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
src/specify_cli/extensions.py | 5 +---- src/specify_cli/presets.py | 9 +-------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/specify_cli/extensions.py b/src/specify_cli/extensions.py index a7f8fb6619..f5ac0ef0ab 100644 --- a/src/specify_cli/extensions.py +++ b/src/specify_cli/extensions.py @@ -1411,7 +1411,7 @@ def _validate_catalog_url(self, url: str) -> None: if not parsed.netloc: raise ValidationError("Catalog URL must be a valid URL with a host.") - def _make_request(self, url: str) -> "urllib.request.Request": + def _make_request(self, url: str): """Build a urllib Request, adding a GitHub auth header when available. Delegates to :func:`specify_cli._github_http.build_github_request`. @@ -1583,7 +1583,6 @@ def _fetch_single_catalog(self, entry: CatalogEntry, force_refresh: bool = False Raises: ExtensionError: If catalog cannot be fetched or has invalid format """ - import urllib.request import urllib.error # Determine cache file paths (backward compat for default catalog) @@ -1731,7 +1730,6 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: catalog_url = self.get_catalog_url() try: - import urllib.request import urllib.error with self._open_url(catalog_url, timeout=10) as response: @@ -1845,7 +1843,6 @@ def download_extension(self, extension_id: str, target_dir: Optional[Path] = Non Raises: ExtensionError: If extension not found or download fails """ - import urllib.request import urllib.error # Get extension info from catalog diff --git a/src/specify_cli/presets.py b/src/specify_cli/presets.py index 1c967787e2..56ca603cf3 100644 --- a/src/specify_cli/presets.py +++ b/src/specify_cli/presets.py @@ -1178,7 +1178,7 @@ def _validate_catalog_url(self, url: str) -> None: "Catalog URL must be a valid URL with a host." ) - def _make_request(self, url: str) -> "urllib.request.Request": + def _make_request(self, url: str): """Build a urllib Request, adding a GitHub auth header when available. 
Delegates to :func:`specify_cli._github_http.build_github_request`. @@ -1376,9 +1376,6 @@ def _fetch_single_catalog(self, entry: PresetCatalogEntry, force_refresh: bool = pass try: - import urllib.request - import urllib.error - with self._open_url(entry.url, timeout=10) as response: catalog_data = json.loads(response.read()) @@ -1472,9 +1469,6 @@ def fetch_catalog(self, force_refresh: bool = False) -> Dict[str, Any]: pass try: - import urllib.request - import urllib.error - with self._open_url(catalog_url, timeout=10) as response: catalog_data = json.loads(response.read()) @@ -1594,7 +1588,6 @@ def download_pack( Raises: PresetError: If pack not found or download fails """ - import urllib.request import urllib.error pack_info = self.get_pack_info(pack_id)