Fix #57: drop dead _app- filter and refresh hardcoded /api/bleed fallbacks #58
@@ -75,7 +75,7 @@ def __extract_search_url_script(self, script_content: str):

 class SearchAuthToken:
-    search_url = "api/s"
+    search_url = "api/bleed"
     search_url_endpoint = "/init"
     auth_token = None
     auth_key = None
@@ -105,8 +105,12 @@ class HTMLRequests:
     BASE_URL = 'https://howlongtobeat.com/'
     REFERER_HEADER = BASE_URL
     GAME_URL = BASE_URL + "game"
-    # Static search url to use in case it can't be extracted from JS code
-    SEARCH_URL = BASE_URL + "api/s/"
+    # Static search url used as a fallback if extraction from JS fails.
+    # HLTB rotates this name periodically (api/find -> api/finder -> api/bleed,
+    # current as of 2026-05). The runtime extraction in
+    # send_website_request_getcode is the source of truth — this is just
+    # a backstop.
+    SEARCH_URL = BASE_URL + "api/bleed"
Owner: Same reason as above + remove the comments
     HTML_PARSER = 'html.parser'

     @staticmethod
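The new comment block describes a source-of-truth/backstop split between the scraped endpoint and this constant. A minimal sketch of a caller honouring that split (the helper name resolve_search_url is illustrative and not part of the library; BASE_URL, SEARCH_URL and send_website_request_getcode are the names from the diff above, and the import path is assumed from the new tests):

# Illustrative helper only, not library code. Assumes the HTMLRequests class
# lives in howlongtobeatpy.HTMLRequests, the module the new tests import from.
from howlongtobeatpy.HTMLRequests import HTMLRequests

def resolve_search_url(user_agent: str) -> str:
    # Source of truth: the endpoint scraped at runtime from HLTB's JS bundle.
    search_info = HTMLRequests.send_website_request_getcode(False, user_agent)
    if search_info is not None and search_info.search_url is not None:
        return HTMLRequests.BASE_URL + search_info.search_url
    # Backstop: the hardcoded constant, which goes stale whenever HLTB
    # rotates the endpoint name again.
    return HTMLRequests.SEARCH_URL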
@@ -347,21 +351,23 @@ def send_website_request_getcode(parse_all_scripts: bool, user_agent):
         """
         Function that send a request to howlongtobeat to scrape the correct search url
         @return: The search informations to use in the request
+
+        Note: ``parse_all_scripts`` is kept for backward compatibility but no
+        longer changes which scripts are inspected. HLTB used to bundle the
+        relevant code under ``_app-*.js``, but the modern (Turbopack) build
+        emits opaque chunk names like ``0-~-0up.q3_p0.js``, so a name-based
+        filter is no longer reliable — we iterate every ``<script src>`` tag
+        and stop at the first one that yields a ``search_url``.
         """
         # Make the post request and return the result if is valid
         headers = HTMLRequests.get_title_request_headers(user_agent)
         resp = requests.get(HTMLRequests.BASE_URL, headers=headers, timeout=60)
         if resp.status_code == 200 and resp.text is not None:
             # Parse the HTML content using BeautifulSoup
             soup = BeautifulSoup(resp.text, HTMLRequests.HTML_PARSER)
-            # Find all <script> tags with a src attribute containing the substring
             scripts = soup.find_all('script', src=True)
-            if parse_all_scripts:
-                matching_scripts = [script['src'] for script in scripts]
-            else:
-                matching_scripts = [script['src'] for script in scripts if '_app-' in script['src']]
-            for script_url in matching_scripts:
-                script_url = HTMLRequests.BASE_URL + script_url
+            for script in scripts:
+                script_url = HTMLRequests.BASE_URL + script['src']
                 script_resp = requests.get(script_url, headers=headers, timeout=60)
                 if script_resp.status_code == 200 and script_resp.text is not None:
                     search_info = SearchInformations(script_resp.text)
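To make the docstring's point concrete: chunk names like the one it cites contain no "_app-" substring, so with the old name filter active nothing matched and no script was ever fetched. A standalone illustration (the second chunk name is invented for the example):

# The first name is the example cited in the docstring above; the second is
# made up. Neither contains "_app-", so the old filter yields an empty list.
srcs = ["0-~-0up.q3_p0.js", "9f3c.abcd12.js"]

old_selection = [s for s in srcs if '_app-' in s]  # pre-PR filter
new_selection = [s for s in srcs]                  # post-PR: try every script

assert old_selection == []
assert new_selection == srcs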
@@ -374,36 +380,31 @@ async def async_send_website_request_getcode(parse_all_scripts: bool, user_agent
         """
         Function that send a request to howlongtobeat to scrape the correct search url
         @return: The search informations to use in the request
+
+        See ``send_website_request_getcode`` for why ``parse_all_scripts`` is
+        no longer used.
         """
         # Make the post request and return the result if is valid
         headers = HTMLRequests.get_title_request_headers(user_agent)
         timeout = aiohttp.ClientTimeout(total=60)
         async with aiohttp.ClientSession() as session:
             async with session.get(HTMLRequests.BASE_URL, headers=headers, timeout=timeout) as resp:
-                if resp is not None and resp.status == 200:
-                    resp_text = await resp.text()
-                    # Parse the HTML content using BeautifulSoup
-                    soup = BeautifulSoup(resp_text, HTMLRequests.HTML_PARSER)
-                    # Find all <script> tags with a src attribute containing the substring
-                    scripts = soup.find_all('script', src=True)
-                    if parse_all_scripts:
-                        matching_scripts = [script['src'] for script in scripts]
-                    else:
-                        matching_scripts = [script['src'] for script in scripts if '_app-' in script['src']]
-                    for script_url in matching_scripts:
-                        script_url = HTMLRequests.BASE_URL + script_url
-                        async with aiohttp.ClientSession() as session:
-                            async with session.get(script_url, headers=headers, timeout=timeout) as script_resp:
-                                if script_resp is not None and resp.status == 200:
-                                    script_resp_text = await script_resp.text()
-                                    search_info = SearchInformations(script_resp_text)
-                                    if search_info.search_url is not None:
-                                        # The api key is necessary
-                                        return search_info
-                                    else:
-                                        return None
-                else:
-                    return None
+                if resp is None or resp.status != 200:
+                    return None
+                resp_text = await resp.text()
Owner: This is a nice change but it inverted the "if" condition for no reason, redo the condition as before please so the diff is more noticeable
+                soup = BeautifulSoup(resp_text, HTMLRequests.HTML_PARSER)
+                scripts = soup.find_all('script', src=True)
+                for script in scripts:
+                    script_url = HTMLRequests.BASE_URL + script['src']
+                    async with aiohttp.ClientSession() as inner_session:
+                        async with inner_session.get(script_url, headers=headers, timeout=timeout) as script_resp:
+                            if script_resp is None or script_resp.status != 200:
+                                continue
+                            script_resp_text = await script_resp.text()
+                            search_info = SearchInformations(script_resp_text)
+                            if search_info.search_url is not None:
+                                return search_info
+                return None

     @staticmethod
     def get_auth_token_request_params():
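The reviewer comment inside the hunk above asks for the guard to keep its original polarity (no inverted early return) so the rewrite diffs more cleanly against the old code. A sketch of that shape, meant to sit inside the HTMLRequests class and relying on the module's existing imports (aiohttp, BeautifulSoup, SearchInformations); it is not the merged implementation:

    @staticmethod
    async def async_send_website_request_getcode(parse_all_scripts: bool, user_agent):
        # Same logic as the new version above, but the outer guard keeps the
        # original "is not None and status == 200" polarity.
        headers = HTMLRequests.get_title_request_headers(user_agent)
        timeout = aiohttp.ClientTimeout(total=60)
        async with aiohttp.ClientSession() as session:
            async with session.get(HTMLRequests.BASE_URL, headers=headers, timeout=timeout) as resp:
                if resp is not None and resp.status == 200:
                    resp_text = await resp.text()
                    soup = BeautifulSoup(resp_text, HTMLRequests.HTML_PARSER)
                    scripts = soup.find_all('script', src=True)
                    for script in scripts:
                        script_url = HTMLRequests.BASE_URL + script['src']
                        async with aiohttp.ClientSession() as inner_session:
                            async with inner_session.get(script_url, headers=headers, timeout=timeout) as script_resp:
                                if script_resp is not None and script_resp.status == 200:
                                    script_resp_text = await script_resp.text()
                                    search_info = SearchInformations(script_resp_text)
                                    if search_info.search_url is not None:
                                        return search_info
                return None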
New test file:

@@ -0,0 +1,49 @@
+"""
+Hermetic unit tests for the search-URL discovery regex.
+
+These don't hit the network and are intended to catch silent regressions
+when HLTB rotates their endpoint name (which they do periodically — most
+recently from /api/finder to /api/bleed).
+"""
+from unittest import TestCase
+
+from howlongtobeatpy.HTMLRequests import SearchInformations
+
+
+# Real shape captured from HLTB's Turbopack chunk on 2026-05-07. If HLTB
+# rotates the endpoint name again, update this fixture to the new shape and
+# the tests should still pass without code changes.
+BLEED_CHUNK_SHAPE = (
+    '...he:!(u?.user_id>0)};a&&(s[a]=l);'
+    'let i=await fetch("/api/bleed",{method:"POST",'
+    'headers:{"Content-Type":"application/json",'
+    '"x-auth-token":t,"x-hp-key":a,"x-hp-val":l},'
+    'body:JSON.stringify(s)});'
+    'if(403===i.status&&!e){...'
+)
+
+
+class TestSearchUrlExtraction(TestCase):
+
+    def test_extracts_current_api_bleed_endpoint(self):
+        info = SearchInformations(BLEED_CHUNK_SHAPE)
+        self.assertEqual("api/bleed", info.search_url)
+
+    def test_extracts_a_hypothetical_future_endpoint(self):
+        future = BLEED_CHUNK_SHAPE.replace("/api/bleed", "/api/somethingnew")
+        info = SearchInformations(future)
+        self.assertEqual("api/somethingnew", info.search_url)
+
+    def test_returns_none_when_no_post_fetch_present(self):
+        info = SearchInformations('var x = 1; console.log("hello")')
+        self.assertIsNone(info.search_url)
+
+    def test_ignores_get_only_fetches(self):
+        get_only = 'fetch("/api/bleed",{method:"GET"})'
+        info = SearchInformations(get_only)
+        self.assertIsNone(info.search_url)
+
+    def test_extracts_root_path_from_versioned_endpoint(self):
+        versioned = BLEED_CHUNK_SHAPE.replace("/api/bleed", "/api/bleed/v2")
+        info = SearchInformations(versioned)
+        self.assertEqual("api/bleed", info.search_url)
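These tests pin down the expected behaviour, but the extractor itself (SearchInformations.__extract_search_url_script, named in the first hunk header) is not part of this diff. One regular expression that would satisfy all five cases, purely as a sketch and not the library's actual implementation:

import re

# Sketch only: matches the endpoint path of a POST fetch("/api/...") call.
_POST_FETCH = re.compile(
    r'fetch\(\s*["\']/api/([A-Za-z0-9_]+)[^"\']*["\']\s*,\s*\{[^}]*method\s*:\s*["\']POST["\']'
)

def extract_search_url(script_content):
    match = _POST_FETCH.search(script_content)
    if match is None:
        return None
    # Keep only the first path segment after /api/, so "/api/bleed/v2"
    # still yields "api/bleed".
    return "api/" + match.group(1)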
Review comment on the original SEARCH_URL value: This was intentionally /s so that if the "retrieve url" doesn't work it's easily noticeable because this variable was not changed