@@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
467467)
468468# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469469
# Schemes whose query strings must NOT have "+" decoded as a space.
# register_scheme() adds a scheme here when it is called with
# query_plus_is_space=False; DecodedURL consults this set to choose its
# default when the caller does not pass query_plus_is_space explicitly.
NO_QUERY_PLUS_SCHEMES = set()
470471
471- def register_scheme (text , uses_netloc = True , default_port = None ):
472- # type: (Text, bool, Optional[int]) -> None
472+
473+ def register_scheme (
474+ text , uses_netloc = True , default_port = None , query_plus_is_space = True
475+ ):
476+ # type: (Text, bool, Optional[int], bool) -> None
473477 """Registers new scheme information, resulting in correct port and
474478 slash behavior from the URL object. There are dozens of standard
475479 schemes preregistered, so this function is mostly meant for
@@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485489 not. Defaults to True.
486490 default_port: The default port, if any, for
487491 netloc-using schemes.
492+ query_plus_is_space: If true, a "+" in the query string should be
493+ decoded as a space by DecodedURL.
488494
489495 .. _file an issue: https://github.com/mahmoud/hyperlink/issues
490496 """
@@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510516 else :
511517 raise ValueError ("uses_netloc expected bool, not: %r" % uses_netloc )
512518
519+ if not query_plus_is_space :
520+ NO_QUERY_PLUS_SCHEMES .add (text )
521+
513522 return
514523
515524
@@ -1969,6 +1978,16 @@ def remove(
19691978_EMPTY_URL = URL ()
19701979
19711980
1981+ def _replace_plus (text ):
1982+ # type: (Text) -> Text
1983+ return text .replace ("+" , "%20" )
1984+
1985+
1986+ def _no_op (text ):
1987+ # type: (Text) -> Text
1988+ return text
1989+
1990+
19721991class DecodedURL (object ):
19731992 """
19741993 :class:`DecodedURL` is a type designed to act as a higher-level
@@ -1998,6 +2017,9 @@ class DecodedURL(object):
19982017 lazy: Set to True to avoid pre-decode all parts of the URL to check for
19992018 validity.
20002019 Defaults to False.
2020+ query_plus_is_space: + characters in the query string should be treated
2021+ as spaces when decoding. If unspecified, the default is taken from
2022+ the scheme.
20012023
20022024 .. note::
20032025
@@ -2012,18 +2034,21 @@ class DecodedURL(object):
20122034 .. versionadded:: 18.0.0
20132035 """
20142036
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
    # type: (URL, bool, Optional[bool]) -> None
    self._url = url
    # An explicit argument wins; otherwise "+" means space unless the
    # URL's scheme was registered in NO_QUERY_PLUS_SCHEMES.
    self._query_plus_is_space = (
        url.scheme not in NO_QUERY_PLUS_SCHEMES
        if query_plus_is_space is None
        else query_plus_is_space
    )
    if lazy:
        # Decoding is deferred until each property is first read.
        return
    # Eagerly read every decodable property so that decoding problems
    # surface now, at construction time, and the results get cached.
    self.host, self.userinfo, self.path, self.query, self.fragment
20232048
20242049 @classmethod
2025- def from_text (cls , text , lazy = False ):
2026- # type: (Text, bool) -> DecodedURL
2050+ def from_text (cls , text , lazy = False , query_plus_is_space = None ):
2051+ # type: (Text, bool, Optional[bool] ) -> DecodedURL
20272052 """\
20282053 Make a `DecodedURL` instance from any text string containing a URL.
20292054
@@ -2034,7 +2059,7 @@ def from_text(cls, text, lazy=False):
20342059 Defaults to False.
20352060 """
20362061 _url = URL .from_text (text )
2037- return cls (_url , lazy = lazy )
2062+ return cls (_url , lazy = lazy , query_plus_is_space = query_plus_is_space )
20382063
20392064 @property
20402065 def encoded_url (self ):
@@ -2059,22 +2084,34 @@ def to_iri(self):
20592084 "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
20602085 return self ._url .to_iri ()
20612086
def _clone(self, url):
    # type: (URL) -> DecodedURL
    """Wrap *url* in a new instance of this object's class, carrying
    over this instance's ``query_plus_is_space`` setting.
    """
    factory = self.__class__
    # TODO: propagate laziness?
    return factory(url, query_plus_is_space=self._query_plus_is_space)
2094+
def click(self, href=u""):
    # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
    """Return a new DecodedURL wrapping the result of
    :meth:`~hyperlink.URL.click()`

    A :class:`DecodedURL` passed as *href* is unwrapped to its
    underlying URL before being handed to the wrapped URL's ``click()``.
    """
    target = href._url if isinstance(href, DecodedURL) else href
    clicked = self._url.click(href=target)
    return self._clone(clicked)
20702105
def sibling(self, segment):
    # type: (Text) -> DecodedURL
    """Automatically encode any reserved characters in *segment* and
    return a new `DecodedURL` wrapping the result of
    :meth:`~hyperlink.URL.sibling()`
    """
    wrapped = self._url
    new_url = wrapped.sibling(_encode_reserved(segment))
    return self._clone(new_url)
20782115
20792116 def child (self , * segments ):
20802117 # type: (Text) -> DecodedURL
@@ -2085,7 +2122,7 @@ def child(self, *segments):
20852122 if not segments :
20862123 return self
20872124 new_segs = [_encode_reserved (s ) for s in segments ]
2088- return self .__class__ (self ._url .child (* new_segs ))
2125+ return self ._clone (self ._url .child (* new_segs ))
20892126
20902127 def normalize (
20912128 self ,
@@ -2101,7 +2138,7 @@ def normalize(
21012138 """Return a new `DecodedURL` wrapping the result of
21022139 :meth:`~hyperlink.URL.normalize()`
21032140 """
2104- return self .__class__ (
2141+ return self ._clone (
21052142 self ._url .normalize (
21062143 scheme , host , path , query , fragment , userinfo , percents
21072144 )
@@ -2148,11 +2185,18 @@ def path(self):
21482185 def query (self ):
21492186 # type: () -> QueryPairs
21502187 if not hasattr (self , "_query" ):
2188+ if self ._query_plus_is_space :
2189+ predecode = _replace_plus
2190+ else :
2191+ predecode = _no_op
2192+
21512193 self ._query = cast (
21522194 QueryPairs ,
21532195 tuple (
21542196 tuple (
2155- _percent_decode (x , raise_subencoding_exc = True )
2197+ _percent_decode (
2198+ predecode (x ), raise_subencoding_exc = True
2199+ )
21562200 if x is not None
21572201 else None
21582202 for x in (k , v )
@@ -2248,7 +2292,7 @@ def replace(
22482292 userinfo = userinfo_text ,
22492293 uses_netloc = uses_netloc ,
22502294 )
2251- return self .__class__ (url = new_url )
2295+ return self ._clone (url = new_url )
22522296
22532297 def get (self , name ):
22542298 # type: (Text) -> List[Optional[Text]]
0 commit comments