You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

949 lines
30 KiB

4 years ago
  1. import warnings
  2. from collections.abc import Mapping, Sequence
  3. from ipaddress import ip_address
  4. from urllib.parse import (SplitResult, parse_qsl,
  5. urljoin, urlsplit, urlunsplit)
  6. from multidict import MultiDict, MultiDictProxy
  7. import idna
  8. from .quoting import _Quoter, _Unquoter
  9. __version__ = '1.2.6'
  10. __all__ = ('URL',)
  11. # is_leaf()
  # Port assumed for a scheme when a URL does not spell one out.
  # The websocket schemes share the ports of their HTTP counterparts.
  DEFAULT_PORTS = dict(http=80, https=443, ws=80, wss=443)

  # Unique marker meaning "no value stored"; unlike None it can never
  # collide with a legitimately cached result.
  sentinel = object()
  class cached_property:
      """Read-only property whose value is computed once per instance.

      Use it as a method decorator.  On first access the wrapped method
      is called and its result is stored in the owning instance's
      ``_cache`` dict under the method's name; later accesses return the
      stored value without calling the method again.  Because
      ``__set__`` is defined this is a data descriptor, and assigning to
      the attribute raises ``AttributeError``.

      The owning instance must provide a dict-like ``_cache`` attribute.
      """

      def __init__(self, wrapped):
          self.wrapped = wrapped
          # Mirror the wrapped callable's docstring; fall back to an
          # empty string for callables that do not expose one.
          self.__doc__ = getattr(wrapped, '__doc__', "")
          self.name = wrapped.__name__

      def __get__(self, inst, owner, _sentinel=sentinel):
          # Access through the class (no instance) yields the descriptor.
          if inst is None:
              return self
          cached = inst._cache.get(self.name, _sentinel)
          if cached is _sentinel:
              # First access: compute the value and remember it.
              cached = self.wrapped(inst)
              inst._cache[self.name] = cached
          return cached

      def __set__(self, inst, value):
          raise AttributeError("cached property is read-only")
  class URL:
      """URL value object wrapping a ``urllib.parse.SplitResult``.

      The split (and, unless ``encoded=True`` is passed, percent-encoded)
      components are kept in ``_val``; values computed by the
      ``cached_property`` attributes are memoised per instance in
      ``_cache``.  The ``with_*`` methods, the ``/`` operator, ``join()``,
      ``origin()``, ``relative()`` and ``parent`` return new ``URL``
      objects instead of modifying ``self``.

      ``raw_*`` attributes expose the encoded form of a component, the
      attribute without the prefix exposes the decoded form.
      """

      # Don't derive from str
      # follow pathlib.Path design
      # probably URL will not suffer from pathlib problems:
      # it's intended for libraries like aiohttp,
      # not to be passed into standard library functions like os.open etc.

      # URL grammar (RFC 3986)
      # pct-encoded = "%" HEXDIG HEXDIG
      # reserved    = gen-delims / sub-delims
      # gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
      # sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
      #             / "*" / "+" / "," / ";" / "="
      # unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
      # URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
      # hier-part   = "//" authority path-abempty
      #             / path-absolute
      #             / path-rootless
      #             / path-empty
      # scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
      # authority   = [ userinfo "@" ] host [ ":" port ]
      # userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
      # host        = IP-literal / IPv4address / reg-name
      # IP-literal  = "[" ( IPv6address / IPvFuture ) "]"
      # IPvFuture   = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
      # IPv6address =                            6( h16 ":" ) ls32
      #             /                       "::" 5( h16 ":" ) ls32
      #             / [               h16 ] "::" 4( h16 ":" ) ls32
      #             / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
      #             / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
      #             / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
      #             / [ *4( h16 ":" ) h16 ] "::"              ls32
      #             / [ *5( h16 ":" ) h16 ] "::"              h16
      #             / [ *6( h16 ":" ) h16 ] "::"
      # ls32        = ( h16 ":" h16 ) / IPv4address
      #             ; least-significant 32 bits of address
      # h16         = 1*4HEXDIG
      #             ; 16 bits of address represented in hexadecimal
      # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
      # dec-octet   = DIGIT                 ; 0-9
      #             / %x31-39 DIGIT         ; 10-99
      #             / "1" 2DIGIT            ; 100-199
      #             / "2" %x30-34 DIGIT     ; 200-249
      #             / "25" %x30-35          ; 250-255
      # reg-name    = *( unreserved / pct-encoded / sub-delims )
      # port        = *DIGIT
      # path          = path-abempty    ; begins with "/" or is empty
      #               / path-absolute   ; begins with "/" but not "//"
      #               / path-noscheme   ; begins with a non-colon segment
      #               / path-rootless   ; begins with a segment
      #               / path-empty      ; zero characters
      # path-abempty  = *( "/" segment )
      # path-absolute = "/" [ segment-nz *( "/" segment ) ]
      # path-noscheme = segment-nz-nc *( "/" segment )
      # path-rootless = segment-nz *( "/" segment )
      # path-empty    = 0<pchar>
      # segment       = *pchar
      # segment-nz    = 1*pchar
      # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
      #               ; non-zero-length segment without any colon ":"
      # pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
      # query         = *( pchar / "/" / "?" )
      # fragment      = *( pchar / "/" / "?" )
      # URI-reference = URI / relative-ref
      # relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
      # relative-part = "//" authority path-abempty
      #               / path-absolute
      #               / path-noscheme
      #               / path-empty
      # absolute-URI  = scheme ":" hier-part [ "?" query ]

      __slots__ = ('_cache', '_val')

      # Encoders / decoders, one per URL component.
      _QUOTER = _Quoter()
      _PATH_QUOTER = _Quoter(safe='@:', protected='/+')
      _QUERY_QUOTER = _Quoter(safe='?/:@', protected='=+&;', qs=True)
      _QUERY_PART_QUOTER = _Quoter(safe='?/:@', qs=True)
      _FRAGMENT_QUOTER = _Quoter(safe='?/:@')

      _UNQUOTER = _Unquoter()
      _PATH_UNQUOTER = _Unquoter(unsafe='+')
      _QS_UNQUOTER = _Unquoter(qs=True)

      def __new__(cls, val='', *, encoded=False, strict=None):
          """Create a URL from a string, a ``SplitResult`` or another URL.

          :param val: ``str`` (or ``str`` subclass) to parse, an existing
              ``URL`` (returned as is) or a ``SplitResult`` (accepted
              only together with ``encoded=True``).
          :param encoded: when true the components are stored verbatim;
              otherwise host, userinfo, path, query and fragment are
              encoded and, for URLs with a netloc, the path is normalized.
          :param strict: ignored; passing a non-None value only emits a
              warning.
          :raises ValueError: for a ``SplitResult`` without
              ``encoded=True`` or for a netloc that has no host.
          :raises TypeError: for any other type of ``val``.
          """
          if strict is not None:  # pragma: no cover
              warnings.warn("strict parameter is ignored")
          if type(val) is cls:
              return val
          if type(val) is str:
              val = urlsplit(val)
          elif type(val) is SplitResult:
              if not encoded:
                  raise ValueError("Cannot apply decoding to SplitResult")
          elif isinstance(val, str):
              # str subclass: convert to a plain str before splitting
              val = urlsplit(str(val))
          else:
              raise TypeError("Constructor parameter should be str")

          if not encoded:
              if not val[1]:  # netloc
                  netloc = ''
              else:
                  host = val.hostname
                  if host is None:
                      raise ValueError(
                          "Invalid URL: host is required for absolute urls.")
                  netloc = cls._make_netloc(val.username,
                                            val.password,
                                            host,
                                            val.port,
                                            encode=True)
              path = cls._PATH_QUOTER(val[2])
              if netloc:
                  path = cls._normalize_path(path)
              query = cls._QUERY_QUOTER(val[3])
              fragment = cls._FRAGMENT_QUOTER(val[4])
              val = SplitResult(val[0], netloc, path, query, fragment)

          self = object.__new__(cls)
          self._val = val
          self._cache = {}
          return self

      @classmethod
      def build(cls, *, scheme='', user='', password='', host='', port=None,
                path='', query=None, query_string='', fragment='',
                encoded=False):
          """Create and return a new URL from separate components.

          ``scheme`` and ``host`` must be given together or not at all,
          and at most one of ``query`` / ``query_string`` may be passed.
          With ``encoded=False`` (the default) every component is encoded
          and the path is normalized when a netloc is present.

          :raises ValueError: on an inconsistent combination of arguments.
          """
          if host and not scheme:
              raise ValueError(
                  'Can\'t build URL with "host" but without "scheme".')
          if not host and scheme:
              raise ValueError(
                  'Can\'t build URL with "scheme" but without "host".')
          if query and query_string:
              raise ValueError(
                  "Only one of \"query\" or \"query_string\" should be passed")

          if not user and not password and not host and not port:
              netloc = ''
          else:
              netloc = cls._make_netloc(user, password, host, port,
                                        encode=not encoded)
          if not encoded:
              path = cls._PATH_QUOTER(path)
              if netloc:
                  path = cls._normalize_path(path)
              query_string = cls._QUERY_QUOTER(query_string)
              fragment = cls._FRAGMENT_QUOTER(fragment)

          url = cls(
              SplitResult(scheme, netloc, path, query_string, fragment),
              encoded=True
          )

          if query:
              return url.with_query(query)
          return url

      def __init_subclass__(cls):
          """Forbid subclassing of URL."""
          raise TypeError("Inheritance a class {!r} from URL "
                          "is forbidden".format(cls))

      def __str__(self):
          """Return the encoded URL string."""
          val = self._val
          # An absolute URL with a query or fragment but no path gets an
          # explicit '/' path in its string form.
          if (not val.path and self.is_absolute() and
                  (val.query or val.fragment)):
              val = val._replace(path='/')
          return urlunsplit(val)

      def __repr__(self):
          return "{}('{}')".format(self.__class__.__name__, str(self))

      def __eq__(self, other):
          if not type(other) is URL:
              return NotImplemented

          # For absolute URLs an empty path and '/' compare equal.
          val1 = self._val
          if not val1.path and self.is_absolute():
              val1 = val1._replace(path='/')

          val2 = other._val
          if not val2.path and other.is_absolute():
              val2 = val2._replace(path='/')

          return val1 == val2

      def __hash__(self):
          ret = self._cache.get('hash')
          if ret is None:
              val = self._val
              # Same empty-path normalization as __eq__, so that equal
              # URLs hash equally.
              if not val.path and self.is_absolute():
                  val = val._replace(path='/')
              ret = self._cache['hash'] = hash(val)
          return ret

      def __le__(self, other):
          if not type(other) is URL:
              return NotImplemented
          return self._val <= other._val

      def __lt__(self, other):
          if not type(other) is URL:
              return NotImplemented
          return self._val < other._val

      def __ge__(self, other):
          if not type(other) is URL:
              return NotImplemented
          return self._val >= other._val

      def __gt__(self, other):
          if not type(other) is URL:
              return NotImplemented
          return self._val > other._val

      def __truediv__(self, name):
          """Return a new URL with ``name`` appended to the path.

          ``name`` is encoded first; query and fragment are dropped.

          :raises ValueError: if the encoded name starts with a slash.
          """
          name = self._PATH_QUOTER(name)
          if name.startswith('/'):
              raise ValueError("Appending path "
                               "starting from slash is forbidden")
          path = self._val.path
          if path == '/':
              new_path = '/' + name
          elif not path and not self.is_absolute():
              new_path = name
          else:
              parts = path.rstrip('/').split('/')
              parts.append(name)
              new_path = '/'.join(parts)
          if self.is_absolute():
              new_path = self._normalize_path(new_path)
          return URL(
              self._val._replace(path=new_path, query='', fragment=''),
              encoded=True)

      def __getstate__(self):
          # One-element tuple holding the SplitResult; the cache is not
          # pickled.
          return self._val,

      def __setstate__(self, state):
          if state[0] is None and isinstance(state[1], dict):
              # default style pickle
              self._val = state[1]['_val']
          else:
              self._val, *unused = state
          self._cache = {}

      def is_absolute(self):
          """A check for absolute URLs.

          Return True for absolute ones (having scheme or starting
          with //), False otherwise.

          """
          return self.raw_host is not None

      def is_default_port(self):
          """A check for default port.

          Return True if port is default for specified scheme,
          e.g. 'http://python.org' or 'http://python.org:80', False
          otherwise.

          """
          if self.port is None:
              return False
          default = DEFAULT_PORTS.get(self.scheme)
          if default is None:
              return False
          return self.port == default

      def origin(self):
          """Return an URL with scheme, host and port parts only.

          user, password, path, query and fragment are removed.

          :raises ValueError: if the URL is relative or has no scheme.
          """
          # TODO: add a keyword-only option for keeping user/pass maybe?
          if not self.is_absolute():
              raise ValueError("URL should be absolute")
          if not self._val.scheme:
              raise ValueError("URL should have scheme")
          v = self._val
          netloc = self._make_netloc(None, None, v.hostname, v.port,
                                     encode=False)
          val = v._replace(netloc=netloc, path='', query='', fragment='')
          return URL(val, encoded=True)

      def relative(self):
          """Return a relative part of the URL.

          scheme, user, password, host and port are removed.

          :raises ValueError: if the URL is not absolute.
          """
          if not self.is_absolute():
              raise ValueError("URL should be absolute")
          val = self._val._replace(scheme='', netloc='')
          return URL(val, encoded=True)

      @property
      def scheme(self):
          """Scheme for absolute URLs.

          Empty string for relative URLs or URLs starting with //

          """
          return self._val.scheme

      @property
      def raw_user(self):
          """Encoded user part of URL.

          None if user is missing.

          """
          # An empty user name is reported as None as well.
          ret = self._val.username
          if not ret:
              return None
          return ret

      @cached_property
      def user(self):
          """Decoded user part of URL.

          None if user is missing.

          """
          return self._UNQUOTER(self.raw_user)

      @property
      def raw_password(self):
          """Encoded password part of URL.

          None if password is missing.

          """
          return self._val.password

      @cached_property
      def password(self):
          """Decoded password part of URL.

          None if password is missing.

          """
          return self._UNQUOTER(self.raw_password)

      @property
      def raw_host(self):
          """Encoded host part of URL.

          None for relative URLs.

          """
          # Use host instead of hostname for sake of shortness
          # May add .hostname prop later
          return self._val.hostname

      @cached_property
      def host(self):
          """Decoded host part of URL.

          None for relative URLs.

          """
          raw = self.raw_host
          if raw is None:
              return None
          if '%' in raw:
              # Hack for scoped IPv6 addresses (address%zone-id):
              # presence of '%' sign means only IPv6 address, so idna
              # is useless.
              return raw
          try:
              return idna.decode(raw.encode('ascii'))
          except UnicodeError:  # e.g. '::1'
              return raw.encode('ascii').decode('idna')

      @property
      def port(self):
          """Port part of URL.

          None for relative URLs or URLs without explicit port and
          scheme without default port substitution.

          """
          return self._val.port or DEFAULT_PORTS.get(self._val.scheme)

      @property
      def raw_path(self):
          """Encoded path of URL.

          / for absolute URLs without path part.

          """
          ret = self._val.path
          if not ret and self.is_absolute():
              ret = '/'
          return ret

      @cached_property
      def path(self):
          """Decoded path of URL.

          / for absolute URLs without path part.

          """
          return self._PATH_UNQUOTER(self.raw_path)

      @cached_property
      def query(self):
          """A MultiDictProxy representing parsed query parameters in decoded
          representation.

          Empty value if URL has no query part.

          """
          ret = MultiDict(parse_qsl(self.raw_query_string,
                                    keep_blank_values=True))
          return MultiDictProxy(ret)

      @property
      def raw_query_string(self):
          """Encoded query part of URL.

          Empty string if query is missing.

          """
          return self._val.query

      @cached_property
      def query_string(self):
          """Decoded query part of URL.

          Empty string if query is missing.

          """
          return self._QS_UNQUOTER(self.raw_query_string)

      @cached_property
      def path_qs(self):
          """Decoded path of URL with query."""
          if not self.query_string:
              return self.path
          return '{}?{}'.format(self.path, self.query_string)

      @cached_property
      def raw_path_qs(self):
          """Encoded path of URL with query."""
          if not self.raw_query_string:
              return self.raw_path
          return '{}?{}'.format(self.raw_path, self.raw_query_string)

      @property
      def raw_fragment(self):
          """Encoded fragment part of URL.

          Empty string if fragment is missing.

          """
          return self._val.fragment

      @cached_property
      def fragment(self):
          """Decoded fragment part of URL.

          Empty string if fragment is missing.

          """
          return self._UNQUOTER(self.raw_fragment)

      @cached_property
      def raw_parts(self):
          """A tuple containing encoded *path* parts.

          ('/',) for absolute URLs if *path* is missing.

          """
          path = self._val.path
          if self.is_absolute():
              if not path:
                  parts = ['/']
              else:
                  parts = ['/'] + path[1:].split('/')
          else:
              if path.startswith('/'):
                  parts = ['/'] + path[1:].split('/')
              else:
                  parts = path.split('/')
          return tuple(parts)

      @cached_property
      def parts(self):
          """A tuple containing decoded *path* parts.

          ('/',) for absolute URLs if *path* is missing.

          """
          return tuple(self._UNQUOTER(part) for part in self.raw_parts)

      @cached_property
      def parent(self):
          """A new URL with last part of path removed and cleaned up query and
          fragment.

          """
          path = self.raw_path
          if not path or path == '/':
              # Nothing to strip from the path; only drop query/fragment.
              if self.raw_fragment or self.raw_query_string:
                  return URL(self._val._replace(query='', fragment=''),
                             encoded=True)
              return self
          parts = path.split('/')
          val = self._val._replace(path='/'.join(parts[:-1]),
                                   query='', fragment='')
          return URL(val, encoded=True)

      @cached_property
      def raw_name(self):
          """The last part of raw_parts."""
          parts = self.raw_parts
          if self.is_absolute():
              # Skip the leading '/' element of an absolute URL.
              parts = parts[1:]
              if not parts:
                  return ''
          return parts[-1]

      @cached_property
      def name(self):
          """The last part of parts."""
          return self._UNQUOTER(self.raw_name)

      @classmethod
      def _normalize_path(cls, path):
          """Return ``path`` with '.' and '..' segments resolved."""
          # Drop '.' and '..' from path
          segments = path.split('/')
          resolved_path = []

          for seg in segments:
              if seg == '..':
                  try:
                      resolved_path.pop()
                  except IndexError:
                      # ignore any .. segments that would otherwise cause
                      # an IndexError when popped from resolved_path if
                      # resolving for rfc3986
                      pass
              elif seg == '.':
                  continue
              else:
                  resolved_path.append(seg)

          if segments[-1] in ('.', '..'):
              # do some post-processing here.
              # if the last segment was a relative dir,
              # then we need to append the trailing '/'
              resolved_path.append('')

          return '/'.join(resolved_path)

      @classmethod
      def _encode_host(cls, host):
          """Return ``host`` in its encoded, netloc-ready form.

          IP addresses are written in compressed form (IPv6 ones inside
          brackets, keeping a ``%zone`` suffix); any other host is
          IDNA-encoded.
          """
          try:
              ip, sep, zone = host.partition('%')
              ip = ip_address(ip)
          except ValueError:
              # Not an IP address: treat it as a (possibly
              # internationalized) domain name.
              try:
                  host = idna.encode(host, uts46=True).decode('ascii')
              except UnicodeError:
                  host = host.encode('idna').decode('ascii')
          else:
              host = ip.compressed
              if sep:
                  host += '%' + zone
              if ip.version == 6:
                  host = '[' + host + ']'
          return host

      @staticmethod
      def _bracket_ipv6(host):
          """Wrap a bare IPv6 literal in brackets; return others as is.

          ``SplitResult.hostname`` strips the brackets of an IP-literal,
          so a host taken from there has to be re-bracketed before it is
          placed in a netloc again, otherwise its colons are
          indistinguishable from the port separator.
          """
          if not host or ':' not in host or host.startswith('['):
              return host
          try:
              ip = ip_address(host.partition('%')[0])
          except ValueError:
              # Contains ':' but is not an IP address; leave untouched.
              return host
          if ip.version == 6:
              return '[' + host + ']'
          return host

      @classmethod
      def _make_netloc(cls, user, password, host, port, encode):
          """Assemble ``[user[:password]@]host[:port]``.

          With ``encode`` true the host goes through ``_encode_host`` and
          user/password are quoted; otherwise all parts are used as
          given, except that a bare IPv6 host is put back into brackets.
          """
          if encode:
              ret = cls._encode_host(host)
          else:
              ret = cls._bracket_ipv6(host)
          if port:
              ret = ret + ':' + str(port)
          if password:
              if not user:
                  user = ''
              else:
                  if encode:
                      user = cls._QUOTER(user)
              if encode:
                  password = cls._QUOTER(password)
              user = user + ':' + password
          elif user and encode:
              user = cls._QUOTER(user)
          if user:
              ret = user + '@' + ret
          return ret

      def with_scheme(self, scheme):
          """Return a new URL with scheme replaced.

          :raises TypeError: if ``scheme`` is not a str.
          :raises ValueError: if the URL is relative.
          """
          # N.B. doesn't cleanup query/fragment
          if not isinstance(scheme, str):
              raise TypeError("Invalid scheme type")
          if not self.is_absolute():
              raise ValueError("scheme replacement is not allowed "
                               "for relative URLs")
          return URL(self._val._replace(scheme=scheme.lower()),
                     encoded=True)

      def with_user(self, user):
          """Return a new URL with user replaced.

          Autoencode user if needed.

          Clear user/password if user is None.

          :raises TypeError: if ``user`` is neither str nor None.
          :raises ValueError: if the URL is relative.
          """
          # N.B. doesn't cleanup query/fragment
          val = self._val
          if user is None:
              password = None
          elif isinstance(user, str):
              user = self._QUOTER(user)
              password = val.password
          else:
              raise TypeError("Invalid user type")
          if not self.is_absolute():
              raise ValueError("user replacement is not allowed "
                               "for relative URLs")
          netloc = self._make_netloc(user, password, val.hostname,
                                     val.port, encode=False)
          return URL(val._replace(netloc=netloc), encoded=True)

      def with_password(self, password):
          """Return a new URL with password replaced.

          Autoencode password if needed.

          Clear password if argument is None.

          :raises TypeError: if ``password`` is neither str nor None.
          :raises ValueError: if the URL is relative.
          """
          # N.B. doesn't cleanup query/fragment
          if password is None:
              pass
          elif isinstance(password, str):
              password = self._QUOTER(password)
          else:
              raise TypeError("Invalid password type")
          if not self.is_absolute():
              raise ValueError("password replacement is not allowed "
                               "for relative URLs")
          val = self._val
          netloc = self._make_netloc(val.username, password, val.hostname,
                                     val.port, encode=False)
          return URL(val._replace(netloc=netloc), encoded=True)

      def with_host(self, host):
          """Return a new URL with host replaced.

          Autoencode host if needed.

          Changing host for relative URLs is not allowed, use .join()
          instead.

          :raises TypeError: if ``host`` is not a str.
          :raises ValueError: if the URL is relative or ``host`` is empty.
          """
          # N.B. doesn't cleanup query/fragment
          if not isinstance(host, str):
              raise TypeError("Invalid host type")
          if not self.is_absolute():
              raise ValueError("host replacement is not allowed "
                               "for relative URLs")
          if not host:
              raise ValueError("host removing is not allowed")
          host = self._encode_host(host)
          val = self._val
          netloc = self._make_netloc(val.username, val.password, host,
                                     val.port, encode=False)
          return URL(val._replace(netloc=netloc), encoded=True)

      def with_port(self, port):
          """Return a new URL with port replaced.

          Clear port to default if None is passed.

          :raises TypeError: if ``port`` is neither int nor None.
          :raises ValueError: if the URL is relative.
          """
          # N.B. doesn't cleanup query/fragment
          if port is not None and not isinstance(port, int):
              raise TypeError(
                  "port should be int or None, got {}".format(type(port)))
          if not self.is_absolute():
              raise ValueError("port replacement is not allowed "
                               "for relative URLs")
          val = self._val
          netloc = self._make_netloc(val.username, val.password,
                                     val.hostname, port, encode=False)
          return URL(val._replace(netloc=netloc), encoded=True)

      def with_path(self, path, *, encoded=False):
          """Return a new URL with path replaced.

          Unless ``encoded`` is true the path is quoted and, for absolute
          URLs, normalized.  A non-empty path always gets a leading '/'.
          Query and fragment are dropped.
          """
          if not encoded:
              path = self._PATH_QUOTER(path)
              if self.is_absolute():
                  path = self._normalize_path(path)
          if len(path) > 0 and path[0] != '/':
              path = '/' + path
          return URL(self._val._replace(path=path, query='', fragment=''),
                     encoded=True)

      @staticmethod
      def _query_var(v, container='mapping'):
          """Return a query value as str; only str and exact int are valid.

          ``container`` names the kind of query argument in the error
          message.
          """
          if isinstance(v, str):
              return v
          if type(v) is int:  # no subclasses like bool
              return str(v)
          raise TypeError("Invalid variable type: {} value "
                          "should be str or int, got {!r}".format(
                              container, v))

      def _get_str_query(self, *args, **kwargs):
          """Convert a query argument into an encoded query string.

          Accepts either keyword arguments or exactly one positional
          argument which may be None (empty query), a mapping, a str or
          a sequence of ``(key, value)`` pairs.  Values inside mappings
          and sequences may be str or int.

          :raises ValueError: if both or neither of args/kwargs are given,
              or more than one positional argument is passed.
          :raises TypeError: for unsupported query or value types.
          """
          if kwargs:
              if len(args) > 0:
                  raise ValueError("Either kwargs or single query parameter "
                                   "must be present")
              query = kwargs
          elif len(args) == 1:
              query = args[0]
          else:
              raise ValueError("Either kwargs or single query parameter "
                               "must be present")

          if query is None:
              query = ''
          elif isinstance(query, Mapping):
              quoter = self._QUERY_PART_QUOTER
              lst = []
              for k, v in query.items():
                  v = self._query_var(v)
                  lst.append(quoter(k) + '=' + quoter(v))
              query = '&'.join(lst)
          elif isinstance(query, str):
              query = self._QUERY_QUOTER(query)
          elif isinstance(query, (bytes, bytearray, memoryview)):
              # These are Sequences too, so reject them before the
              # generic Sequence branch below.
              raise TypeError("Invalid query type: bytes, bytearray and "
                              "memoryview are forbidden")
          elif isinstance(query, Sequence):
              quoter = self._QUERY_PART_QUOTER
              query = '&'.join(
                  quoter(k) + '=' + quoter(self._query_var(v, 'sequence'))
                  for k, v in query)
          else:
              raise TypeError("Invalid query type: only str, mapping or "
                              "sequence of (str, str) pairs is allowed")

          return query

      def with_query(self, *args, **kwargs):
          """Return a new URL with query part replaced.

          Accepts any Mapping (e.g. dict, multidict.MultiDict instances)
          or str, autoencode the argument if needed.

          It also can take an arbitrary number of keyword arguments.

          Clear query if None is passed.

          """
          # N.B. doesn't cleanup query/fragment
          new_query = self._get_str_query(*args, **kwargs)
          return URL(self._val._replace(query=new_query), encoded=True)

      def update_query(self, *args, **kwargs):
          """Return a new URL with query part updated."""
          s = self._get_str_query(*args, **kwargs)
          new_query = MultiDict(parse_qsl(s, keep_blank_values=True))
          query = MultiDict(self.query)
          query.update(new_query)

          return URL(self._val._replace(query=self._get_str_query(query)),
                     encoded=True)

      def with_fragment(self, fragment):
          """Return a new URL with fragment replaced.

          Autoencode fragment if needed.

          Clear fragment to default if None is passed.

          :raises TypeError: if ``fragment`` is neither str nor None.
          """
          # N.B. doesn't cleanup query/fragment
          if fragment is None:
              fragment = ''
          elif not isinstance(fragment, str):
              raise TypeError("Invalid fragment type")
          return URL(
              self._val._replace(
                  fragment=self._FRAGMENT_QUOTER(fragment)),
              encoded=True)

      def with_name(self, name):
          """Return a new URL with name (last part of path) replaced.

          Query and fragment parts are cleaned up.

          Name is encoded if needed.

          :raises TypeError: if ``name`` is not a str.
          :raises ValueError: if ``name`` contains '/' or is '.' or '..'.
          """
          # N.B. DOES cleanup query/fragment
          if not isinstance(name, str):
              raise TypeError("Invalid name type")
          if '/' in name:
              raise ValueError("Slash in name is not allowed")
          name = self._PATH_QUOTER(name)
          if name in ('.', '..'):
              raise ValueError(". and .. values are forbidden")
          parts = list(self.raw_parts)
          if self.is_absolute():
              if len(parts) == 1:
                  parts.append(name)
              else:
                  parts[-1] = name
              parts[0] = ''  # replace leading '/'
          else:
              parts[-1] = name
              if parts[0] == '/':
                  parts[0] = ''  # replace leading '/'
          return URL(
              self._val._replace(path='/'.join(parts),
                                 query='', fragment=''), encoded=True)

      def join(self, url):
          """Join URLs

          Construct a full (absolute) URL by combining a base URL
          (self) with another URL (url).

          Informally, this uses components of the base URL, in
          particular the addressing scheme, the network location and
          (part of) the path, to provide missing components in the
          relative URL.

          :raises TypeError: if ``url`` is not a URL instance.
          """
          # See docs for urllib.parse.urljoin
          if not isinstance(url, URL):
              raise TypeError("url should be URL")
          return URL(urljoin(str(self), str(url)), encoded=True)

      def human_repr(self):
          """Return decoded human readable string for URL representation."""
          netloc = self._make_netloc(self.user,
                                     self.password,
                                     self.host,
                                     self._val.port,
                                     encode=False)
          return urlunsplit(SplitResult(self.scheme,
                                        netloc,
                                        self.path,
                                        self.query_string,
                                        self.fragment))