You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

479 lines
20 KiB

  1. /**
  2. * URI.js
  3. *
  4. * @fileoverview An RFC 3986 compliant, scheme extendable URI parsing/validating/resolving library for JavaScript.
  5. * @author <a href="mailto:gary.court@gmail.com">Gary Court</a>
  6. * @see http://github.com/garycourt/uri-js
  7. */
  8. /**
  9. * Copyright 2011 Gary Court. All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * 1. Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * 2. Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY GARY COURT ``AS IS'' AND ANY EXPRESS OR IMPLIED
  22. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  23. * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARY COURT OR
  24. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  26. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  27. * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  28. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  29. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. *
  31. * The views and conclusions contained in the software and documentation are those of the
  32. * authors and should not be interpreted as representing official policies, either expressed
  33. * or implied, of Gary Court.
  34. */
  35. import URI_PROTOCOL from "./regexps-uri";
  36. import IRI_PROTOCOL from "./regexps-iri";
  37. import punycode from "punycode";
  38. import { toUpperCase, typeOf, assign } from "./util";
  39. export const SCHEMES = {};
  /**
   * Percent-encodes the first character of a string as upper-case UTF-8 octets.
   *
   * The character is read as a full code point, so a surrogate pair such as
   * "\u{1F600}" is emitted as one four-octet sequence instead of being encoded
   * from its high surrogate alone.
   *
   * @param {string} chr - String whose first code point is encoded; the rest is ignored.
   * @returns {string} One "%XX" group per UTF-8 octet, e.g. "%20" or "%E2%82%AC".
   */
  export function pctEncChar(chr) {
    const c = chr.codePointAt(0);
    // Formats one octet as "%XX" (callers guarantee the value needs two hex digits).
    const octet = (byte) => "%" + byte.toString(16).toUpperCase();
    if (c < 16) {
      // Single hex digit: pad with a leading zero.
      return "%0" + c.toString(16).toUpperCase();
    }
    if (c < 128) {
      return octet(c);
    }
    if (c < 2048) {
      return octet((c >> 6) | 192) + octet((c & 63) | 128);
    }
    if (c >= 65536) {
      // Supplementary plane: four octets.
      return octet((c >> 18) | 240) + octet(((c >> 12) & 63) | 128) + octet(((c >> 6) & 63) | 128) + octet((c & 63) | 128);
    }
    // Remaining BMP code points; also the fall-through for an empty string,
    // which keeps yielding "%E0%80%80" as before.
    return octet((c >> 12) | 224) + octet(((c >> 6) & 63) | 128) + octet((c & 63) | 128);
  }
  /**
   * Decodes a run of percent-encoded octets ("%XX%YY...") as UTF-8.
   *
   * The input is consumed in "%XX" groups. The lead octet selects the sequence
   * length:
   *  - below 0x80: one octet, decoded directly;
   *  - 0xC2-0xDF: two octets;
   *  - 0xE0-0xEF: three octets;
   *  - 0xF0-0xF7: four octets (supplementary code points);
   *  - 0xF8 and above keep the historical three-octet treatment;
   *  - anything else (continuation or overlong lead octets, non-hex digits) is
   *    copied through unchanged.
   * A multi-octet sequence that is cut short by the end of the input is copied
   * through unchanged, as is a four-octet sequence that does not decode to a
   * code point in U+10000..U+10FFFF.
   *
   * @param {string} str - Text made of consecutive "%XX" groups.
   * @returns {string} The decoded text.
   */
  export function pctDecChars(str) {
    let newStr = "";
    let i = 0;
    const il = str.length;
    // Reads the two hex digits following the "%" located at `offset`.
    const octetAt = (offset) => parseInt(str.slice(offset + 1, offset + 3), 16);
    while (i < il) {
      const c = octetAt(i);
      if (c < 128) {
        newStr += String.fromCharCode(c);
        i += 3;
      }
      else if (c >= 194 && c < 224) {
        if ((il - i) >= 6) {
          const c2 = octetAt(i + 3);
          newStr += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
        }
        else {
          newStr += str.slice(i, i + 6);
        }
        i += 6;
      }
      else if (c >= 240 && c < 248) {
        // Four-octet sequence; previously mis-read as three octets plus a stray one.
        let codePoint = -1;
        if ((il - i) >= 12) {
          codePoint = ((c & 7) << 18) | ((octetAt(i + 3) & 63) << 12) | ((octetAt(i + 6) & 63) << 6) | (octetAt(i + 9) & 63);
        }
        // The range check also keeps String.fromCodePoint from throwing a RangeError.
        if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
          newStr += String.fromCodePoint(codePoint);
        }
        else {
          newStr += str.slice(i, i + 12);
        }
        i += 12;
      }
      else if (c >= 224) {
        if ((il - i) >= 9) {
          const c2 = octetAt(i + 3);
          const c3 = octetAt(i + 6);
          newStr += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
        }
        else {
          newStr += str.slice(i, i + 9);
        }
        i += 9;
      }
      else {
        // Not a valid lead octet (or not hex at all): keep the group as written.
        newStr += str.slice(i, i + 3);
        i += 3;
      }
    }
    return newStr;
  }
  /**
   * Normalizes the percent-encoding of every URI component in place.
   *
   * For each component: escapes whose decoded form matches `protocol.UNRESERVED`
   * are decoded, characters matched by the component's NOT_* pattern are
   * percent-encoded, and the remaining escapes are passed through `toUpperCase`.
   * The scheme is only unescaped, lower-cased and stripped of characters matched
   * by `protocol.NOT_SCHEME`; the host is additionally lower-cased.
   *
   * @param {object} components - Parsed URI components; mutated.
   * @param {object} protocol - Pattern table (URI or IRI flavour) supplying the regular expressions.
   * @returns {object} The same `components` object.
   */
  function _normalizeComponentEncoding(components, protocol) {
    const pctEncoded = protocol.PCT_ENCODED;
    // Keeps an escape as written unless its decoded form is an unreserved character.
    const unescapeIfUnreserved = (escaped) => {
      const plain = pctDecChars(escaped);
      return plain.match(protocol.UNRESERVED) ? plain : escaped;
    };
    // Shared pipeline for every component except the scheme.
    const reencode = (value, disallowed, lowerCase) => {
      let text = String(value).replace(pctEncoded, unescapeIfUnreserved);
      if (lowerCase) {
        text = text.toLowerCase();
      }
      return text.replace(disallowed, pctEncChar).replace(pctEncoded, toUpperCase);
    };
    if (components.scheme) {
      components.scheme = String(components.scheme)
        .replace(pctEncoded, unescapeIfUnreserved)
        .toLowerCase()
        .replace(protocol.NOT_SCHEME, "");
    }
    if (components.userinfo !== undefined) {
      components.userinfo = reencode(components.userinfo, protocol.NOT_USERINFO, false);
    }
    if (components.host !== undefined) {
      components.host = reencode(components.host, protocol.NOT_HOST, true);
    }
    if (components.path !== undefined) {
      // The scheme was normalized above, so this reflects its final value.
      const disallowedInPath = components.scheme ? protocol.NOT_PATH : protocol.NOT_PATH_NOSCHEME;
      components.path = reencode(components.path, disallowedInPath, false);
    }
    if (components.query !== undefined) {
      components.query = reencode(components.query, protocol.NOT_QUERY, false);
    }
    if (components.fragment !== undefined) {
      components.fragment = reencode(components.fragment, protocol.NOT_FRAGMENT, false);
    }
    return components;
  }
  /**
   * Removes leading "0" characters from a numeric field.
   *
   * @param {string} str - Field text, e.g. one IPv4 octet or IPv6 group.
   * @returns {string} The text without leading zeros, or "0" when nothing is left.
   */
  function _stripLeadingZeros(str) {
    const trimmed = str.replace(/^0+/, "");
    return trimmed === "" ? "0" : trimmed;
  }
  /**
   * Strips leading zeros from each dotted field of an IPv4 host.
   *
   * @param {string} host - Host text to inspect.
   * @param {object} protocol - Pattern table; `IPV4ADDRESS` must capture the address in group 1.
   * @returns {string} The normalized address, or `host` unchanged when the pattern does not capture one.
   */
  function _normalizeIPv4(host, protocol) {
    const found = host.match(protocol.IPV4ADDRESS);
    const dotted = found ? found[1] : undefined;
    if (!dotted) {
      return host;
    }
    return dotted
      .split(".")
      .map(_stripLeadingZeros)
      .join(".");
  }
  /**
   * Normalizes an IPv6 host: lower-cases it, strips leading zeros from every
   * field, collapses the longest run of two or more zero fields into "::" and
   * re-attaches the zone identifier (if any) after a "%".
   *
   * @param {string} host - Host text to inspect.
   * @param {object} protocol - Pattern table; `IPV6ADDRESS` must capture the address in
   *   group 1 and the zone in group 2, and `IPV4ADDRESS` detects an embedded IPv4 tail.
   * @returns {string} The normalized address, or `host` unchanged when the pattern does not capture one.
   */
  function _normalizeIPv6(host, protocol) {
    const found = host.match(protocol.IPV6ADDRESS) || [];
    const address = found[1];
    const zone = found[2];
    if (!address) {
      return host;
    }
    // Reversing after the split puts the part following "::" first, so an
    // address without "::" lands entirely in `tail`.
    const halves = address.toLowerCase().split("::").reverse();
    const tail = halves[0];
    const head = halves[1];
    const headFields = head ? head.split(":").map(_stripLeadingZeros) : [];
    const tailFields = tail.split(":").map(_stripLeadingZeros);
    const endsWithIPv4 = protocol.IPV4ADDRESS.test(tailFields[tailFields.length - 1]);
    // An embedded dotted IPv4 tail occupies a single slot, leaving seven fields.
    const fieldCount = endsWithIPv4 ? 7 : 8;
    const tailOffset = tailFields.length - fieldCount;
    const fields = Array.from(
      { length: fieldCount },
      (_, position) => headFields[position] || tailFields[tailOffset + position] || ""
    );
    if (endsWithIPv4) {
      fields[fieldCount - 1] = _normalizeIPv4(fields[fieldCount - 1], protocol);
    }
    // Collect every run of consecutive empty/"0" fields as { index, length }.
    const zeroRuns = [];
    fields.forEach((field, index) => {
      if (field && field !== "0") {
        return;
      }
      const previous = zeroRuns[zeroRuns.length - 1];
      if (previous && previous.index + previous.length === index) {
        previous.length++;
      }
      else {
        zeroRuns.push({ index, length: 1 });
      }
    });
    const longestRun = zeroRuns.sort((a, b) => b.length - a.length)[0];
    let newHost;
    if (longestRun && longestRun.length > 1) {
      const before = fields.slice(0, longestRun.index);
      const after = fields.slice(longestRun.index + longestRun.length);
      newHost = before.join(":") + "::" + after.join(":");
    }
    else {
      newHost = fields.join(":");
    }
    if (zone) {
      newHost += "%" + zone;
    }
    return newHost;
  }
  // Generic component splitter. Capture groups: 1 scheme, 2 authority,
  // 3 userinfo, 4 host, 5 port, 6 path, 7 query, 8 fragment.
  const URI_PARSE = /^(?:([^:\/?#]+):)?(?:\/\/((?:([^\/?#@]*)@)?(\[[^\/?#\]]+\]|[^\/?#:]*)(?:\:(\d*))?))?([^?#]*)(?:\?([^#]*))?(?:#((?:.|\n|\r)*))?/i;
  // True when this engine reports a non-participating capture group as undefined;
  // parse() uses a fallback branch for engines that do not.
  const NO_MATCH_IS_UNDEFINED = ("").match(/(){0}/)[1] === undefined;
  /**
   * Parses a URI/IRI string into its components.
   *
   * @param {string} uriString - The text to parse.
   * @param {object} [options] - Parsing options read by this function:
   *   `iri` (anything but `false` selects the IRI pattern table), `reference`
   *   ("suffix" prepends "//" and the optional `scheme`; any other value is checked
   *   against the detected reference type), `scheme` (overrides the scheme used to
   *   look up a handler in `SCHEMES`), `unicodeSupport` and `domainHost`.
   * @returns {object} Components `scheme`, `userinfo`, `host`, `port`, `path`,
   *   `query`, `fragment`, plus `reference` ("same-document", "relative",
   *   "absolute" or "uri") and, on failure, an `error` message.
   */
  export function parse(uriString, options = {}) {
    const components = {};
    const protocol = (options.iri !== false ? IRI_PROTOCOL : URI_PROTOCOL);
    if (options.reference === "suffix") {
      uriString = (options.scheme ? options.scheme + ":" : "") + "//" + uriString;
    }
    const matches = uriString.match(URI_PARSE);
    if (!matches) {
      components.error = components.error || "URI can not be parsed.";
      return components;
    }
    if (NO_MATCH_IS_UNDEFINED) {
      //store each component
      components.scheme = matches[1];
      components.userinfo = matches[3];
      components.host = matches[4];
      components.port = parseInt(matches[5], 10);
      components.path = matches[6] || "";
      components.query = matches[7];
      components.fragment = matches[8];
      //fix port number: keep the raw capture ("" or undefined) when it is not numeric
      if (isNaN(components.port)) {
        components.port = matches[5];
      }
    }
    else { //IE FIX for improper RegExp matching
      //store each component; delimiters are probed by hand because unmatched groups are not undefined here
      components.scheme = matches[1] || undefined;
      components.userinfo = (uriString.indexOf("@") !== -1 ? matches[3] : undefined);
      components.host = (uriString.indexOf("//") !== -1 ? matches[4] : undefined);
      components.port = parseInt(matches[5], 10);
      components.path = matches[6] || "";
      components.query = (uriString.indexOf("?") !== -1 ? matches[7] : undefined);
      components.fragment = (uriString.indexOf("#") !== -1 ? matches[8] : undefined);
      //fix port number
      if (isNaN(components.port)) {
        // Group 5 is the port (group 4 is the host), matching the branch above.
        components.port = (uriString.match(/\/\/(?:.|\n)*\:(?:\/|\?|\#|$)/) ? matches[5] : undefined);
      }
    }
    if (components.host) {
      //normalize IP hosts
      components.host = _normalizeIPv6(_normalizeIPv4(components.host, protocol), protocol);
    }
    //determine reference type
    if (components.scheme === undefined && components.userinfo === undefined && components.host === undefined && components.port === undefined && !components.path && components.query === undefined) {
      components.reference = "same-document";
    }
    else if (components.scheme === undefined) {
      components.reference = "relative";
    }
    else if (components.fragment === undefined) {
      components.reference = "absolute";
    }
    else {
      components.reference = "uri";
    }
    //check for reference errors
    if (options.reference && options.reference !== "suffix" && options.reference !== components.reference) {
      components.error = components.error || "URI is not a " + options.reference + " reference.";
    }
    //find scheme handler
    const schemeHandler = SCHEMES[(options.scheme || components.scheme || "").toLowerCase()];
    //check if scheme can't handle IRIs
    if (!options.unicodeSupport && (!schemeHandler || !schemeHandler.unicodeSupport)) {
      //if host component is a domain name
      if (components.host && (options.domainHost || (schemeHandler && schemeHandler.domainHost))) {
        //convert Unicode IDN -> ASCII IDN
        try {
          components.host = punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase());
        }
        catch (e) {
          components.error = components.error || "Host's domain name can not be converted to ASCII via punycode: " + e;
        }
      }
      //convert IRI -> URI
      _normalizeComponentEncoding(components, URI_PROTOCOL);
    }
    else {
      //normalize encodings
      _normalizeComponentEncoding(components, protocol);
    }
    //perform scheme specific parsing
    if (schemeHandler && schemeHandler.parse) {
      schemeHandler.parse(components, options);
    }
    return components;
  }
  /**
   * Rebuilds the authority ("userinfo@host:port") from its components.
   *
   * IP hosts are normalized; an IPv6 host is wrapped in brackets and its zone
   * separator is written as "%25".
   *
   * @param {object} components - Components providing `userinfo`, `host` and `port`.
   * @param {object} options - Only `iri` is read: `false` selects the URI pattern table.
   * @returns {string|undefined} The authority text, or undefined when no part is present.
   */
  function _recomposeAuthority(components, options) {
    const protocol = options.iri === false ? URI_PROTOCOL : IRI_PROTOCOL;
    const bracketIPv6 = (_, address, zone) => "[" + address + (zone ? "%25" + zone : "") + "]";
    const parts = [];
    if (components.userinfo !== undefined) {
      parts.push(components.userinfo, "@");
    }
    if (components.host !== undefined) {
      const normalizedHost = _normalizeIPv6(_normalizeIPv4(String(components.host), protocol), protocol);
      parts.push(normalizedHost.replace(protocol.IPV6ADDRESS, bracketIPv6));
    }
    const portType = typeof components.port;
    if (portType === "number" || portType === "string") {
      parts.push(":", String(components.port));
    }
    return parts.length === 0 ? undefined : parts.join("");
  }
  // Patterns for the dot-segment removal steps below.
  const RDS1 = /^\.\.?\//; // leading "./" or "../"
  const RDS2 = /^\/\.(\/|$)/; // leading "/./" or a final "/."
  const RDS3 = /^\/\.\.(\/|$)/; // leading "/../" or a final "/.."
  const RDS4 = /^\.\.?$/; // input is exactly "." or ".."
  // First path segment, including its leading "/" if present. A negated class is
  // used so that segments containing "\r", U+2028 or U+2029 are consumed too; the
  // earlier `(?:.|\n)*?` form matched the empty string on such input (endless
  // loop) or did not match at all (exception).
  const RDS5 = /^\/?[^\/]*/;
  /**
   * Removes "." and ".." segments from a path.
   *
   * @param {string} input - The path to clean up.
   * @returns {string} The path with dot segments resolved.
   * @throws {Error} If no progress can be made on the remaining input (defensive; not expected).
   */
  export function removeDotSegments(input) {
    const output = [];
    let remaining = input;
    while (remaining.length) {
      if (remaining.match(RDS1)) {
        remaining = remaining.replace(RDS1, "");
      }
      else if (remaining.match(RDS2)) {
        remaining = remaining.replace(RDS2, "/");
      }
      else if (remaining.match(RDS3)) {
        remaining = remaining.replace(RDS3, "/");
        // ".." cancels the segment emitted most recently.
        output.pop();
      }
      else if (remaining.match(RDS4)) {
        remaining = "";
      }
      else {
        const im = remaining.match(RDS5);
        if (im && im[0].length) {
          const s = im[0];
          remaining = remaining.slice(s.length);
          output.push(s);
        }
        else {
          throw new Error("Unexpected dot segment condition");
        }
      }
    }
    return output.join("");
  }
  /**
   * Serializes URI components back into a string.
   *
   * The `components` object is modified along the way: a matching scheme handler
   * may rewrite it, a domain host may be converted via punycode, and every
   * component has its percent-encoding normalized.
   *
   * @param {object} components - The components to serialize; mutated.
   * @param {object} [options] - Options read here: `iri`, `scheme`, `domainHost`,
   *   `reference` ("suffix" omits the scheme and the "//") and `absolutePath`
   *   (skips dot-segment removal).
   * @returns {string} The serialized URI.
   */
  export function serialize(components, options = {}) {
    const protocol = options.iri ? IRI_PROTOCOL : URI_PROTOCOL;
    //find scheme handler
    const schemeKey = (options.scheme || components.scheme || "").toLowerCase();
    const schemeHandler = SCHEMES[schemeKey];
    //perform scheme specific serialization
    if (schemeHandler && schemeHandler.serialize) {
      schemeHandler.serialize(components, options);
    }
    //TODO: normalize IPv6 address as per RFC 5952 (IPv6 hosts are currently left as they are)
    if (components.host && !protocol.IPV6ADDRESS.test(components.host)) {
      //if host component is a domain name
      if (options.domainHost || (schemeHandler && schemeHandler.domainHost)) {
        //convert IDN via punycode
        try {
          if (!options.iri) {
            components.host = punycode.toASCII(components.host.replace(protocol.PCT_ENCODED, pctDecChars).toLowerCase());
          }
          else {
            components.host = punycode.toUnicode(components.host);
          }
        }
        catch (e) {
          components.error = components.error || "Host's domain name can not be converted to " + (!options.iri ? "ASCII" : "Unicode") + " via punycode: " + e;
        }
      }
    }
    //normalize encoding
    _normalizeComponentEncoding(components, protocol);
    const isSuffix = options.reference === "suffix";
    let uri = "";
    if (!isSuffix && components.scheme) {
      uri += components.scheme + ":";
    }
    const authority = _recomposeAuthority(components, options);
    const hasAuthority = authority !== undefined;
    if (hasAuthority) {
      if (!isSuffix) {
        uri += "//";
      }
      uri += authority;
      // A non-empty path following an authority has to start with "/".
      if (components.path && components.path.charAt(0) !== "/") {
        uri += "/";
      }
    }
    if (components.path !== undefined) {
      let path = components.path;
      const keepDotSegments = options.absolutePath || (schemeHandler && schemeHandler.absolutePath);
      if (!keepDotSegments) {
        path = removeDotSegments(path);
      }
      if (!hasAuthority) {
        path = path.replace(/^\/\//, "/%2F"); //don't allow the path to start with "//"
      }
      uri += path;
    }
    if (components.query !== undefined) {
      uri += "?" + components.query;
    }
    if (components.fragment !== undefined) {
      uri += "#" + components.fragment;
    }
    return uri;
  }
  /**
   * Resolves relative-reference components against base components.
   *
   * @param {object} base - Components of the base URI.
   * @param {object} relative - Components of the reference to resolve.
   * @param {object} [options] - Passed to `serialize`/`parse` when normalizing;
   *   `tolerant` makes the reference's own scheme be ignored. `null` is accepted.
   * @param {boolean} [skipNormalization] - When truthy, `base` and `relative` are
   *   used as given instead of being round-tripped through `serialize` and `parse`.
   * @returns {object} A new components object for the resolved target.
   */
  export function resolveComponents(base, relative, options = {}, skipNormalization) {
    // Guard before first use: the fallback used to run only after `options` had
    // already been handed to serialize()/parse(), so null options crashed there.
    const opts = options || {};
    const target = {};
    let from = base;
    let ref = relative;
    if (!skipNormalization) {
      from = parse(serialize(base, opts), opts); //normalize base components
      ref = parse(serialize(relative, opts), opts); //normalize relative components
    }
    const hasAuthority = (parts) => parts.userinfo !== undefined || parts.host !== undefined || parts.port !== undefined;
    if (!opts.tolerant && ref.scheme) {
      // The reference is a full URI: take everything from it.
      target.scheme = ref.scheme;
      target.userinfo = ref.userinfo;
      target.host = ref.host;
      target.port = ref.port;
      target.path = removeDotSegments(ref.path || "");
      target.query = ref.query;
    }
    else {
      if (hasAuthority(ref)) {
        // Network-path reference: authority, path and query come from the reference.
        target.userinfo = ref.userinfo;
        target.host = ref.host;
        target.port = ref.port;
        target.path = removeDotSegments(ref.path || "");
        target.query = ref.query;
      }
      else {
        if (!ref.path) {
          // Empty path: keep the base path; the base query survives only if the reference has none.
          target.path = from.path;
          target.query = ref.query !== undefined ? ref.query : from.query;
        }
        else {
          if (ref.path.charAt(0) === "/") {
            target.path = removeDotSegments(ref.path);
          }
          else {
            // Merge the reference path with the base path.
            if (hasAuthority(from) && !from.path) {
              target.path = "/" + ref.path;
            }
            else if (!from.path) {
              target.path = ref.path;
            }
            else {
              // Replace everything after the last "/" of the base path.
              target.path = from.path.slice(0, from.path.lastIndexOf("/") + 1) + ref.path;
            }
            target.path = removeDotSegments(target.path);
          }
          target.query = ref.query;
        }
        target.userinfo = from.userinfo;
        target.host = from.host;
        target.port = from.port;
      }
      target.scheme = from.scheme;
    }
    target.fragment = ref.fragment;
    return target;
  }
  /**
   * Resolves a relative URI string against a base URI string.
   *
   * Both inputs are parsed, resolved and serialized with `scheme` defaulted to the
   * string 'null' (an explicit `options.scheme` still wins, since `options` is
   * merged over the default).
   *
   * @param {string} baseURI - The base URI.
   * @param {string} relativeURI - The reference to resolve.
   * @param {object} [options] - Options forwarded to parse, resolveComponents and serialize.
   * @returns {string} The resolved URI.
   */
  export function resolve(baseURI, relativeURI, options) {
    const schemelessOptions = assign({ scheme: 'null' }, options);
    const baseComponents = parse(baseURI, schemelessOptions);
    const relativeComponents = parse(relativeURI, schemelessOptions);
    const resolved = resolveComponents(baseComponents, relativeComponents, schemelessOptions, true);
    return serialize(resolved, schemelessOptions);
  }
  /**
   * Normalizes a URI given either as a string or as a components object.
   *
   * @param {string|object} uri - The URI text or its components.
   * @param {object} [options] - Options forwarded to parse and serialize.
   * @returns {string|object} A normalized string for string input, normalized
   *   components for object input, and any other value unchanged.
   */
  export function normalize(uri, options) {
    if (typeof uri === "string") {
      return serialize(parse(uri, options), options);
    }
    if (typeOf(uri) === "object") {
      return parse(serialize(uri, options), options);
    }
    return uri;
  }
  /**
   * Tells whether two URIs are equal once both are brought to serialized form.
   *
   * @param {string|object} uriA - First URI, as text or components.
   * @param {string|object} uriB - Second URI, as text or components.
   * @param {object} [options] - Options forwarded to parse and serialize.
   * @returns {boolean} True when the two resulting values are strictly equal.
   */
  export function equal(uriA, uriB, options) {
    // Strings are re-parsed and serialized, objects are serialized, anything else is compared as is.
    const canonical = (uri) => {
      if (typeof uri === "string") {
        return serialize(parse(uri, options), options);
      }
      if (typeOf(uri) === "object") {
        return serialize(uri, options);
      }
      return uri;
    };
    const left = canonical(uriA);
    const right = canonical(uriB);
    return left === right;
  }
  /**
   * Percent-encodes every character of `str` matched by the ESCAPE pattern.
   *
   * @param {*} str - Value to escape; a falsy value is returned unchanged.
   * @param {object} [options] - A truthy `iri` selects the IRI pattern table.
   * @returns {*} The escaped text, or the falsy input itself.
   */
  export function escapeComponent(str, options) {
    if (!str) {
      return str;
    }
    const protocol = options && options.iri ? IRI_PROTOCOL : URI_PROTOCOL;
    return str.toString().replace(protocol.ESCAPE, pctEncChar);
  }
  /**
   * Decodes every percent-encoded sequence found in `str`.
   *
   * @param {*} str - Value to unescape; a falsy value is returned unchanged.
   * @param {object} [options] - A truthy `iri` selects the IRI pattern table.
   * @returns {*} The unescaped text, or the falsy input itself.
   */
  export function unescapeComponent(str, options) {
    if (!str) {
      return str;
    }
    const protocol = options && options.iri ? IRI_PROTOCOL : URI_PROTOCOL;
    return str.toString().replace(protocol.PCT_ENCODED, pctDecChars);
  }
  480. //# sourceMappingURL=uri.js.map