You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

853 lines
30 KiB

  1. "use strict";
  /**
   * Inheritance helper emitted by the TypeScript compiler: makes constructor
   * `d` a subclass of `b` (static members first, then the prototype chain).
   * An already-present `this.__extends` is reused instead of redefining it.
   */
  var __extends = (this && this.__extends) || (function () {
      // Fallback used when only the legacy __proto__ link is available.
      function linkViaProto(derived, base) { derived.__proto__ = base; }
      // Last-resort fallback: copy the base's own enumerable members.
      function copyOwnMembers(derived, base) {
          for (var key in base) {
              if (Object.prototype.hasOwnProperty.call(base, key)) {
                  derived[key] = base[key];
              }
          }
      }
      // Picks a strategy on first use and replaces itself with it.
      var inheritStatics = function (derived, base) {
          if (Object.setPrototypeOf) {
              inheritStatics = Object.setPrototypeOf;
          }
          else if ({ __proto__: [] } instanceof Array) {
              inheritStatics = linkViaProto;
          }
          else {
              inheritStatics = copyOwnMembers;
          }
          return inheritStatics(derived, base);
      };
      return function (d, b) {
          inheritStatics(d, b);
          // Surrogate constructor so `b` itself is never invoked here.
          function Surrogate() { this.constructor = d; }
          if (b === null) {
              d.prototype = Object.create(b);
          }
          else {
              Surrogate.prototype = b.prototype;
              d.prototype = new Surrogate();
          }
      };
  })();
  /**
   * Spread helper emitted by the TypeScript compiler: concatenates every
   * array-like argument into one new array, copying index by index.
   * An already-present `this.__spreadArrays` is reused.
   */
  var __spreadArrays = (this && this.__spreadArrays) || function () {
      var argCount = arguments.length;
      var total = 0;
      var n;
      // First pass: size the result.
      for (n = 0; n < argCount; n++) {
          total += arguments[n].length;
      }
      var merged = Array(total);
      var write = 0;
      // Second pass: copy each source in order.
      for (n = 0; n < argCount; n++) {
          var source = arguments[n];
          var sourceLength = source.length;
          for (var read = 0; read < sourceLength; read++) {
              merged[write] = source[read];
              write++;
          }
      }
      return merged;
  };
  /**
   * Interop helper emitted by the TypeScript compiler: ES-module namespaces
   * (flagged with `__esModule`) are returned unchanged, anything else is
   * wrapped as `{ default: mod }`.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      if (mod && mod.__esModule) {
          return mod;
      }
      return { "default": mod };
  };
  25. Object.defineProperty(exports, "__esModule", { value: true });
  26. exports.parse = void 0;
  27. var he_1 = require("he");
  28. var node_1 = __importDefault(require("./node"));
  29. var type_1 = __importDefault(require("./type"));
  30. var text_1 = __importDefault(require("./text"));
  31. var matcher_1 = __importDefault(require("../matcher"));
  32. var back_1 = __importDefault(require("../back"));
  33. var comment_1 = __importDefault(require("./comment"));
  /**
   * Upper-case tag names that `structuredText` treats as block boundaries
   * (each starts and ends a line of output).
   * UL, OL, TABLE and TR were commented out of the original table and are
   * therefore not listed.
   */
  var kBlockElements = ['DIV', 'P', 'LI', 'TD', 'SECTION', 'BR'].reduce(function (table, tag) {
      table[tag] = true;
      return table;
  }, {});
  /**
   * HTMLElement, which contains a set of children.
   *
   * Note: this is a minimalist implementation; only a partial tree
   * structure is provided (parentNode is tracked, but there is no
   * nextSibling, previousSibling etc).
   * @class HTMLElement
   * @extends {Node}
   */
  55. var HTMLElement = /** @class */ (function (_super) {
  56. __extends(HTMLElement, _super);
  /**
   * Creates an instance of HTMLElement.
   * @param tagName tag name as written in the markup (may be null)
   * @param keyAttrs object whose `id` and `class` entries are read
   * @param [rawAttrs] attributes as a raw string; when empty it is
   *        synthesised from `keyAttrs.id` / `keyAttrs.class`
   * @param [parentNode] parent element, defaults to null
   *
   * @memberof HTMLElement
   */
  function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode) {
      var _this = _super.call(this) || this;
      var hasRawAttrs = Boolean(rawAttrs);
      _this.rawAttrs = hasRawAttrs ? rawAttrs : '';
      _this.parentNode = parentNode || null;
      _this.classNames = [];
      /**
       * Node Type declaration.
       */
      _this.nodeType = type_1.default.ELEMENT_NODE;
      _this._tag_name = tagName;
      _this.childNodes = [];
      if (keyAttrs.id) {
          _this.id = keyAttrs.id;
          if (!hasRawAttrs) {
              _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
          }
      }
      if (keyAttrs.class) {
          _this.classNames = keyAttrs.class.split(/\s+/);
          if (!hasRawAttrs) {
              // Append to the synthesised id attribute when there is one.
              var classAttr = "class=\"" + _this.classNames.join(' ') + "\"";
              _this.rawAttrs = _this.rawAttrs ? _this.rawAttrs + " " + classAttr : classAttr;
          }
      }
      return _this;
  }
  /**
   * Remove a child from the childNodes array. Every occurrence of `node`
   * is dropped and `childNodes` is replaced by a new array.
   * @param {HTMLElement} node node to remove
   */
  HTMLElement.prototype.removeChild = function (node) {
      this.childNodes = this.childNodes.reduce(function (kept, candidate) {
          if (candidate !== node) {
              kept.push(candidate);
          }
          return kept;
      }, []);
  };
  108. /**
  109. * Exchanges given child with new child
  110. * @param {HTMLElement} oldNode node to exchange
  111. * @param {HTMLElement} newNode new node
  112. */
  113. HTMLElement.prototype.exchangeChild = function (oldNode, newNode) {
  114. var idx = -1;
  115. for (var i = 0; i < this.childNodes.length; i++) {
  116. if (this.childNodes[i] === oldNode) {
  117. idx = i;
  118. break;
  119. }
  120. }
  121. this.childNodes[idx] = newNode;
  122. };
  /**
   * Upper-cased tag name. A falsy stored name (e.g. null for a root
   * container) is returned unchanged.
   */
  Object.defineProperty(HTMLElement.prototype, "tagName", {
      get: function () {
          var stored = this._tag_name;
          if (!stored) {
              return stored;
          }
          return stored.toUpperCase();
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "rawText", {
      /**
       * Get the escaped (as-is) text of this node: the `rawText` of every
       * child, concatenated in order.
       * @return {string} text content
       */
      get: function () {
          var combined = '';
          this.childNodes.forEach(function (child) {
              combined += child.rawText;
          });
          return combined;
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "text", {
      /**
       * Get the unescaped text of this node and its children: `rawText`
       * passed through the `he` entity decoder.
       * @return {string} text content
       */
      get: function () {
          var escaped = this.rawText;
          return he_1.decode(escaped);
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "structuredText", {
      /**
       * Get structured text: text nodes are collected depth-first, elements
       * listed in kBlockElements start and end a line, and whitespace inside
       * each line is normalised.
       * @return {string} structured text, lines joined with '\n'
       */
      get: function () {
          var lines = [[]];
          function currentLine() {
              return lines[lines.length - 1];
          }
          // Open a fresh line, but only if the current one holds text.
          function breakLine() {
              if (currentLine().length > 0) {
                  lines.push([]);
              }
          }
          function visit(node) {
              if (node.nodeType === type_1.default.ELEMENT_NODE) {
                  var isBlock = kBlockElements[node.tagName];
                  if (isBlock) {
                      breakLine();
                  }
                  node.childNodes.forEach(visit);
                  if (isBlock) {
                      breakLine();
                  }
                  return;
              }
              if (node.nodeType !== type_1.default.TEXT_NODE) {
                  return;
              }
              var line = currentLine();
              if (node.isWhitespace) {
                  // Whitespace-only node: remember it and emit a single
                  // space in front of the next real text on this line.
                  line.prependWhitespace = true;
                  return;
              }
              var text = node.text;
              if (line.prependWhitespace) {
                  text = " " + text;
                  line.prependWhitespace = false;
              }
              line.push(text);
          }
          visit(this);
          var rendered = lines.map(function (line) {
              // Normalize each line's whitespace
              return line.join('').trim().replace(/\s{2,}/g, ' ');
          });
          return rendered.join('\n').replace(/\s+$/, ''); // trimRight
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Serialise this element to markup. Void elements are written as a lone
   * opening tag; an element without a tag name serialises to its inner
   * HTML only.
   * @return {string} markup of this element
   */
  HTMLElement.prototype.toString = function () {
      var tag = this._tag_name;
      if (!tag) {
          return this.innerHTML;
      }
      var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
      var openTag = "<" + tag + attrs + ">";
      var isVoid = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
      return isVoid ? openTag : openTag + this.innerHTML + "</" + tag + ">";
  };
  /**
   * Markup of all children: each child's toString() output, concatenated.
   */
  Object.defineProperty(HTMLElement.prototype, "innerHTML", {
      get: function () {
          var children = this.childNodes;
          var pieces = [];
          for (var i = 0; i < children.length; i++) {
              pieces.push(children[i].toString());
          }
          return pieces.join('');
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Replace this element's children.
   * @param content a single Node (wrapped in an array), an HTML string
   *        (parsed; if parsing yields no children the string becomes one
   *        text node), or anything else, which is stored as childNodes as-is
   * @param [options] options forwarded to parse() for string content
   */
  HTMLElement.prototype.set_content = function (content, options) {
      if (options === void 0) { options = {}; }
      var nodes = content;
      if (content instanceof node_1.default) {
          nodes = [content];
      }
      else if (typeof content == 'string') {
          var parsed = parse(content, options);
          if (parsed.childNodes.length) {
              nodes = parsed.childNodes;
          }
          else {
              nodes = [new text_1.default(content)];
          }
      }
      this.childNodes = nodes;
  };
  /**
   * Markup of this element including its own tag; same value as toString().
   */
  Object.defineProperty(HTMLElement.prototype, "outerHTML", {
      get: function () {
          var markup = this.toString();
          return markup;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Trim element from right (in block) after seeing pattern in a TextNode.
   * Element children are processed recursively; in the first non-element
   * child whose rawText matches, the text is cut at the match and all
   * following siblings are dropped.
   * @param {RegExp} pattern pattern to find
   * @return {HTMLElement} reference to current node
   */
  HTMLElement.prototype.trimRight = function (pattern) {
      var i = 0;
      while (i < this.childNodes.length) {
          var child = this.childNodes[i];
          if (child.nodeType === type_1.default.ELEMENT_NODE) {
              child.trimRight(pattern);
          }
          else {
              var cutAt = child.rawText.search(pattern);
              if (cutAt > -1) {
                  child.rawText = child.rawText.slice(0, cutAt);
                  // Truncating the array drops every following node and
                  // also ends the loop.
                  this.childNodes.length = i + 1;
              }
          }
          i++;
      }
      return this;
  };
  Object.defineProperty(HTMLElement.prototype, "structure", {
      /**
       * Get the DOM structure as an indented outline: one line per element
       * (`tag#id.class1.class2`) and `#text` for each non-whitespace text
       * node, indented one space per nesting level.
       * @return {string} structure
       */
      get: function () {
          var lines = [];
          function emit(depth, label) {
              lines.push(' '.repeat(depth) + label);
          }
          function describe(node, depth) {
              var idPart = node.id ? ("#" + node.id) : '';
              var classPart = node.classNames.length ? ("." + node.classNames.join('.')) : '';
              emit(depth, node._tag_name + idPart + classPart);
              var childDepth = depth + 1;
              node.childNodes.forEach(function (child) {
                  if (child.nodeType === type_1.default.ELEMENT_NODE) {
                      describe(child, childDepth);
                      return;
                  }
                  if (child.nodeType === type_1.default.TEXT_NODE && !child.isWhitespace) {
                      emit(childDepth, '#text');
                  }
              });
          }
          describe(this, 0);
          return lines.join('\n');
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Remove whitespace in this sub tree: whitespace-only text nodes are
   * dropped, remaining text nodes have their rawText trimmed, and element
   * children are processed recursively. The childNodes array is compacted
   * in place.
   * @return {HTMLElement} pointer to this
   */
  HTMLElement.prototype.removeWhitespace = function () {
      var nodes = this.childNodes;
      var total = nodes.length;
      var kept = 0;
      for (var i = 0; i < total; i++) {
          var node = nodes[i];
          if (node.nodeType === type_1.default.TEXT_NODE) {
              if (node.isWhitespace) {
                  continue;
              }
              node.rawText = node.rawText.trim();
          }
          else if (node.nodeType === type_1.default.ELEMENT_NODE) {
              node.removeWhitespace();
          }
          // kept <= i, so this only overwrites slots already visited.
          nodes[kept++] = node;
      }
      nodes.length = kept;
      return this;
  };
  /**
   * Query CSS selector to find matching nodes among the descendants of this
   * element (the element itself is never tested).
   *
   * A string containing ',' is split into separate selectors; each is
   * queried on its own and the results are merged through a Set, so an
   * element matched by several selectors is returned once.
   *
   * @param {string|Matcher} selector Simplified CSS selector, or a Matcher
   *        instance (which is reset() before use)
   * @return {HTMLElement[]} matching elements
   */
  HTMLElement.prototype.querySelectorAll = function (selector) {
      var _this = this;
      var matcher;
      if (selector instanceof matcher_1.default) {
          matcher = selector;
          matcher.reset();
      }
      else {
          if (selector.includes(',')) {
              var selectors = selector.split(',');
              // Run each comma-separated selector separately and collect
              // the results in one Set to drop duplicates.
              return Array.from(selectors.reduce(function (pre, cur) {
                  var result = _this.querySelectorAll(cur.trim());
                  return result.reduce(function (p, c) {
                      return p.add(c);
                  }, pre);
              }, new Set()));
          }
          matcher = new matcher_1.default(selector);
      }
      // Explicit depth-first traversal. Each stack entry is
      // [node, index of the next child to visit, result of matcher.advance()].
      var stack = [];
      return this.childNodes.reduce(function (res, cur) {
          stack.push([cur, 0, false]);
          while (stack.length) {
              var state = back_1.default(stack); // get last element
              var el = state[0];
              if (state[1] === 0) {
                  // Seen for first time.
                  if (el.nodeType !== type_1.default.ELEMENT_NODE) {
                      // Only element nodes can match or have children visited.
                      stack.pop();
                      continue;
                  }
                  var html_el = el;
                  state[2] = matcher.advance(html_el);
                  if (state[2]) {
                      if (matcher.matched) {
                          res.push(html_el);
                          // Matches nested inside the matched element are
                          // found by a recursive query on it.
                          // NOTE(review): when `selector` is a Matcher instance this
                          // recursive call reset()s the same matcher that is in use
                          // here - confirm that is safe for Matcher's state.
                          res.push.apply(res, (html_el.querySelectorAll(selector)));
                          // no need to go further.
                          matcher.rewind();
                          stack.pop();
                          continue;
                      }
                  }
              }
              if (state[1] < el.childNodes.length) {
                  // Descend into the next unvisited child.
                  stack.push([el.childNodes[state[1]++], 0, false]);
              }
              else {
                  // All children visited: undo this node's advance (if it
                  // succeeded) before returning to the parent.
                  if (state[2]) {
                      matcher.rewind();
                  }
                  stack.pop();
              }
          }
          return res;
      }, []);
  };
  /**
   * Query CSS Selector to find the first matching node among the descendants
   * of this element, in depth-first document order.
   *
   * Unlike querySelectorAll, a string selector is handed to Matcher as-is;
   * this method does not split it on ','.
   *
   * @param {string|Matcher} selector Simplified CSS selector, or a Matcher
   *        instance (which is reset() before use)
   * @return {HTMLElement} matching node, or null when nothing matches
   */
  HTMLElement.prototype.querySelector = function (selector) {
      var matcher;
      if (selector instanceof matcher_1.default) {
          matcher = selector;
          matcher.reset();
      }
      else {
          matcher = new matcher_1.default(selector);
      }
      // Explicit depth-first traversal. Each stack entry is
      // [node, index of the next child to visit, result of matcher.advance()].
      var stack = [];
      for (var _i = 0, _a = this.childNodes; _i < _a.length; _i++) {
          var node = _a[_i];
          stack.push([node, 0, false]);
          while (stack.length) {
              var state = back_1.default(stack);
              var el = state[0];
              if (state[1] === 0) {
                  // Seen for first time.
                  if (el.nodeType !== type_1.default.ELEMENT_NODE) {
                      // Only element nodes can match or have children visited.
                      stack.pop();
                      continue;
                  }
                  state[2] = matcher.advance(el);
                  if (state[2]) {
                      if (matcher.matched) {
                          // First full match wins; the search stops here.
                          return el;
                      }
                  }
              }
              if (state[1] < el.childNodes.length) {
                  // Descend into the next unvisited child.
                  stack.push([el.childNodes[state[1]++], 0, false]);
              }
              else {
                  // All children visited: undo this node's advance (if it
                  // succeeded) before returning to the parent.
                  if (state[2]) {
                      matcher.rewind();
                  }
                  stack.pop();
              }
          }
      }
      return null;
  };
  /**
   * Append a child node to childNodes. The `parentNode` back-reference is
   * only set when the node is an HTMLElement.
   * @param {Node} node node to append
   * @return {Node} node appended
   */
  HTMLElement.prototype.appendChild = function (node) {
      var isElement = node instanceof HTMLElement;
      this.childNodes.push(node);
      if (isElement) {
          node.parentNode = this;
      }
      return node;
  };
  Object.defineProperty(HTMLElement.prototype, "firstChild", {
      /**
       * Get the first child node (undefined when there are no children).
       * @return {Node} first child node
       */
      get: function () {
          var children = this.childNodes;
          return children[0];
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "lastChild", {
      /**
       * Get the last child node, as returned by the `back` helper for the
       * childNodes array.
       * @return {Node} last child node
       */
      get: function () {
          var children = this.childNodes;
          return back_1.default(children);
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "attributes", {
      /**
       * Get attributes with entity-decoded values. The result is computed
       * once from rawAttributes and cached on `_attrs`; valueless
       * attributes decode to ''.
       * @return {Object} parsed and unescaped attributes
       */
      get: function () {
          if (this._attrs) {
              return this._attrs;
          }
          var decoded = {};
          this._attrs = decoded;
          var raw = this.rawAttributes;
          for (var name in raw) {
              decoded[name] = he_1.decode(raw[name] || '');
          }
          return decoded;
      },
      enumerable: false,
      configurable: true
  });
  Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
      /**
       * Get escaped (as-is) attributes, parsed from the rawAttrs string and
       * cached on `_rawAttrs`. Attributes without a value, or with an empty
       * value, map to null.
       * @return {Object} parsed attributes
       */
      get: function () {
          if (this._rawAttrs) {
              return this._rawAttrs;
          }
          var parsed = {};
          if (this.rawAttrs) {
              // Groups: 1 = name, 2 = double-quoted, 3 = single-quoted, 4 = unquoted value.
              var attrPattern = /\b([a-z][a-z0-9-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
              for (var found = attrPattern.exec(this.rawAttrs); found; found = attrPattern.exec(this.rawAttrs)) {
                  parsed[found[1]] = found[2] || found[3] || found[4] || null;
              }
          }
          this._rawAttrs = parsed;
          return parsed;
      },
      enumerable: false,
      configurable: true
  });
  /**
   * Remove an attribute: deletes it from the raw attribute cache and, when
   * present, from the decoded cache, then rebuilds the rawAttrs string.
   * @param {string} key The attribute name
   */
  HTMLElement.prototype.removeAttribute = function (key) {
      var attrs = this.rawAttributes;
      delete attrs[key];
      // Keep the decoded cache in step.
      if (this._attrs) {
          delete this._attrs[key];
      }
      // Rebuild the raw string; entries without a value are written bare.
      var pieces = [];
      Object.keys(attrs).forEach(function (name) {
          var val = JSON.stringify(attrs[name]);
          var valueless = val === undefined || val === 'null';
          pieces.push(valueless ? name : name + "=" + val);
      });
      this.rawAttrs = pieces.join(' ');
  };
  /**
   * Tell whether `key` is present in the decoded attributes object
   * (checked with the `in` operator).
   * @param {string} key The attribute name
   * @return {boolean} true when present
   */
  HTMLElement.prototype.hasAttribute = function (key) {
      var attrs = this.attributes;
      return key in attrs;
  };
  /**
   * Get an attribute from the decoded attributes object.
   * @param {string} key The attribute name
   * @return {string} value of the attribute (undefined when absent)
   */
  HTMLElement.prototype.getAttribute = function (key) {
      var attrs = this.attributes;
      return attrs[key];
  };
  /**
   * Set an attribute value on the HTMLElement and rebuild the rawAttrs
   * string.
   * The value is converted with String(), so null / undefined are stored as
   * the text "null" / "undefined" (they do not remove the attribute). An
   * empty string is serialised as a bare attribute name.
   * @param {string} key The attribute name
   * @param {string} value The value to set
   * @throws {Error} when called with fewer than two arguments
   */
  HTMLElement.prototype.setAttribute = function (key, value) {
      if (arguments.length < 2) {
          throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
      }
      var attrs = this.rawAttributes;
      var text = String(value);
      attrs[key] = text;
      // Keep the decoded cache in step when it already exists.
      if (this._attrs) {
          this._attrs[key] = he_1.decode(text);
      }
      // Rebuild the raw string; null and empty values are written bare.
      var pieces = [];
      Object.keys(attrs).forEach(function (name) {
          var val = JSON.stringify(attrs[name]);
          var valueless = val === 'null' || val === '""';
          pieces.push(valueless ? name : name + "=" + val);
      });
      this.rawAttrs = pieces.join(' ');
  };
  /**
   * Replace all the attributes of the HTMLElement by the provided attributes.
   * Both attribute caches are dropped and the rawAttrs string is rebuilt
   * from `attributes`.
   *
   * Valueless entries (null, undefined or '') are written as a bare
   * attribute name, matching what setAttribute / removeAttribute emit for
   * empty values. Previously they were serialised as name="null",
   * name="undefined" and name="". The literal strings 'null' and '""' are
   * still treated as valueless for backward compatibility.
   * @param {Attributes} attributes the new attribute set
   */
  HTMLElement.prototype.setAttributes = function (attributes) {
      // Invalidate current this.attributes
      if (this._attrs) {
          delete this._attrs;
      }
      // Invalidate current this.rawAttributes
      if (this._rawAttrs) {
          delete this._rawAttrs;
      }
      // Update rawString
      this.rawAttrs = Object.keys(attributes).map(function (name) {
          var val = attributes[name];
          var valueless = val === null || val === undefined || val === '' ||
              val === 'null' || val === '""';
          if (valueless) {
              return name;
          }
          return name + "=" + JSON.stringify(String(val));
      }).join(' ');
  };
  /**
   * Parse `html` and insert the resulting nodes relative to this element.
   *
   * Inserted HTMLElement nodes get their `parentNode` set to the element
   * that now contains them. This now also happens for 'afterbegin', which
   * previously left `parentNode` untouched.
   *
   * 'beforebegin' and 'afterend' insert into `this.parentNode.childNodes`
   * and therefore fail with a TypeError when this element has no parent.
   *
   * @param {string} where one of 'beforebegin', 'afterbegin', 'beforeend'
   *        or 'afterend'
   * @param {string} html markup to parse and insert
   * @throws {Error} when fewer than two arguments are given, or `where` is
   *         not one of the four positions
   */
  HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
      var _this = this;
      if (arguments.length < 2) {
          throw new Error('2 arguments required');
      }
      // Parsed before `where` is validated, as in the original flow.
      var nodes = parse(html).childNodes;
      // Point every inserted element at its new parent.
      function adopt(newParent) {
          nodes.forEach(function (n) {
              if (n instanceof HTMLElement) {
                  n.parentNode = newParent;
              }
          });
      }
      // Splice the nodes into the parent's children, `offset` slots after this element.
      function insertAmongSiblings(offset) {
          var siblings = _this.parentNode.childNodes;
          var idx = siblings.indexOf(_this);
          siblings.splice.apply(siblings, [idx + offset, 0].concat(nodes));
          adopt(_this.parentNode);
      }
      if (where === 'afterend') {
          insertAmongSiblings(1);
      }
      else if (where === 'afterbegin') {
          this.childNodes.unshift.apply(this.childNodes, nodes);
          adopt(this);
      }
      else if (where === 'beforeend') {
          nodes.forEach(function (n) {
              _this.appendChild(n);
          });
      }
      else if (where === 'beforebegin') {
          insertAmongSiblings(0);
      }
      else {
          throw new Error("The value provided ('" + where + "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'");
      }
  };
  623. return HTMLElement;
  624. }(node_1.default));
  625. exports.default = HTMLElement;
  // Tag names follow the custom-element name grammar:
  // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
  //
  // kMarkupPattern matches either an HTML comment (`<!-- ... -->`) or a tag.
  // For a tag: group 1 is '/' on a closing tag, group 2 the tag name,
  // group 3 the raw attribute text, group 4 '/' on a self-closing tag.
  // It is a /g regex, so exec() keeps its position in `lastIndex`.
  var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
  // kAttributePattern picks `id` / `class` out of raw attribute text.
  // Group 2 is the attribute name; the value is in group 4 (double-quoted),
  // group 5 (single-quoted) or group 6 (unquoted).
  var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]+)"|'([^']+)'|(\S+))/ig;
  /**
   * Void elements: tags the parser closes immediately because they never
   * have content or an end tag. Keys are listed in both lower and upper
   * case because lookups use the tag name as written in the markup.
   *
   * `embed`, `param`, `track` and `wbr` were missing although
   * HTMLElement#toString already serialises them as void elements; without
   * them the parser kept such a tag open and nested the following nodes
   * inside it.
   */
  var kSelfClosingElements = {
      area: true,
      AREA: true,
      base: true,
      BASE: true,
      br: true,
      BR: true,
      col: true,
      COL: true,
      embed: true,
      EMBED: true,
      hr: true,
      HR: true,
      img: true,
      IMG: true,
      input: true,
      INPUT: true,
      link: true,
      LINK: true,
      meta: true,
      META: true,
      param: true,
      PARAM: true,
      source: true,
      SOURCE: true,
      track: true,
      TRACK: true,
      wbr: true,
      WBR: true
  };
  /**
   * For an open element (outer key), the tags whose opening implicitly
   * closes it, e.g. a new <li> closes the current <li>. Outer and inner
   * keys exist in both lower and upper case.
   */
  var kElementsClosedByOpening = (function () {
      var rules = [
          ['li', ['li']],
          ['p', ['p', 'div']],
          ['b', ['div']],
          ['td', ['td', 'th']],
          ['th', ['td', 'th']],
          ['h1', ['h1']],
          ['h2', ['h2']],
          ['h3', ['h3']],
          ['h4', ['h4']],
          ['h5', ['h5']],
          ['h6', ['h6']]
      ];
      // Build a fresh lookup: all lower-case names first, then upper-case.
      function tagSet(names) {
          var set = {};
          names.forEach(function (name) { set[name] = true; });
          names.forEach(function (name) { set[name.toUpperCase()] = true; });
          return set;
      }
      var table = {};
      rules.forEach(function (rule) {
          table[rule[0]] = tagSet(rule[1]);
          table[rule[0].toUpperCase()] = tagSet(rule[1]);
      });
      return table;
  })();
  /**
   * For an open element (outer key), the closing tags that implicitly
   * close it as well, e.g. </ul> closes a still-open <li>. Outer and inner
   * keys exist in both lower and upper case.
   */
  var kElementsClosedByClosing = (function () {
      var rules = [
          ['li', ['ul', 'ol']],
          ['a', ['div']],
          ['b', ['div']],
          ['i', ['div']],
          ['p', ['div']],
          ['td', ['tr', 'table']],
          ['th', ['tr', 'table']]
      ];
      // Build a fresh lookup: all lower-case names first, then upper-case.
      function tagSet(names) {
          var set = {};
          names.forEach(function (name) { set[name] = true; });
          names.forEach(function (name) { set[name.toUpperCase()] = true; });
          return set;
      }
      var table = {};
      rules.forEach(function (rule) {
          table[rule[0]] = tagSet(rule[1]);
          table[rule[0].toUpperCase()] = tagSet(rule[1]);
      });
      return table;
  })();
  /**
   * Elements whose content the parser does not scan for markup: it jumps
   * straight to the matching closing tag. Keys exist in both lower and
   * upper case.
   */
  var kBlockTextElements = ['script', 'noscript', 'style', 'pre'].reduce(function (table, tag) {
      table[tag] = true;
      table[tag.toUpperCase()] = true;
      return table;
  }, {});
  // Name of the synthetic wrapper tag put around the input before parsing;
  // matches for this tag are skipped, so it never appears in the tree.
  var frameflag = 'documentfragmentcontainer';
  /**
   * Parse an HTML string into a tree rooted at a tag-less HTMLElement.
   *
   * Options read by this function:
   *  - `comment`: keep comments as comment nodes (otherwise they are dropped)
   *  - `lowerCaseTagName`: lower-case every tag name
   *  - `script` / `noscript` / `style` / `pre` (looked up by the tag name as
   *    matched): keep the raw content of that element as a text node
   *  - `noFix`: skip the repair of unclosed elements; see the return value
   *
   * @param {string} data HTML source
   * @param {Object} [options] parser options, defaults to {}
   * @return the root element with a `valid` flag (true when every opened
   *         element was closed). With `options.noFix` set, a text node
   *         built from the wrapped input, carrying the same `valid` flag,
   *         is returned instead.
   */
  function parse(data, options) {
      if (options === void 0) { options = {}; }
      var root = new HTMLElement(null, {});
      var currentParent = root;
      // Open elements, innermost last; only `root` remains when all are closed.
      var stack = [root];
      // End offset of the previous markup match, -1 before the first one.
      var lastTextPos = -1;
      var match;
      // Wrap the input in a synthetic container tag, see
      // https://github.com/taoqf/node-html-parser/issues/38
      data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
      // Handles one kMarkupPattern match (a comment or a tag).
      var _loop_1 = function () {
          if (lastTextPos > -1) {
              if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
                  // There is text between the previous match and this one:
                  // keep it as a text node.
                  var text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
                  currentParent.appendChild(new text_1.default(text));
              }
          }
          lastTextPos = kMarkupPattern.lastIndex;
          if (match[2] === frameflag) {
              // The synthetic wrapper tag itself is not part of the tree.
              return "continue";
          }
          if (match[0][1] === '!') {
              // this is a comment
              if (options.comment) {
                  // Only keep what is in between <!-- and -->.
                  // The first argument is the larger one here; substring()
                  // swaps its arguments in that case.
                  var text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
                  currentParent.appendChild(new comment_1.default(text));
              }
              return "continue";
          }
          if (options.lowerCaseTagName) {
              match[2] = match[2].toLowerCase();
          }
          if (!match[1]) {
              // not </ tags
              // Collect id / class from the raw attribute text.
              var attrs = {};
              for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
                  attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6];
              }
              var tagName = currentParent.tagName;
              if (!match[4] && kElementsClosedByOpening[tagName]) {
                  if (kElementsClosedByOpening[tagName][match[2]]) {
                      // Opening this tag implicitly closes the current element.
                      stack.pop();
                      currentParent = back_1.default(stack);
                  }
              }
              // ignore container tag we add above
              // https://github.com/taoqf/node-html-parser/issues/38
              currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
              stack.push(currentParent);
              if (kBlockTextElements[match[2]]) {
                  // a little test to find next </script> or </style> ...
                  var closeMarkup_1 = "</" + match[2] + ">";
                  var index = (function () {
                      if (options.lowerCaseTagName) {
                          // NOTE(review): searches a lower-cased copy of the input and uses
                          // the index on the original string - assumes lower-casing never
                          // changes the string length; confirm.
                          return data.toLocaleLowerCase().indexOf(closeMarkup_1, kMarkupPattern.lastIndex);
                      }
                      return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex);
                  })();
                  if (options[match[2]]) {
                      // The caller asked to keep this element's raw content.
                      var text = void 0;
                      if (index === -1) {
                          // there is no matching ending for the text element.
                          text = data.substr(kMarkupPattern.lastIndex);
                      }
                      else {
                          text = data.substring(kMarkupPattern.lastIndex, index);
                      }
                      if (text.length > 0) {
                          currentParent.appendChild(new text_1.default(text));
                      }
                  }
                  if (index === -1) {
                      // No closing tag: move past the end of the input so the
                      // next exec() finds nothing.
                      lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
                  }
                  else {
                      // Resume after the closing tag and mark this match as
                      // closing so the element is popped below.
                      lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup_1.length;
                      match[1] = 'true';
                  }
              }
          }
          if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
              // </ or /> or <br> etc.
              while (true) {
                  if (currentParent.tagName === match[2].toUpperCase()) {
                      // Closing the current element.
                      stack.pop();
                      currentParent = back_1.default(stack);
                      break;
                  }
                  else {
                      var tagName = currentParent.tagName;
                      // Trying to close current tag, and move on
                      if (kElementsClosedByClosing[tagName]) {
                          if (kElementsClosedByClosing[tagName][match[2]]) {
                              stack.pop();
                              currentParent = back_1.default(stack);
                              continue;
                          }
                      }
                      // Use aggressive strategy to handle unmatching markups:
                      // the unmatched closing tag is ignored.
                      break;
                  }
              }
          }
      };
      while ((match = kMarkupPattern.exec(data))) {
          _loop_1();
      }
      // Anything left on the stack besides the root was never closed.
      var valid = Boolean(stack.length === 1);
      if (!options.noFix) {
          var response_1 = root;
          response_1.valid = valid;
          // Repairs the innermost unclosed element still on the stack.
          var _loop_2 = function () {
              // Handle each error elements.
              var last = stack.pop();
              var oneBefore = back_1.default(stack);
              if (last.parentNode && last.parentNode.parentNode) {
                  if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
                      // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
                      // The inner element is removed and its children move
                      // to the outer element's parent.
                      oneBefore.removeChild(last);
                      last.childNodes.forEach(function (child) {
                          oneBefore.parentNode.appendChild(child);
                      });
                      stack.pop();
                  }
                  else {
                      // Single error <div> <h3> </div> handle: Just removes <h3>
                      // and keeps its children in its place's parent.
                      oneBefore.removeChild(last);
                      last.childNodes.forEach(function (child) {
                          oneBefore.appendChild(child);
                      });
                  }
              }
              else {
                  // If it's final element just skip.
              }
          };
          while (stack.length > 1) {
              _loop_2();
          }
          // Top-level elements are detached from the internal root.
          response_1.childNodes.forEach(function (node) {
              if (node instanceof HTMLElement) {
                  node.parentNode = null;
              }
          });
          return response_1;
      }
      // noFix: `data` still includes the synthetic wrapper tags at this point.
      var response = new text_1.default(data);
      response.valid = valid;
      return response;
  }
  853. exports.parse = parse;