-
-
Notifications
You must be signed in to change notification settings - Fork 116
Replace jquery parseHTML with native alternative #474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f6bbe70
396aa3a
e5a580f
f8cb08b
96d5063
df65dc0
bec42cc
4f5dda5
287f9ce
447fe73
9731d20
7a1629f
822feba
8e111fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -7,34 +7,136 @@ const $jq = (typeof jQuery !== 'undefined' ? jQuery : | |||||||||||
| if (! $jq) | ||||||||||||
| throw new Error("jQuery not found"); | ||||||||||||
|
|
||||||||||||
| DOMBackend._$jq = $jq; | ||||||||||||
| import sanitizeHtml from 'sanitize-html'; | ||||||||||||
|
|
||||||||||||
| DOMBackend._$jq = $jq; | ||||||||||||
|
|
||||||||||||
| DOMBackend.getContext = function() { | ||||||||||||
| if (DOMBackend._context) { | ||||||||||||
| return DOMBackend._context; | ||||||||||||
| } | ||||||||||||
| if ( DOMBackend._$jq.support.createHTMLDocument ) { | ||||||||||||
| DOMBackend._context = document.implementation.createHTMLDocument( "" ); | ||||||||||||
|
|
||||||||||||
| // Check if createHTMLDocument is supported directly | ||||||||||||
| if (document.implementation && document.implementation.createHTMLDocument) { | ||||||||||||
| DOMBackend._context = document.implementation.createHTMLDocument(""); | ||||||||||||
|
|
||||||||||||
| // Set the base href for the created document | ||||||||||||
| // so any parsed elements with URLs | ||||||||||||
| // are based on the document's URL (gh-2965) | ||||||||||||
| const base = DOMBackend._context.createElement( "base" ); | ||||||||||||
| const base = DOMBackend._context.createElement("base"); | ||||||||||||
| base.href = document.location.href; | ||||||||||||
| DOMBackend._context.head.appendChild( base ); | ||||||||||||
| DOMBackend._context.head.appendChild(base); | ||||||||||||
| } else { | ||||||||||||
| DOMBackend._context = document; | ||||||||||||
| } | ||||||||||||
| return DOMBackend._context; | ||||||||||||
| } | ||||||||||||
| DOMBackend.parseHTML = function (html) { | ||||||||||||
| // Return an array of nodes. | ||||||||||||
| // | ||||||||||||
| // jQuery does fancy stuff like creating an appropriate | ||||||||||||
| // container element and setting innerHTML on it, as well | ||||||||||||
| // as working around various IE quirks. | ||||||||||||
| return $jq.parseHTML(html, DOMBackend.getContext()) || []; | ||||||||||||
|
|
||||||||||||
| DOMBackend.parseHTML = function(html, context) { | ||||||||||||
|
||||||||||||
| // Don't trim to preserve whitespace | ||||||||||||
| // Handle all falsy values and non-strings | ||||||||||||
| if (!html || typeof html !== 'string') { | ||||||||||||
| return []; | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| // Special handling for table elements to ensure proper parsing | ||||||||||||
| const tableElementMatch = html.match(/<(t(?:body|head|foot|r|d|h))\b/i); | ||||||||||||
|
||||||||||||
| let container; | ||||||||||||
|
|
||||||||||||
| if (tableElementMatch) { | ||||||||||||
| const tagName = tableElementMatch[1].toLowerCase(); | ||||||||||||
| // Create appropriate container based on the table element | ||||||||||||
| switch (tagName) { | ||||||||||||
| case 'td': | ||||||||||||
| case 'th': | ||||||||||||
| container = document.createElement('tr'); | ||||||||||||
| break; | ||||||||||||
| case 'tr': | ||||||||||||
| container = document.createElement('tbody'); | ||||||||||||
| break; | ||||||||||||
| case 'tbody': | ||||||||||||
| case 'thead': | ||||||||||||
| case 'tfoot': | ||||||||||||
| container = document.createElement('table'); | ||||||||||||
| break; | ||||||||||||
| default: | ||||||||||||
| container = document.createElement('template'); | ||||||||||||
| } | ||||||||||||
|
Comment on lines
+46
to
+64
|
||||||||||||
| } else { | ||||||||||||
| container = document.createElement('template'); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| // Sanitize the HTML with sanitize-html | ||||||||||||
| const cleanHtml = sanitizeHtml(html, { | ||||||||||||
| allowedTags: [ | ||||||||||||
| // Basic elements | ||||||||||||
| 'div', 'span', 'p', 'br', 'hr', 'b', 'i', 'em', 'strong', 'u', | ||||||||||||
| 'a', 'img', 'pre', 'code', 'blockquote', | ||||||||||||
| // Lists | ||||||||||||
| 'ul', 'ol', 'li', 'dl', 'dt', 'dd', | ||||||||||||
| // Headers | ||||||||||||
| 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', | ||||||||||||
| // Table elements | ||||||||||||
| 'table', 'thead', 'tbody', 'tfoot', | ||||||||||||
| 'tr', 'td', 'th', 'col', 'colgroup', | ||||||||||||
| // Form elements | ||||||||||||
| 'input', 'textarea', 'select', 'option', 'label', 'button', | ||||||||||||
| // Other elements | ||||||||||||
| 'iframe', 'article', 'section', 'header', 'footer', 'nav', | ||||||||||||
| 'aside', 'main', 'figure', 'figcaption', 'audio', 'video', | ||||||||||||
| 'source', 'canvas', 'details', 'summary' | ||||||||||||
| ], | ||||||||||||
| allowedAttributes: { | ||||||||||||
| '*': [ | ||||||||||||
| 'class', 'id', 'style', 'title', 'role', 'data-*', 'aria-*', | ||||||||||||
| // Allow event handlers | ||||||||||||
| 'onclick', 'onmouseover', 'onmouseout', 'onkeydown', 'onkeyup', 'onkeypress', | ||||||||||||
| 'onfocus', 'onblur', 'onchange', 'onsubmit', 'onreset' | ||||||||||||
|
Comment on lines
+91
to
+94
|
||||||||||||
| 'class', 'id', 'style', 'title', 'role', 'data-*', 'aria-*', | |
| // Allow event handlers | |
| 'onclick', 'onmouseover', 'onmouseout', 'onkeydown', 'onkeyup', 'onkeypress', | |
| 'onfocus', 'onblur', 'onchange', 'onsubmit', 'onreset' | |
| 'class', 'id', 'style', 'title', 'role', 'data-*', 'aria-*' |
Copilot
AI
Dec 12, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The attribute name 'stuff' appears to be a placeholder or typo in the input element's allowed attributes list. This should either be removed or replaced with a valid attribute name if it was intended to be something else.
| 'input': ['type', 'value', 'placeholder', 'checked', 'disabled', 'readonly', 'required', 'pattern', 'min', 'max', 'step', 'minlength', 'maxlength', 'stuff'], | |
| 'input': ['type', 'value', 'placeholder', 'checked', 'disabled', 'readonly', 'required', 'pattern', 'min', 'max', 'step', 'minlength', 'maxlength'], |
Copilot
AI
Dec 12, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Allowing 'javascript:' protocol in iframe src attributes is a critical security vulnerability. The allowedSchemes list doesn't include 'javascript', but the test on line 1001-1011 expects javascript: URLs to be stripped. However, sanitize-html may not strip javascript: URLs from iframes by default unless explicitly configured. The configuration should explicitly exclude javascript: protocol or use disallowedTagsMode to ensure proper sanitization.
| 'img': ['data'] | |
| 'img': ['data'], | |
| 'iframe': ['http', 'https'] |
Copilot
AI
Dec 12, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using sanitize-html fundamentally changes the behavior of parseHTML compared to jQuery's implementation. The jQuery.parseHTML function did NOT sanitize HTML - it simply parsed it into DOM nodes. This is a breaking change that alters the API contract. Applications relying on parseHTML to preserve all HTML content (including scripts and event handlers for legitimate use cases like template compilation) will break. Consider implementing parseHTML without sanitization, as the original function was for parsing, not sanitizing.
Copilot
AI
Dec 12, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using HTMLTemplateElement is not supported in Internet Explorer 11 and older browsers. The code creates a template element on line 63 and 66, then checks if it's an HTMLTemplateElement on line 139. For browsers that don't support template elements, this will fail. The PR description mentions cross-browser compatibility as a key concern, but this implementation breaks IE11 support that jQuery.parseHTML provided.
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -9,7 +9,8 @@ Npm.depends({ | |||||||
| 'lodash.has': '4.5.2', | ||||||||
| 'lodash.isfunction': '3.0.9', | ||||||||
| 'lodash.isempty': '4.4.0', | ||||||||
| 'lodash.isobject': '3.0.2' | ||||||||
| 'lodash.isobject': '3.0.2', | ||||||||
| 'sanitize-html': '2.11.0' | ||||||||
|
Comment on lines
+12
to
+13
|
||||||||
| 'lodash.isobject': '3.0.2', | |
| 'sanitize-html': '2.11.0' | |
| 'lodash.isobject': '3.0.2' |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -785,3 +785,235 @@ if (typeof MutationObserver !== 'undefined') { | |
| }, 0); | ||
| }); | ||
| } | ||
|
|
||
| Tinytest.add("blaze - dombackend - parseHTML", function (test) { | ||
| // Test basic HTML parsing | ||
| const basicHtml = "<div>Hello</div>"; | ||
| const basicResult = Blaze._DOMBackend.parseHTML(basicHtml); | ||
| test.equal(basicResult.length, 1); | ||
| test.equal(basicResult[0].nodeName, "DIV"); | ||
| test.equal(basicResult[0].textContent || basicResult[0].innerText, "Hello"); // innerText for IE | ||
|
|
||
| // Test various falsy/empty inputs (from jQuery tests) | ||
| test.equal(Blaze._DOMBackend.parseHTML().length, 0, "Without arguments"); | ||
| test.equal(Blaze._DOMBackend.parseHTML(undefined).length, 0, "Undefined"); | ||
| test.equal(Blaze._DOMBackend.parseHTML(null).length, 0, "Null"); | ||
| test.equal(Blaze._DOMBackend.parseHTML(false).length, 0, "Boolean false"); | ||
| test.equal(Blaze._DOMBackend.parseHTML(0).length, 0, "Zero"); | ||
| test.equal(Blaze._DOMBackend.parseHTML(true).length, 0, "Boolean true"); | ||
| test.equal(Blaze._DOMBackend.parseHTML(42).length, 0, "Positive number"); | ||
| test.equal(Blaze._DOMBackend.parseHTML("").length, 0, "Empty string"); | ||
|
|
||
| // Test whitespace preservation (from jQuery tests) | ||
| const leadingWhitespace = Blaze._DOMBackend.parseHTML("\t<div></div>"); | ||
| test.equal(leadingWhitespace[0].nodeType, Node.TEXT_NODE, "First node should be text node"); | ||
| test.equal(leadingWhitespace[0].nodeValue, "\t", "Leading whitespace should be preserved"); | ||
|
|
||
| const surroundingWhitespace = Blaze._DOMBackend.parseHTML(" <div></div> "); | ||
| test.equal(surroundingWhitespace[0].nodeType, Node.TEXT_NODE, "Leading space should be text node"); | ||
| test.equal(surroundingWhitespace[2].nodeType, Node.TEXT_NODE, "Trailing space should be text node"); | ||
|
|
||
| // Test anchor href preservation (from jQuery gh-2965) | ||
| const anchor = Blaze._DOMBackend.parseHTML("<a href='example.html'></a>")[0]; | ||
| test.ok(anchor.href.endsWith("example.html"), "href attribute should be preserved"); | ||
|
|
||
| // Test malformed HTML handling | ||
| const malformedTestCases = [ | ||
| { | ||
| html: "<span><span>", // Unclosed tags | ||
| expectedLength: 1 | ||
| }, | ||
| { | ||
| html: "<td><td>", // Multiple table cells | ||
| expectedLength: 2 | ||
| }, | ||
| { | ||
| html: "<div class=''''''><span><<<>>></span", // Invalid attributes and unclosed tags | ||
| expectedLength: 1 // Should attempt to fix malformed HTML | ||
| }, | ||
| { | ||
| html: "<html><!DOCTYPE html><head></head><body>invalid order</body></html>", // Wrong DOM structure order | ||
| expectedLength: 1 // Should still parse despite invalid structure | ||
| } | ||
| ]; | ||
|
|
||
| malformedTestCases.forEach((testCase, i) => { | ||
| const result = Blaze._DOMBackend.parseHTML(testCase.html); | ||
| test.equal(result.length, testCase.expectedLength, | ||
| `Malformed test ${i}: Expected length ${testCase.expectedLength} but got ${result.length}`); | ||
| }); | ||
|
|
||
| // Test plain text (no HTML) | ||
| const textOnly = "Just some text"; | ||
| const textResult = Blaze._DOMBackend.parseHTML(textOnly); | ||
| test.equal(textResult.length, 1); | ||
| test.equal(textResult[0].nodeType, Node.TEXT_NODE); | ||
| test.equal(textResult[0].textContent || textResult[0].nodeValue, "Just some text"); | ||
|
|
||
| // Test self-closing tags | ||
| const selfClosing = "<div/>Content"; | ||
| const selfClosingResult = Blaze._DOMBackend.parseHTML(selfClosing); | ||
| test.equal(selfClosingResult.length, 1); | ||
| test.equal(selfClosingResult[0].nodeName, "DIV"); | ||
| test.equal(selfClosingResult[0].nodeType, Node.ELEMENT_NODE); | ||
|
Comment on lines
+854
to
+858
|
||
|
|
||
| // Test nested table elements (testing proper wrapping levels) | ||
| const nestedTable = "<td>Cell</td>"; | ||
| const nestedResult = Blaze._DOMBackend.parseHTML(nestedTable); | ||
| test.equal(nestedResult.length, 1); | ||
| test.equal(nestedResult[0].nodeName, "TD"); | ||
|
|
||
| // Test table elements (IE has special requirements) | ||
| const tableTestCases = { | ||
| tr: { | ||
| html: "<tr><td>Cell</td></tr>", | ||
| expectedTags: ["TR", "TD"] | ||
| }, | ||
| td: { | ||
| html: "<td>Cell</td>", | ||
| expectedTags: ["TD"] | ||
| }, | ||
| tbody: { | ||
| html: "<tbody><tr><td>Cell</td></tr></tbody>", | ||
| expectedTags: ["TBODY", "TR", "TD"] | ||
| }, | ||
| thead: { | ||
| html: "<thead><tr><th>Header</th></tr></thead>", | ||
| expectedTags: ["THEAD", "TR", "TH"] | ||
| }, | ||
| tfoot: { | ||
| html: "<tfoot><tr><td>Footer</td></tr></tfoot>", | ||
| expectedTags: ["TFOOT", "TR", "TD"] | ||
| }, | ||
| colgroup: { | ||
| html: "<colgroup><col span='2'></colgroup>", | ||
| expectedTags: ["COLGROUP", "COL"] | ||
| } | ||
| }; | ||
|
|
||
| Object.entries(tableTestCases).forEach(([testCaseName, testCase]) => { | ||
| const result = Blaze._DOMBackend.parseHTML(testCase.html); | ||
| const firstNode = result[0]; | ||
| test.equal(firstNode.nodeName, testCase.expectedTags[0], | ||
| `${testCaseName}: Expected ${testCase.expectedTags[0]} but got ${firstNode.nodeName}`); | ||
| }); | ||
|
|
||
| // Test whitespace handling (IE is sensitive to this) | ||
| const whitespaceTestCases = [ | ||
| { | ||
| html: " <div>Padded</div> ", | ||
| expectedLength: 3, // Leading space + div + trailing space | ||
| expectedTag: "DIV" | ||
| }, | ||
| { | ||
| html: "\n<div>Newlines</div>\n", | ||
| expectedLength: 3, // Leading newline + div + trailing newline | ||
| expectedTag: "DIV" | ||
| }, | ||
| { | ||
| html: "\t<div>Tabs</div>\t", | ||
| expectedLength: 3, // Leading tab + div + trailing tab | ||
| expectedTag: "DIV" | ||
| } | ||
| ]; | ||
|
|
||
| whitespaceTestCases.forEach((testCase, i) => { | ||
| const result = Blaze._DOMBackend.parseHTML(testCase.html); | ||
| test.equal(result.length, testCase.expectedLength, | ||
| `Whitespace test ${i}: Expected length ${testCase.expectedLength} but got ${result.length}`); | ||
| // Check the middle node (the div) | ||
| test.equal(result[1].nodeName, testCase.expectedTag, | ||
| `Whitespace test ${i}: Expected tag ${testCase.expectedTag} but got ${result[1].nodeName}`); | ||
| // Verify surrounding nodes are text nodes | ||
| test.equal(result[0].nodeType, Node.TEXT_NODE, | ||
| `Whitespace test ${i}: Expected leading text node`); | ||
| test.equal(result[2].nodeType, Node.TEXT_NODE, | ||
| `Whitespace test ${i}: Expected trailing text node`); | ||
| }); | ||
|
|
||
| // Test empty input | ||
| test.equal(Blaze._DOMBackend.parseHTML("").length, 0); | ||
| test.equal(Blaze._DOMBackend.parseHTML(null).length, 0); | ||
| test.equal(Blaze._DOMBackend.parseHTML(undefined).length, 0); | ||
| // This is a unique case since a whitespace-only input is parsed as a single text node. | ||
| test.equal(Blaze._DOMBackend.parseHTML(" ").length, 1); | ||
|
|
||
| // Test malformed HTML (IE is more strict) | ||
| const malformedTestCasesIE = [ | ||
| { | ||
| html: "<div>Hello<span>World</span></div>", // Well-formed control case | ||
| expectedLength: 1, | ||
| expectedChildren: 1 | ||
| }, | ||
| { | ||
| html: "<div>Test</div><p>", // Partial second tag | ||
| expectedLength: 2 | ||
| }, | ||
| { | ||
| html: "<div class=>Test</div>", // Invalid attribute | ||
| expectedLength: 1 | ||
| } | ||
| ]; | ||
|
|
||
| malformedTestCasesIE.forEach((testCase, i) => { | ||
| const result = Blaze._DOMBackend.parseHTML(testCase.html); | ||
| test.equal(result.length, testCase.expectedLength, | ||
| `Malformed test ${i}: Expected length ${testCase.expectedLength} but got ${result.length}`); | ||
| if (testCase.expectedChildren !== undefined) { | ||
| const childCount = result[0].getElementsByTagName('span').length; | ||
| test.equal(childCount, testCase.expectedChildren, | ||
| `Malformed test ${i}: Expected ${testCase.expectedChildren} span elements but got ${childCount}`); | ||
| } | ||
| }); | ||
|
|
||
| // Test array-like properties of result (important for IE) | ||
| const arrayResult = Blaze._DOMBackend.parseHTML("<div></div><span></span>"); | ||
| test.equal(typeof arrayResult.length, "number", "Result should have length property"); | ||
| test.equal(typeof arrayResult[0], "object", "Result should have indexed access"); | ||
| test.equal(arrayResult[0].nodeName, "DIV", "First element should be accessible by index"); | ||
| }); | ||
|
|
||
| Tinytest.add("blaze - security - XSS prevention in HTML parsing", function (test) { | ||
| const xssTestCases = [ | ||
| { | ||
| html: "<div><p>Test</p><script>alert('XSS')</script></div>", | ||
| description: "Prevents inline script execution", | ||
| checks: (result) => { | ||
| test.equal(result.length, 1, "Should parse into a single element"); | ||
| const div = result[0]; | ||
| test.equal(div.querySelector('script'), null, "Script tag should be removed"); | ||
| test.equal(div.querySelector('p').textContent, "Test", "Safe content should be preserved"); | ||
| } | ||
|
Comment on lines
+976
to
+986
|
||
| }, | ||
| { | ||
| html: "<div><p>Test</p><img src='x' onerror='alert(\"XSS\")'></div>", | ||
| description: "Prevents event handler injection", | ||
| checks: (result) => { | ||
| test.equal(result.length, 1, "Should parse into a single element"); | ||
| const div = result[0]; | ||
| const img = div.querySelector('img'); | ||
| test.isNotNull(img, "Image element should be preserved"); | ||
| test.isFalse(img.hasAttribute('onerror'), "Event handler should be stripped"); | ||
|
Comment on lines
+988
to
+996
|
||
| test.equal(div.querySelector('p').textContent, "Test", "Safe content should be preserved"); | ||
| } | ||
| }, | ||
| { | ||
| html: "<div><p>Test</p><iframe src='javascript:alert(\"XSS\")'></iframe></div>", | ||
| description: "Prevents javascript: URL injection", | ||
| checks: (result) => { | ||
| test.equal(result.length, 1, "Should parse into a single element"); | ||
| const div = result[0]; | ||
| const iframe = div.querySelector('iframe'); | ||
| test.isNotNull(iframe, "iframe element should be preserved"); | ||
| const src = iframe.getAttribute('src') || ''; | ||
| test.isFalse(src.includes('javascript:'), "javascript: protocol should be stripped"); | ||
| test.equal(div.querySelector('p').textContent, "Test", "Safe content should be preserved"); | ||
| } | ||
| } | ||
| ]; | ||
|
|
||
| xssTestCases.forEach((testCase, i) => { | ||
| const result = Blaze._DOMBackend.parseHTML(testCase.html); | ||
| testCase.checks(result); | ||
| }); | ||
| }); | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment mentions checking for createHTMLDocument support directly, but the original code checked via jQuery.support.createHTMLDocument which may have included additional browser-specific checks or polyfills. The direct document.implementation check might not account for all edge cases that jQuery handled. Consider verifying that this simplified check works correctly across all target browsers, especially older ones.