Skip to content
This repository was archived by the owner on Jul 15, 2019. It is now read-only.

Commit 8243352

Browse files
author
adon
committed
fix tag balancing logics
- logics simplified - optional tags are ignored during balancing - ignore self-closing solidus, which is required only for foreign elements
1 parent fa8349d commit 8243352

File tree

4 files changed

+67
-31
lines changed

4 files changed

+67
-31
lines changed

src/html-purify.js

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ See the accompanying LICENSE file for terms.
1212
xssFilters = require('xss-filters'),
1313
CssParser = require('css-js'),
1414
hrefAttributes = tagAttList.HrefAttributes,
15-
voidElements = tagAttList.VoidElements;
15+
voidElements = tagAttList.VoidElements,
16+
optionalElements = tagAttList.OptionalElements;
1617

1718
function Purifier(config) {
1819
var that = this;
@@ -43,14 +44,14 @@ See the accompanying LICENSE file for terms.
4344
that.cssParser = new CssParser({"ver": "strict", "throwError": false});
4445
}
4546

46-
// TODO: introduce polyfill for Array.indexOf
47-
function contains(arr, element) {
48-
for (var i = 0, len = arr.length; i < len; i++) {
47+
// TODO: introduce polyfill for Array.lastIndexOf
48+
function arrayLastIndexOf(arr, element) {
49+
for (var i = arr.length - 1; i >= 0; i--) {
4950
if (arr[i] === element) {
50-
return true;
51+
return i;
5152
}
5253
}
53-
return false;
54+
return -1;
5455
}
5556

5657
function processTransition(prevState, nextState, i) {
@@ -70,30 +71,40 @@ See the accompanying LICENSE file for terms.
7071
idx = parser.getCurrentTagIndex();
7172
tagName = parser.getCurrentTag(idx);
7273

73-
if (contains(this.tagsWhitelist, tagName)) {
74+
if (arrayLastIndexOf(this.tagsWhitelist, tagName) !== -1) {
7475

7576
if (idx) {
76-
if (this.config.enableTagBalancing) {
77-
// add closing tags for any opened ones before closing the current one
78-
while((openedTag = this.openedTags.pop()) && openedTag !== tagName) {
79-
this.output += '</' + openedTag + '>';
80-
}
81-
// openedTag is undefined if tagName is never found in all openedTags, no output needed
82-
if (openedTag) {
83-
this.output += '</' + openedTag + '>';
77+
if (this.config.enableTagBalancing && !optionalElements[tagName]) {
78+
// relaxed tag balancing, accept it as long as the tag exists in the stack
79+
idx = arrayLastIndexOf(this.openedTags, tagName);
80+
81+
if (idx >= 0) {
82+
this.output += '</' + tagName + '>';
83+
this.openedTags.splice(idx, 1);
8484
}
85+
86+
// // add closing tags for any opened ones before closing the current one
87+
// while((openedTag = this.openedTags.pop()) && openedTag !== tagName) {
88+
// this.output += '</' + openedTag + '>';
89+
// }
90+
// // openedTag is undefined if tagName is never found in all openedTags, no output needed
91+
// if (openedTag) {
92+
// this.output += '</' + openedTag + '>';
93+
// }
8594
}
8695
else {
8796
this.output += '</' + tagName + '>';
8897
}
8998
}
9099
else {
91-
// - void elements only have a start tag; end tags must not be specified for void elements.
92-
this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
100+
// void elements only have a start tag; end tags must not be specified for void elements.
101+
// this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
102+
this.hasSelfClosing = voidElements[tagName];
93103

94104
// push the tagName into the openedTags stack if not found:
95105
// - a self-closing tag or a void element
96-
this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName);
106+
// this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName);
107+
this.config.enableTagBalancing && !this.hasSelfClosing && !optionalElements[tagName] && this.openedTags.push(tagName);
97108

98109
if (prevState === 35 ||
99110
prevState === 36 ||
@@ -103,7 +114,7 @@ See the accompanying LICENSE file for terms.
103114

104115
attrValString = '';
105116
for (key in this.attrVals) {
106-
if (contains(this.attributesWhitelist, key)) {
117+
if (arrayLastIndexOf(this.attributesWhitelist, key) !== -1) {
107118
value = this.attrVals[key];
108119

109120
if (key === "style") { // TODO: move style to a const
@@ -125,12 +136,13 @@ See the accompanying LICENSE file for terms.
125136

126137
// handle self-closing tags
127138
this.output += '<' + tagName + attrValString + (this.hasSelfClosing ? ' />' : '>');
139+
// this.output += '<' + tagName + attrValString + '>';
128140

129141
}
130142
}
131143
// reinitialize once tag has been written to output
132144
this.attrVals = {};
133-
this.hasSelfClosing = 0;
145+
// this.hasSelfClosing = false;
134146
break;
135147

136148
case derivedState.TransitionName.ATTR_TO_AFTER_ATTR:
@@ -150,7 +162,18 @@ See the accompanying LICENSE file for terms.
150162
if (prevState === 35) {
151163
this.attrVals[parser.getAttributeName()] = null;
152164
}
153-
this.hasSelfClosing = 1;
165+
166+
/* According to https://html.spec.whatwg.org/multipage/syntax.html#start-tags
167+
* "Then, if the element is one of the void elements, or if the element is a foreign element, then there may be a single U+002F SOLIDUS character (/).
168+
* This character has no effect on void elements, but on foreign elements it marks the start tag as self-closing."
169+
*/
170+
// that means only foreign elements can self-close (self-closing is optional for void elements)
171+
// no foreign elements will be allowed, so the following logic can be commented
172+
// openedTag = parser.getStartTagName();
173+
// if (openedTag === 'svg' || openedTag === 'math') { // ...
174+
// this.hasSelfClosing = true;
175+
// }
176+
154177
break;
155178
}
156179
}
@@ -161,7 +184,7 @@ See the accompanying LICENSE file for terms.
161184
that.output = '';
162185
that.openedTags = [];
163186
that.attrVals = {};
164-
that.hasSelfClosing = 0;
187+
// that.hasSelfClosing = false;
165188
that.parser.reset();
166189
that.parser.contextualize(data);
167190

src/tag-attr-list.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,6 @@ exports.HrefAttributes = {
261261
// Void elements only have a start tag; end tags must not be specified for void elements.
262262
// https://html.spec.whatwg.org/multipage/syntax.html#void-elements
263263
exports.VoidElements = {"area":1, "base":1, "br":1, "col":1, "embed":1, "hr":1, "img":1, "input":1, "keygen":1, "link":1, "menuitem":1, "meta":1, "param":1, "source":1, "track":1, "wbr":1};
264+
265+
// https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
266+
exports.OptionalElements = {/*"plaintext":1, "html":1, "head":1, "body":1,*/ "li":1, "dt":1, "dd":1, "p":1, "rt":1, "rp":1, "optgroup":1, "option":1, "colgroup":1, "caption":1, "thead":1, "tbody":1, "tfoot":1, "tr":1, "td":1, "th":1};

tests/test-vectors.js

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -628,12 +628,12 @@ var generalVectors = [
628628
{
629629
id: 10,
630630
input: "<table><tr bz/></table>normal tag made standalone with bogus trunc attr",
631-
output: "<table><tr /></table>normal tag made standalone with bogus trunc attr"
631+
output: "<table><tr></table>normal tag made standalone with bogus trunc attr"
632632
},
633633
{
634634
id: 11,
635635
input: "<table><tr color/></table>normal tag made standalone with trunc attr bad val",
636-
output: "<table><tr /></table>normal tag made standalone with trunc attr bad val"
636+
output: "<table><tr></table>normal tag made standalone with trunc attr bad val"
637637
},
638638
{
639639
id: 12,
@@ -648,7 +648,7 @@ var generalVectors = [
648648
{
649649
id: 14,
650650
input: "<table><tr size=\"4\"/></table>normal tag made standalone with value",
651-
output: "<table><tr size=\"4\" /></table>normal tag made standalone with value"
651+
output: "<table><tr size=\"4\"></table>normal tag made standalone with value"
652652
},
653653
//style attribute tests
654654
{
@@ -779,7 +779,7 @@ var generalVectors = [
779779
{
780780
id: 40,
781781
input: "<option selected />",
782-
output: "<option selected />"
782+
output: "<option selected>"
783783
},
784784
{
785785
id: 41,
@@ -799,7 +799,7 @@ var generalVectors = [
799799
{
800800
id: 44,
801801
input: "<option selected/>",
802-
output: "<option selected />"
802+
output: "<option selected>"
803803
},
804804
{
805805
id: 45,
@@ -831,7 +831,17 @@ var generalVectors = [
831831
id: 50,
832832
input: "abc <!-- 123",
833833
output: "abc "
834-
}
834+
},
835+
{
836+
id: 51,
837+
input: "<a href=\"http://www.yahoo.com/\" />hello",
838+
output: "<a href=\"http://www.yahoo.com/\">hello</a>"
839+
},
840+
// {
841+
// id: 52,
842+
// input: "<img src=\"x\" id=\'\" onerror=\"alert(1)\' />",
843+
// output: "<img src=\"x\" id=\'\" onerror=\"alert(1)\' />"
844+
// }
835845
];
836846

837847
exports.html5secVectors = html5secVectors;

tests/unit/html-purify.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,12 @@ Authors: Aditya Mahendrakar <[email protected]>
3535
var html = "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed>123</embed><br /><br/><p>";
3636

3737
// with tag balancing enabled by default
38-
var output = (new Purifier()).purify(html);
39-
assert.equal(output, 'foobar<a href=\"123\">hello<b>world</b></a><embed />123<br /><br /><p></p>');
38+
var output = (new Purifier({enableTagBalancing:true})).purify(html);
39+
assert.equal(output, 'foobar<a href="123">hello<b>world</a><embed />123<br /><br /><p></b>');
4040

4141
// with tag balancing disabled
4242
var output = (new Purifier({enableTagBalancing:false})).purify(html);
43-
assert.equal(output, "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed />123</embed><br /><br /><p>");
43+
assert.equal(output, '</div>foo</h2>bar<a href="123">hello<b>world</a><embed />123</embed><br /><br /><p>');
4444
});
4545

4646
it('should handle all vectors mentioned in https://html5sec.org', function(){

0 commit comments

Comments
 (0)