Skip to content
This repository was archived by the owner on Jul 15, 2019. It is now read-only.

Commit fa24fa2

Browse files
author
adon
committed
updated after reviewer's comments
1 parent 8243352 commit fa24fa2

File tree

2 files changed

+69
-28
lines changed

2 files changed

+69
-28
lines changed

src/html-purify.js

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,25 @@ See the accompanying LICENSE file for terms.
1515
voidElements = tagAttList.VoidElements,
1616
optionalElements = tagAttList.OptionalElements;
1717

18+
/*jshint -W030 */
1819
function Purifier(config) {
19-
var that = this;
20+
var that = this, tagBalance;
2021

2122
config = config || {};
2223
// defaulted to true
2324
config.enableCanonicalization = config.enableCanonicalization !== false;
2425
config.enableVoidingIEConditionalComments = config.enableVoidingIEConditionalComments !== false;
26+
27+
// tag balancing is enabled by default; the stack size defaults to 100
28+
config.tagBalance || (config.tagBalance = {});
29+
tagBalance = that.tagBalance = {};
30+
tagBalance.stackOverflow = false;
31+
if ((tagBalance.enabled = config.tagBalance.enabled !== false)) {
32+
tagBalance.stackSize = parseInt(config.tagBalance.stackSize) || 100;
33+
tagBalance.stackPtr = 0;
34+
tagBalance.stack = new Array(tagBalance.stackSize);
35+
}
2536

26-
config.enableTagBalancing = config.enableTagBalancing !== false;
2737

2838
// accept array of tags to be whitelisted, default list in tag-attr-list.js
2939
that.tagsWhitelist = config.whitelistTags || tagAttList.WhiteListTags;
@@ -38,15 +48,16 @@ See the accompanying LICENSE file for terms.
3848
enableCanonicalization: config.enableCanonicalization,
3949
enableVoidingIEConditionalComments: config.enableVoidingIEConditionalComments
4050
}).on('postWalk', function (lastState, state, i, endsWithEOF) {
41-
processTransition.call(that, lastState, state, i);
51+
!tagBalance.stackOverflow && processTransition.call(that, lastState, state, i);
4252
});
4353

4454
that.cssParser = new CssParser({"ver": "strict", "throwError": false});
55+
4556
}
4657

4758
// TODO: introduce polyfill for Array.prototype.lastIndexOf
48-
function arrayLastIndexOf(arr, element) {
49-
for (var i = arr.length - 1; i >= 0; i--) {
59+
function arrayLastIndexOf(arr, element, fromIndex) {
60+
for (var i = fromIndex === undefined ? arr.length - 1 : fromIndex; i >= 0; i--) {
5061
if (arr[i] === element) {
5162
return i;
5263
}
@@ -58,8 +69,8 @@ See the accompanying LICENSE file for terms.
5869
/* jshint validthis: true */
5970
/* jshint expr: true */
6071
var parser = this.parser,
61-
idx, tagName, attrValString, openedTag, key, value;
62-
72+
tagBalance = this.tagBalance,
73+
idx = 0, tagName = '', attrValString = '', key = '', value = '', hasSelfClosing = 0;
6374

6475
switch (derivedState.Transitions[prevState][nextState]) {
6576

@@ -74,13 +85,14 @@ See the accompanying LICENSE file for terms.
7485
if (arrayLastIndexOf(this.tagsWhitelist, tagName) !== -1) {
7586

7687
if (idx) {
77-
if (this.config.enableTagBalancing && !optionalElements[tagName]) {
88+
if (tagBalance.enabled && !optionalElements[tagName]) {
7889
// relaxed tag balancing, accept it as long as the tag exists in the stack
79-
idx = arrayLastIndexOf(this.openedTags, tagName);
90+
idx = arrayLastIndexOf(tagBalance.stack, tagName, tagBalance.stackPtr - 1);
8091

8192
if (idx >= 0) {
8293
this.output += '</' + tagName + '>';
83-
this.openedTags.splice(idx, 1);
94+
tagBalance.stack.splice(idx, 1);
95+
tagBalance.stackPtr--;
8496
}
8597

8698
// // add closing tags for any opened ones before closing the current one
@@ -99,20 +111,27 @@ See the accompanying LICENSE file for terms.
99111
else {
100112
// void elements only have a start tag; end tags must not be specified for void elements.
101113
// this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
102-
this.hasSelfClosing = voidElements[tagName];
114+
hasSelfClosing = voidElements[tagName];
103115

104116
// push the tagName onto the tag-balancing stack unless it is:
105117
// - a self-closing tag or a void element
106-
// this.config.enableTagBalancing && !this.hasSelfClosing && this.openedTags.push(tagName);
107-
this.config.enableTagBalancing && !this.hasSelfClosing && !optionalElements[tagName] && this.openedTags.push(tagName);
118+
// this.config.tagBalance.enabled && !this.hasSelfClosing && this.openedTags.push(tagName);
119+
if (tagBalance.enabled && !hasSelfClosing && !optionalElements[tagName]) {
120+
if (tagBalance.stackPtr < tagBalance.stackSize) {
121+
tagBalance.stack[tagBalance.stackPtr++] = tagName;
122+
} else {
123+
// cease processing anything if it exceeds the maximum stack size allowed
124+
tagBalance.stackOverflow = true;
125+
break;
126+
}
127+
}
108128

109129
if (prevState === 35 ||
110130
prevState === 36 ||
111131
prevState === 40) {
112132
this.attrVals[parser.getAttributeName()] = parser.getAttributeValue();
113133
}
114134

115-
attrValString = '';
116135
for (key in this.attrVals) {
117136
if (arrayLastIndexOf(this.attributesWhitelist, key) !== -1) {
118137
value = this.attrVals[key];
@@ -135,14 +154,13 @@ See the accompanying LICENSE file for terms.
135154
}
136155

137156
// handle self-closing tags
138-
this.output += '<' + tagName + attrValString + (this.hasSelfClosing ? ' />' : '>');
157+
this.output += '<' + tagName + attrValString + (hasSelfClosing ? ' />' : '>');
139158
// this.output += '<' + tagName + attrValString + '>';
140159

141160
}
142161
}
143162
// reinitialize once tag has been written to output
144163
this.attrVals = {};
145-
// this.hasSelfClosing = false;
146164
break;
147165

148166
case derivedState.TransitionName.ATTR_TO_AFTER_ATTR:
@@ -179,20 +197,30 @@ See the accompanying LICENSE file for terms.
179197
}
180198

181199
Purifier.prototype.purify = function (data) {
182-
var that = this, openedTag;
200+
var that = this, i;
183201

184-
that.output = '';
185-
that.openedTags = [];
186202
that.attrVals = {};
187-
// that.hasSelfClosing = false;
188-
that.parser.reset();
189-
that.parser.contextualize(data);
203+
that.output = '';
204+
205+
if (that.tagBalance.enabled) {
206+
that.tagBalance.stack = new Array(this.tagBalance.stackSize);
207+
that.tagBalance.stackPtr = 0;
208+
}
209+
210+
that.parser.reset().contextualize(data);
211+
212+
if (that.tagBalance.enabled) {
190213

191-
if (that.config.enableTagBalancing) {
192214
// close any tags that are still open on the stack
193-
while((openedTag = this.openedTags.pop())) {
194-
that.output += '</' + openedTag + '>';
215+
for (i = that.tagBalance.stackPtr - 1; i >= 0; i--) {
216+
that.output += '</' + that.tagBalance.stack[i] + '>';
195217
}
218+
// if ((that.tagBalance.stack.length = that.tagBalance.stackPtr)) {
219+
// that.output += '</' + that.tagBalance.stack.join('></') + '>';
220+
// }
221+
// while((openedTag = this.openedTags.pop())) {
222+
// that.output += '</' + openedTag + '>';
223+
// }
196224
}
197225

198226
return that.output;

tests/unit/html-purify.js

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,31 @@ Authors: Aditya Mahendrakar <[email protected]>
3131
assert.equal(output, '<h1 id="foo" title="asd" checked>hello world 2</h1>');
3232
});
3333

34-
it('should always balance unopened tags', function(){
34+
it('should balance tags', function(){
3535
var html = "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed>123</embed><br /><br/><p>";
3636

3737
// with tag balancing explicitly enabled
38-
var output = (new Purifier({enableTagBalancing:true})).purify(html);
38+
var output = (new Purifier({tagBalance:{enabled:true}})).purify(html);
3939
assert.equal(output, 'foobar<a href="123">hello<b>world</a><embed />123<br /><br /><p></b>');
40+
});
41+
42+
it('should balance remaining tags and drop inputs when there are too many unclosed tags', function(){
43+
var html = "<b>1<b>2<b>3<b>4<b>5<b>6</b></b></b></b>";
44+
45+
// with tag balancing enabled and the stack size limited to 3
46+
var output = (new Purifier({tagBalance:{enabled:true, stackSize:3}})).purify(html);
47+
assert.equal(output, '<b>1<b>2<b>3</b></b></b>');
48+
});
49+
50+
it('should not balance tags if disabled', function(){
51+
var html = "</div>foo</h2>bar<a href=\"123\">hello<b>world</a><embed>123</embed><br /><br/><p>";
4052

4153
// with tag balancing disabled
42-
var output = (new Purifier({enableTagBalancing:false})).purify(html);
54+
var output = (new Purifier({tagBalance:{enabled:false}})).purify(html);
4355
assert.equal(output, '</div>foo</h2>bar<a href="123">hello<b>world</a><embed />123</embed><br /><br /><p>');
4456
});
4557

58+
4659
it('should handle all vectors mentioned in https://html5sec.org', function(){
4760
var output, i, vector;
4861
for (var i = 0; i < html5secVectors.length; i++) {

0 commit comments

Comments
 (0)