Skip to content
This repository was archived by the owner on Jul 15, 2019. It is now read-only.

Commit 12d5fd2

Browse files
author
adon
committed
updated comments and README
- also improved arrayLastIndexOf to use the native Array.prototype.lastIndexOf if it exists
1 parent a5e8d53 commit 12d5fd2

File tree

2 files changed

+65
-31
lines changed

2 files changed

+65
-31
lines changed

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,36 @@ var input = '...';
3232
var result = purifier.purify(input);
3333
```
3434

35+
## Advanced Usage
36+
37+
The following outlines the default configuration, which is secure by default. You should perform due diligence to confirm that your use cases are safe before disabling or altering any of these settings.
38+
39+
```js
40+
// The default configuration
41+
new Purifier({
42+
whitelistTags: ['a', '...'],
43+
whitelistAttributes: ['href', '...'],
44+
enableCanonicalization: true,
45+
tagBalance: {
46+
enabled: true,
47+
stackSize: 100
48+
}
49+
});
50+
```
51+
52+
<!--
53+
#### whitelistTags
54+
55+
#### whitelistAttributes
56+
57+
#### enableCanonicalization
58+
-->
59+
60+
#### tagBalance
61+
The untrusted data must be self-contained. Hence, it cannot close any tags opened prior to its inclusion, nor leave any of its own tags unclosed. An efficient and simple tag balancing algorithm is applied by default to enforce this goal only, so it may not produce perfectly nested output. You may apply another tag balancing algorithm before invoking purify, but the default one should still be enabled unless you're sure the self-contained requirement is met.
62+
63+
The ``stackSize`` (default: 100) is a limit imposed on the maximum number of unclosed tags (i.e., the maximum depth of nested tags). When untrusted data attempts to open more nested tags than the allowed limit, the algorithm ceases any further processing and simply closes all of the tags that are still open.
64+
3565
## Development
3666

3767
### How to build

src/html-purify.js

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ See the accompanying LICENSE file for terms.
2929
tagBalance = that.tagBalance = {};
3030
tagBalance.stackOverflow = false;
3131
if ((tagBalance.enabled = config.tagBalance.enabled !== false)) {
32-
tagBalance.stackSize = parseInt(config.tagBalance.stackSize) || 100;
32+
tagBalance.stackPtrMax = (parseInt(config.tagBalance.stackSize) || 100) - 1;
3333
tagBalance.stackPtr = 0;
34-
tagBalance.stack = new Array(tagBalance.stackSize);
34+
tagBalance.stack = new Array(tagBalance.stackPtrMax + 1);
3535
}
3636

3737
// accept array of tags to be whitelisted, default list in tag-attr-list.js
@@ -54,11 +54,18 @@ See the accompanying LICENSE file for terms.
5454

5555
}
5656

57-
// TODO: introduce polyfill for Array.lastIndexOf
57+
// A simple polyfill for Array.prototype.lastIndexOf
5858
function arrayLastIndexOf(arr, element, fromIndex) {
59-
for (var i = fromIndex === undefined ? arr.length - 1 : fromIndex; i >= 0; i--) {
60-
if (arr[i] === element) {
61-
return i;
59+
if (arguments.length < 3) {
60+
fromIndex = arr.length - 1;
61+
}
62+
63+
if (Array.prototype.lastIndexOf) {
64+
return arr.lastIndexOf(element, fromIndex);
65+
}
66+
for (; fromIndex >= 0; fromIndex--) {
67+
if (arr[fromIndex] === element) {
68+
return fromIndex;
6269
}
6370
}
6471
return -1;
@@ -85,7 +92,12 @@ See the accompanying LICENSE file for terms.
8592

8693
if (idx) {
8794
if (tagBalance.enabled && !optionalElements[tagName]) {
88-
// relaxed tag balancing, accept it as long as the tag exists in the stack
95+
96+
// Simple tag balancing: close the tag as long as it
97+
// exists in the stack, as we only want to ensure the
98+
// untrusted data must be self-contained. Hence, it can
99+
// not close any tags prior to its inclusion, nor leave
100+
// any of its own tags unclosed.
89101
idx = arrayLastIndexOf(tagBalance.stack, tagName, tagBalance.stackPtr - 1);
90102

91103
if (idx >= 0) {
@@ -94,7 +106,7 @@ See the accompanying LICENSE file for terms.
94106
tagBalance.stackPtr--;
95107
}
96108

97-
// // add closing tags for any opened ones before closing the current one
109+
// Pop-until-matched tag balancing: add closing tags for any opened ones before closing the matched one
98110
// while((openedTag = this.openedTags.pop()) && openedTag !== tagName) {
99111
// this.output += '</' + openedTag + '>';
100112
// }
@@ -109,20 +121,18 @@ See the accompanying LICENSE file for terms.
109121
}
110122
else {
111123
// void elements only have a start tag; end tags must not be specified for void elements.
112-
// this.hasSelfClosing = this.hasSelfClosing || voidElements[tagName];
113124
hasSelfClosing = voidElements[tagName];
114125

115126
// push the tagName into the openedTags stack if not found:
116127
// - a self-closing tag or a void element
117-
// this.config.tagBalance.enabled && !this.hasSelfClosing && this.openedTags.push(tagName);
118128
if (tagBalance.enabled && !hasSelfClosing && !optionalElements[tagName]) {
119-
if (tagBalance.stackPtr < tagBalance.stackSize) {
120-
tagBalance.stack[tagBalance.stackPtr++] = tagName;
121-
} else {
122-
// cease processing anything if it exceeds the maximum stack size allowed
129+
// cease further processing if it exceeds the maximum stack size allowed
130+
if (tagBalance.stackPtr > tagBalance.stackPtrMax) {
123131
tagBalance.stackOverflow = true;
124-
break;
132+
return;
125133
}
134+
135+
tagBalance.stack[tagBalance.stackPtr++] = tagName;
126136
}
127137

128138
if (prevState === 35 ||
@@ -172,7 +182,7 @@ See the accompanying LICENSE file for terms.
172182

173183
//case derivedState.TransitionName.TAG_OPEN_TO_MARKUP_OPEN:
174184
// this.output += "<" + parser.input[i];
175-
// break;
185+
// break;
176186

177187
case derivedState.TransitionName.TO_SELF_CLOSING_START:
178188
// boolean attributes may not have a value
@@ -196,30 +206,24 @@ See the accompanying LICENSE file for terms.
196206
}
197207

198208
Purifier.prototype.purify = function (data) {
199-
var that = this, i;
209+
var that = this, i,
210+
tagBalance = that.tagBalance;
200211

201212
that.attrVals = {};
202213
that.output = '';
203214

204-
if (that.tagBalance.enabled) {
205-
that.tagBalance.stack = new Array(this.tagBalance.stackSize);
206-
that.tagBalance.stackPtr = 0;
215+
if (tagBalance.enabled) {
216+
tagBalance.stack = new Array(tagBalance.stackPtrMax + 1);
217+
tagBalance.stackPtr = 0;
207218
}
208219

209220
that.parser.reset().contextualize(data);
210221

211-
if (that.tagBalance.enabled) {
212-
213-
// close any remaining openedTags
214-
for (i = that.tagBalance.stackPtr - 1; i >= 0; i--) {
215-
that.output += '</' + that.tagBalance.stack[i] + '>';
222+
if (tagBalance.enabled) {
223+
// close remaining opened tags, if any
224+
for (i = tagBalance.stackPtr - 1; i >= 0; i--) {
225+
that.output += '</' + tagBalance.stack[i] + '>';
216226
}
217-
// if ((that.tagBalance.stack.length = that.tagBalance.stackPtr)) {
218-
// that.output += '</' + that.tagBalance.stack.join('></') + '>';
219-
// }
220-
// while((openedTag = this.openedTags.pop())) {
221-
// that.output += '</' + openedTag + '>';
222-
// }
223227
}
224228

225229
return that.output;

0 commit comments

Comments
 (0)