Skip to content

Commit f64691b

Browse files
author
Ashe Connor
authored
Default to safe operation (commonmark#123)
* default to safe * fix setter test
1 parent a9ed0e2 commit f64691b

File tree

8 files changed

+47
-44
lines changed

8 files changed

+47
-44
lines changed

README.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,15 @@ be found in the man pages in the `man` subdirectory.
163163
Security
164164
--------
165165

166-
By default, the library will pass through raw HTML and potentially
167-
dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).
168-
169-
It is recommended that users either disable this potentially unsafe
170-
feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the
171-
command-line program), or run the output through an HTML sanitizer
172-
to protect against
173-
[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
166+
By default, the library will scrub raw HTML and potentially dangerous links
167+
(`javascript:`, `vbscript:`, `data:`, `file:`). Please note this is the
168+
_opposite_ of the upstream [`cmark`](https://github.com/CommonMark/cmark)
169+
library, a change introduced in `cmark-gfm` in version `0.28.3.gfm.18`.
170+
171+
To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe` with the
172+
command line program). If doing so, we recommend you use an HTML sanitizer
173+
specific to your needs to protect against [XSS
174+
attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
174175

175176
Contributing
176177
------------

api_test/main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ static void accessors(test_batch_runner *runner) {
178178
OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")),
179179
"set_literal suffix");
180180

181-
char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL);
181+
char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE, NULL);
182182
static const char expected_html[] =
183183
"<h3>Header</h3>\n"
184184
"<ol start=\"3\">\n"
@@ -910,7 +910,7 @@ static void test_safe(test_batch_runner *runner) {
910910
"a>\n[link](JAVAscript:alert('hi'))\n![image]("
911911
"file:my.js)\n";
912912
char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1,
913-
CMARK_OPT_DEFAULT | CMARK_OPT_SAFE);
913+
CMARK_OPT_DEFAULT);
914914
STR_EQ(runner, html, "<!-- raw HTML omitted -->\n<p><!-- raw HTML omitted "
915915
"-->hi<!-- raw HTML omitted -->\n<a "
916916
"href=\"\">link</a>\n<img src=\"\" alt=\"image\" "

man/man3/cmark-gfm.3

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.TH cmark-gfm 3 "September 17, 2018" "LOCAL" "Library Functions Manual"
1+
.TH cmark-gfm 3 "October 17, 2018" "LOCAL" "Library Functions Manual"
22
.SH
33
NAME
44
.PP
@@ -852,22 +852,6 @@ Include a \f[C]data\-sourcepos\f[] attribute on all block elements.
852852
.PP
853853
Render \f[C]softbreak\f[] elements as hard line breaks.
854854

855-
.PP
856-
.nf
857-
\fC
858-
.RS 0n
859-
#define CMARK_OPT_SAFE (1 << 3)
860-
.RE
861-
\f[]
862-
.fi
863-
864-
.PP
865-
Suppress raw HTML and unsafe links (\f[C]javascript:\f[],
866-
\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for
867-
\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or
868-
\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder
869-
HTML comment. Unsafe links are replaced by empty strings.
870-
871855
.PP
872856
.nf
873857
\fC
@@ -995,6 +979,23 @@ Use style attributes to align table cells instead of align attributes.
995979
Include the remainder of the info string in code blocks in a separate
996980
attribute.
997981

982+
.PP
983+
.nf
984+
\fC
985+
.RS 0n
986+
#define CMARK_OPT_UNSAFE (1 << 17)
987+
.RE
988+
\f[]
989+
.fi
990+
991+
.PP
992+
Allow raw HTML and unsafe links, \f[C]javascript:\f[],
993+
\f[C]vbscript:\f[], \f[C]file:\f[], and all \f[C]data:\f[] URLs \-\- by
994+
default, only \f[C]image/png\f[], \f[C]image/gif\f[],
995+
\f[C]image/jpeg\f[], or \f[C]image/webp\f[] mime types are allowed.
996+
Without this option, raw HTML is replaced by a placeholder HTML comment,
997+
and unsafe links are replaced by empty strings.
998+
998999
.SS
9991000
Version information
10001001

src/cmark-gfm.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -690,14 +690,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
690690
*/
691691
#define CMARK_OPT_HARDBREAKS (1 << 2)
692692

693-
/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
694-
* `file:`, and `data:`, except for `image/png`, `image/gif`,
695-
* `image/jpeg`, or `image/webp` mime types). Raw HTML is replaced
696-
* by a placeholder HTML comment. Unsafe links are replaced by
697-
* empty strings.
698-
*/
699-
#define CMARK_OPT_SAFE (1 << 3)
700-
701693
/** Render `softbreak` elements as spaces.
702694
*/
703695
#define CMARK_OPT_NOBREAKS (1 << 4)
@@ -746,6 +738,14 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
746738
*/
747739
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)
748740

741+
/** Allow raw HTML and unsafe links (`javascript:`, `vbscript:`, `file:`, and
742+
* all `data:` URLs). By default, `data:` URLs are kept only for `image/png`,
743+
* `image/gif`, `image/jpeg`, or `image/webp` mime types, raw HTML is
744+
* replaced by a placeholder HTML comment, and unsafe links are replaced by
745+
* empty strings.
746+
*/
747+
#define CMARK_OPT_UNSAFE (1 << 17)
748+
749749
/**
750750
* ## Version information
751751
*/

src/html.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
227227

228228
case CMARK_NODE_HTML_BLOCK:
229229
cmark_html_render_cr(html);
230-
if (options & CMARK_OPT_SAFE) {
230+
if (!(options & CMARK_OPT_UNSAFE)) {
231231
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
232232
} else if (renderer->filter_extensions) {
233233
filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
@@ -305,7 +305,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
305305
break;
306306

307307
case CMARK_NODE_HTML_INLINE:
308-
if (options & CMARK_OPT_SAFE) {
308+
if (!(options & CMARK_OPT_UNSAFE)) {
309309
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
310310
} else {
311311
filtered = false;
@@ -354,7 +354,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
354354
case CMARK_NODE_LINK:
355355
if (entering) {
356356
cmark_strbuf_puts(html, "<a href=\"");
357-
if (!((options & CMARK_OPT_SAFE) &&
357+
if (!(!(options & CMARK_OPT_UNSAFE) &&
358358
scan_dangerous_url(&node->as.link.url, 0))) {
359359
houdini_escape_href(html, node->as.link.url.data,
360360
node->as.link.url.len);
@@ -372,7 +372,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
372372
case CMARK_NODE_IMAGE:
373373
if (entering) {
374374
cmark_strbuf_puts(html, "<img src=\"");
375-
if (!((options & CMARK_OPT_SAFE) &&
375+
if (!(!(options & CMARK_OPT_UNSAFE) &&
376376
scan_dangerous_url(&node->as.link.url, 0))) {
377377
houdini_escape_href(html, node->as.link.url.data,
378378
node->as.link.url.len);

src/main.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ void print_usage() {
3737
printf(" --sourcepos Include source position attribute\n");
3838
printf(" --hardbreaks Treat newlines as hard line breaks\n");
3939
printf(" --nobreaks Render soft line breaks as spaces\n");
40-
printf(" --safe Suppress raw HTML and dangerous URLs\n");
40+
printf(" --unsafe Allow raw HTML and dangerous URLs\n");
4141
printf(" --smart Use smart punctuation\n");
4242
printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n");
4343
printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n");
@@ -150,8 +150,8 @@ int main(int argc, char *argv[]) {
150150
options |= CMARK_OPT_SMART;
151151
} else if (strcmp(argv[i], "--github-pre-lang") == 0) {
152152
options |= CMARK_OPT_GITHUB_PRE_LANG;
153-
} else if (strcmp(argv[i], "--safe") == 0) {
154-
options |= CMARK_OPT_SAFE;
153+
} else if (strcmp(argv[i], "--unsafe") == 0) {
154+
options |= CMARK_OPT_UNSAFE;
155155
} else if (strcmp(argv[i], "--validate-utf8") == 0) {
156156
options |= CMARK_OPT_VALIDATE_UTF8;
157157
} else if (strcmp(argv[i], "--liberal-html-tag") == 0) {

test/cmark-fuzz.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
99
int options = *(const int *)data;
1010

1111
/* Mask off valid option bits */
12-
options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
12+
options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
1313

1414
/* Remainder of input is the markdown */
1515
const char *markdown = (const char *)(data + sizeof(options));

test/cmark.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def to_html(lib, extlib, text, extensions):
5757
render_html = lib.cmark_render_html
5858
render_html.restype = c_char_p
5959
render_html.argtypes = [c_void_p, c_int, c_void_p]
60-
result = render_html(document, 0, syntax_extensions).decode('utf-8')
60+
result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8')
6161
return [0, result, '']
6262

6363
def to_commonmark(lib, extlib, text, extensions):
@@ -77,6 +77,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None):
7777
self.extensions = extensions.split()
7878

7979
if prog:
80+
prog += ' --unsafe'
8081
extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)])
8182
self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x)
8283
self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' + extsfun(exts + self.extensions), x)

0 commit comments

Comments
 (0)