From 4b48783c3e209d900a459a2bd76aa314cb62f7e1 Mon Sep 17 00:00:00 2001 From: Rasmus Andersson Date: Wed, 30 Jun 2021 20:01:49 -0700 Subject: [PATCH] Disallow "javascript:" URIs in links. Adds option allowJSURIs to explicitly allow it. Closes #14 --- example/example.html | 1 + example/example.md | 2 ++ markdown.d.ts | 5 ++++- src/common.h | 7 +++++++ src/fmt_html.c | 27 ++++++++++++++++++++------- src/fmt_html.h | 8 +++----- src/md.c | 13 ++----------- src/md.js | 10 ++++++---- 8 files changed, 45 insertions(+), 28 deletions(-) diff --git a/example/example.html b/example/example.html index e46b151..7ecb278 100644 --- a/example/example.html +++ b/example/example.html @@ -79,3 +79,4 @@

TablesAnöt######her!

?!Anöt//her!!

?!!

+

XSS test

diff --git a/example/example.md b/example/example.md index 5d5ce8d..5beb1ea 100644 --- a/example/example.md +++ b/example/example.md @@ -82,3 +82,5 @@ function codeBlocks() { ## ?!Anöt//her!! ## ?!! + +[XSS test](javAscRipt:alert("xss")) diff --git a/markdown.d.ts b/markdown.d.ts index 6e7d1a5..638fd82 100644 --- a/markdown.d.ts +++ b/markdown.d.ts @@ -28,8 +28,11 @@ export interface ParseOptions { */ bytes? :boolean + /** Allow "javascript:" in links */ + allowJSURIs? :boolean + /** - * onCodeBlock is an optional callback which if provided is called for each code block. + * Optional callback which if provided is called for each code block. * langname holds the "language tag", if any, of the block. * * The returned value is inserted into the resulting HTML verbatim, without HTML escaping. diff --git a/src/common.h b/src/common.h index 836b2a5..5a0c8f3 100644 --- a/src/common.h +++ b/src/common.h @@ -61,6 +61,13 @@ typedef int32_t i32; #include "wbuf.h" +// these should be in sync with "OutputFlags" in md.js +typedef enum OutputFlags { + OutputFlagHTML = 1 << 0, + OutputFlagXHTML = 1 << 1, + OutputFlagAllowJSURI = 1 << 2, // allow "javascript:" URIs in links +} OutputFlags; + typedef int(*JSTextFilterFun)( const char* metaptr, u32 metalen, const char* inptr, u32 inlen, diff --git a/src/fmt_html.c b/src/fmt_html.c index 2e71b04..6894ef4 100644 --- a/src/fmt_html.c +++ b/src/fmt_html.c @@ -25,6 +25,7 @@ #include <stdio.h> #include <string.h> +#include <strings.h> #include "common.h" #include "fmt_html.h" @@ -257,9 +258,21 @@ static void render_open_td_block(FmtHTML* r, bool isTH, const MD_BLOCK_TD_DETAIL } } +static bool is_javascript_uri(const MD_CHAR* text, size_t len) { + return ( + len >= strlen("javascript:") && + strncasecmp(text, "javascript:", strlen("javascript:")) == 0 + ); +} + static void render_open_a_span(FmtHTML* r, const MD_SPAN_A_DETAIL* det) { render_literal(r, "<a href=\""); - render_attribute(r, &det->href); + // skip "javascript:" URIs unless explicitly allowed + if ((r->flags & OutputFlagAllowJSURI) != 0 || 
!is_javascript_uri(det->href.text, det->href.size)) + { + render_attribute(r, &det->href); + } if (det->title.text != NULL) { render_literal(r, "\" title=\""); render_attribute(r, &det->title); @@ -279,7 +292,7 @@ static void render_close_img_span(FmtHTML* r, const MD_SPAN_IMG_DETAIL* det) { render_literal(r, "\" title=\""); render_attribute(r, &det->title); } - render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\"/>" : "\">"); + render_literal(r, (r->flags & OutputFlagXHTML) ? "\"/>" : "\">"); r->imgnest--; } @@ -306,7 +319,7 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) case MD_BLOCK_UL: render_literal(r, "
<ul>\n"); break; case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break; case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break; - case MD_BLOCK_HR: render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
<hr />\n" : "
<hr>\n"); break; + case MD_BLOCK_HR: render_literal(r, (r->flags & OutputFlagXHTML) ? "
<hr />\n" : "
<hr>\n"); break; case MD_BLOCK_H: { render_literal(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); @@ -379,8 +392,8 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { case MD_SPAN_EM: render_literal(r, "<em>"); break; case MD_SPAN_STRONG: render_literal(r, "<strong>"); break; case MD_SPAN_U: render_literal(r, "<u>"); break; - case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break; - case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break; + case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*)detail); break; + case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*)detail); break; case MD_SPAN_CODE: render_literal(r, "<code>"); break; case MD_SPAN_DEL: render_literal(r, "<del>"); break; case MD_SPAN_LATEXMATH: render_literal(r, "<x-equation>"); break; @@ -452,12 +465,12 @@ static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, vo render_literal( r, r->imgnest == 0 ? - ((r->flags & MD_HTML_FLAG_XHTML) ? "
<br />\n" : "
<br>\n") : + ((r->flags & OutputFlagXHTML) ? "
<br />\n" : "
<br>\n") : " " ); break; - render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
<br />\n" : "
<br>\n"); break; + render_literal(r, (r->flags & OutputFlagXHTML) ? "
<br />\n" : "
<br>\n"); break; case MD_TEXT_SOFTBR: render_literal(r, (r->imgnest == 0 ? "\n" : " ")); break; case MD_TEXT_HTML: render_text(r, text, size); break; diff --git a/src/fmt_html.h b/src/fmt_html.h index a903981..cfe8215 100644 --- a/src/fmt_html.h +++ b/src/fmt_html.h @@ -1,11 +1,9 @@ #pragma once -#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g. 
<br>, generate <br />
    - typedef struct FmtHTML { - u32 flags; // MD_HTML_FLAG_* - u32 parserFlags; // passed along to md_parse - WBuf* outbuf; + OutputFlags flags; + u32 parserFlags; // passed along to md_parse + WBuf* outbuf; // optional callbacks JSTextFilterFun onCodeBlock; diff --git a/src/md.c b/src/md.c index b0c6225..f17f8cc 100644 --- a/src/md.c +++ b/src/md.c @@ -4,12 +4,6 @@ #include "fmt_html.h" // #include "fmt_json.h" -// these should be in sync with "OutputFlags" in md.js -typedef enum OutputFlags { - OutputFlagHTML = 1 << 0, - OutputFlagXHTML = 1 << 1, -} OutputFlags; - typedef enum ErrorCode { ERR_NONE, ERR_MD_PARSE, @@ -41,19 +35,16 @@ export size_t parseUTF8( WBufReset(&outbuf); - if (outflags & OutputFlagHTML) { + if ((outflags & OutputFlagHTML) || (outflags & OutputFlagXHTML)) { WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations FmtHTML fmt = { - .flags = 0, + .flags = outflags, .parserFlags = parser_flags, .outbuf = &outbuf, .onCodeBlock = onCodeBlock, }; - if (outflags & OutputFlagXHTML) - fmt.flags |= MD_HTML_FLAG_XHTML; - if (fmt_html(inbufptr, inbuflen, &fmt) != 0) { // fmt_html returns status of md_parse which only fails in extreme cases // like when out of memory. md4c does not provide error codes or error messages. 
diff --git a/src/md.js b/src/md.js index c5d05cd..c85a52f 100644 --- a/src/md.js +++ b/src/md.js @@ -41,10 +41,11 @@ export const ParseFlags = { NO_HTML: 0x0020 | 0x0040, // NO_HTML_BLOCKS | NO_HTML_SPANS } -// these should be in sync with "OutputFlags" in md.c +// these should be in sync with "OutputFlags" in common.h const OutputFlags = { - HTML: 1 << 0, // Output HTML - XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set) + HTML: 1 << 0, // Output HTML + XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set) + AllowJSURI: 1 << 2, // Allow "javascript:" URIs } @@ -56,7 +57,8 @@ export function parse(source, options) { options.parseFlags ) - let outputFlags = 0 + let outputFlags = options.allowJSURIs ? OutputFlags.AllowJSURI : 0 + switch (options.format) { case "xhtml": outputFlags |= OutputFlags.HTML | OutputFlags.XHTML