diff --git a/README.md b/README.md index 9abe987e..35d26cd4 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,10 @@ extensions: * With the flag `MD_FLAG_UNDERLINE`, underscore (`_`) denotes an underline instead of an ordinary emphasis or strong emphasis. +* With the flag `MD_FLAG_HEADINGAUTOID`, unique identifiers are generated for + headings. The HTML renderer outputs them as the `id` attribute of the heading tag. For example + `<h1 id="title">Title</h1>
`. + Few features of CommonMark (those some people see as mis-features) may be disabled with the following flags: diff --git a/md2html/md2html.c b/md2html/md2html.c index 06b2b74b..139a6eda 100644 --- a/md2html/md2html.c +++ b/md2html/md2html.c @@ -42,8 +42,10 @@ static unsigned parser_flags = 0; #endif static int want_fullhtml = 0; static int want_xhtml = 0; +static int want_toc = 0; static int want_stat = 0; +MD_TOC_OPTIONS toc_options = { 0, NULL}; /********************************* *** Simple grow-able buffer *** @@ -142,7 +144,7 @@ process_file(FILE* in, FILE* out) t0 = clock(); ret = md_html(buf_in.data, (MD_SIZE)buf_in.size, process_output, (void*) &buf_out, - parser_flags, renderer_flags); + parser_flags, renderer_flags, &toc_options); t1 = clock(); if(ret != 0) { @@ -200,6 +202,9 @@ static const CMDLINE_OPTION cmdline_options[] = { { 'o', "output", 'o', CMDLINE_OPTFLAG_REQUIREDARG }, { 'f', "full-html", 'f', 0 }, { 'x', "xhtml", 'x', 0 }, + { 't', "table-of-content", 't', CMDLINE_OPTFLAG_OPTIONALARG }, + { 0, "toc", 't', CMDLINE_OPTFLAG_OPTIONALARG }, + { 0, "toc-depth", 'd', CMDLINE_OPTFLAG_REQUIREDARG }, { 's', "stat", 's', 0 }, { 'h', "help", 'h', 0 }, { 'v', "version", 'v', 0 }, @@ -220,6 +225,7 @@ static const CMDLINE_OPTION cmdline_options[] = { { 0, "funderline", '_', 0 }, { 0, "fverbatim-entities", 'E', 0 }, { 0, "fwiki-links", 'K', 0 }, + { 0, "fheading-auto-id", '#', 0 }, { 0, "fno-html-blocks", 'F', 0 }, { 0, "fno-html-spans", 'G', 0 }, @@ -240,6 +246,11 @@ usage(void) " -o --output=FILE Output file (default is standard output)\n" " -f, --full-html Generate full HTML document, including header\n" " -x, --xhtml Generate XHTML instead of HTML\n" + " -t, --table-of-content=MARK, --toc=MARK\n" + " Generate a table of content in place of MARK line\n" + " If no MARK is given, the toc is generated at start\n" + " --toc-depth=D Set the maximum level of heading in the table\n" + " of content. 1 to 6. 
Default is 3\n" " -s, --stat Measure time of input parsing\n" " -h, --help Display this help and exit\n" " -v, --version Display version and exit\n" @@ -269,6 +280,8 @@ usage(void) " --ftasklists Enable task lists\n" " --funderline Enable underline spans\n" " --fwiki-links Enable wiki links\n" + " --fheading-auto-id\n" + " Enable heading auto identifier\n" "\n" "Markdown suppression options:\n" " --fno-html-blocks\n" @@ -295,6 +308,12 @@ version(void) static const char* input_path = NULL; static const char* output_path = NULL; +static int parse_toc_depth(char const* value){ + toc_options.depth = -1; + toc_options.depth = *value - '0'; + return (toc_options.depth>0 && toc_options.depth <= 6); +} + static int cmdline_callback(int opt, char const* value, void* data) { @@ -311,6 +330,20 @@ cmdline_callback(int opt, char const* value, void* data) case 'o': output_path = value; break; case 'f': want_fullhtml = 1; break; case 'x': want_xhtml = 1; renderer_flags |= MD_HTML_FLAG_XHTML; break; + case 't': + want_toc = 1; + parser_flags |= MD_FLAG_HEADINGAUTOID; + toc_options.toc_placeholder = value; + if(toc_options.depth == 0) + toc_options.depth = 3; + break; + case 'd': + if(!parse_toc_depth(value)){ + fprintf(stderr, "Invalid toc-depth: %s\n", value); + fprintf(stderr, "Must be a number in the range 1-6\n"); + exit(1); + } + break; case 's': want_stat = 1; break; case 'h': usage(); exit(0); break; case 'v': version(); exit(0); break; @@ -335,6 +368,7 @@ cmdline_callback(int opt, char const* value, void* data) case 'K': parser_flags |= MD_FLAG_WIKILINKS; break; case 'X': parser_flags |= MD_FLAG_TASKLISTS; break; case '_': parser_flags |= MD_FLAG_UNDERLINE; break; + case '#': parser_flags |= MD_FLAG_HEADINGAUTOID; break; default: fprintf(stderr, "Illegal option: %s\n", value); diff --git a/scripts/build_symbol_map.py b/scripts/build_symbol_map.py new file mode 100644 index 00000000..bd19f5a5 --- /dev/null +++ b/scripts/build_symbol_map.py @@ -0,0 +1,66 @@ +#!/usr/bin/env 
python3 + +import os +import sys +import textwrap + + +self_path = os.path.dirname(os.path.realpath(__file__)); +f = open(self_path + "/unicode/DerivedGeneralCategory.txt", "r") + +codepoint_list = [] +category_list = [ "Sm", "Sc", "Sk", "So" ] + +# Filter codepoints falling in the right category: +for line in f: + comment_off = line.find("#") + if comment_off >= 0: + line = line[:comment_off] + line = line.strip() + if not line: + continue + + char_range, category = line.split(";") + char_range = char_range.strip() + category = category.strip() + + if not category in category_list: + continue + + delim_off = char_range.find("..") + if delim_off >= 0: + codepoint0 = int(char_range[:delim_off], 16) + codepoint1 = int(char_range[delim_off+2:], 16) + for codepoint in range(codepoint0, codepoint1 + 1): + codepoint_list.append(codepoint) + else: + codepoint = int(char_range, 16) + codepoint_list.append(codepoint) +f.close() + + +codepoint_list.sort() + + +index0 = 0 +count = len(codepoint_list) + +records = list() +while index0 < count: + index1 = index0 + 1 + while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1: + index1 += 1 + + if index1 - index0 > 1: + # Range of codepoints + records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1])) + else: + # Single codepoint + records.append("S(0x{:04x})".format(codepoint_list[index0])) + + index0 = index1 + +sys.stdout.write("static const unsigned SYMBOL_MAP[] = {\n") +sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110, + initial_indent = " ", subsequent_indent=" "))) +sys.stdout.write("\n};\n\n") diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh index c00b36a9..6ed95ffa 100755 --- a/scripts/run-tests.sh +++ b/scripts/run-tests.sh @@ -70,6 +70,22 @@ echo echo "Underline extension:" $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/underline.txt" -p "$PROGRAM --funderline" +echo +echo "Heading auto identifiers extension:" +$PYTHON 
"$TEST_DIR/spec_tests.py" -s "$TEST_DIR/heading-auto-identifier.txt" -p "$PROGRAM --fheading-auto-id" + echo echo "Pathological input:" $PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM" + +echo +echo "Heading auto identifiers pathological input:" +$PYTHON "$TEST_DIR/pathological_auto_ident_tests.py" -p "$PROGRAM --fheading-auto-id" + +echo +echo "Table of content extension:" +$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/toc.txt" -p "$PROGRAM --table-of-content" + +echo +echo "Table of content placement extension:" +$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/toc-mark.txt" -p "$PROGRAM --table-of-content=[[__TOC__]]" diff --git a/src/md4c-html.c b/src/md4c-html.c index d604aecb..b6cdf7af 100644 --- a/src/md4c-html.c +++ b/src/md4c-html.c @@ -309,6 +309,20 @@ render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det) RENDER_VERBATIM(r, ">"); } +static void +render_header_block(MD_HTML* r, const MD_BLOCK_H_DETAIL* det) +{ + static const MD_CHAR* head[6] = { "level- 1]); + if(det->identifier.text != NULL) { + RENDER_VERBATIM(r, " id=\""); + render_attribute(r, &det->identifier, render_html_escaped); + RENDER_VERBATIM(r, "\""); + } + RENDER_VERBATIM(r, ">"); +} + static void render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det) { @@ -378,7 +392,6 @@ render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det) static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { - static const MD_CHAR* head[6] = { "

", "

", "

", "

", "

", "
" }; MD_HTML* r = (MD_HTML*) userdata; switch(type) { @@ -388,7 +401,7 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break; case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break; case MD_BLOCK_HR: RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
\n" : "
\n"); break; - case MD_BLOCK_H: RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break; + case MD_BLOCK_H: render_header_block(r, (const MD_BLOCK_H_DETAIL*)detail); break; case MD_BLOCK_CODE: render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail); break; case MD_BLOCK_HTML: /* noop */ break; case MD_BLOCK_P: RENDER_VERBATIM(r, "

"); break; @@ -398,6 +411,7 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) case MD_BLOCK_TR: RENDER_VERBATIM(r, "\n"); break; case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break; case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break; + case MD_BLOCK_NAV: RENDER_VERBATIM(r, "

\n"); break; } return 0; @@ -531,13 +546,14 @@ debug_log_callback(const char* msg, void* userdata) int md_html(const MD_CHAR* input, MD_SIZE input_size, void (*process_output)(const MD_CHAR*, MD_SIZE, void*), - void* userdata, unsigned parser_flags, unsigned renderer_flags) + void* userdata, unsigned parser_flags, unsigned renderer_flags, + MD_TOC_OPTIONS* toc_options) { MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } }; int i; MD_PARSER parser = { - 0, + 1, parser_flags, enter_block_callback, leave_block_callback, @@ -545,6 +561,7 @@ md_html(const MD_CHAR* input, MD_SIZE input_size, leave_span_callback, text_callback, debug_log_callback, + *toc_options, NULL }; diff --git a/src/md4c-html.h b/src/md4c-html.h index 23d3f739..aeac7f52 100644 --- a/src/md4c-html.h +++ b/src/md4c-html.h @@ -52,13 +52,16 @@ * Param userdata is just propagated back to process_output() callback. * Param parser_flags are flags from md4c.h propagated to md_parse(). * Param render_flags is bitmask of MD_HTML_FLAG_xxxx. + * Param toc_options is a pointer to toc options from md4c.h propagated to md_parse(). * * Returns -1 on error (if md_parse() fails.) * Returns 0 on success. */ int md_html(const MD_CHAR* input, MD_SIZE input_size, void (*process_output)(const MD_CHAR*, MD_SIZE, void*), - void* userdata, unsigned parser_flags, unsigned renderer_flags); + void* userdata, unsigned parser_flags, unsigned renderer_flags, + MD_TOC_OPTIONS* toc_options + ); #ifdef __cplusplus diff --git a/src/md4c.c b/src/md4c.c index 3677c0e0..89f4642c 100644 --- a/src/md4c.c +++ b/src/md4c.c @@ -128,7 +128,7 @@ typedef struct MD_MARK_tag MD_MARK; typedef struct MD_BLOCK_tag MD_BLOCK; typedef struct MD_CONTAINER_tag MD_CONTAINER; typedef struct MD_REF_DEF_tag MD_REF_DEF; - +typedef struct MD_HEADING_DEF_tag MD_HEADING_DEF; /* During analyzes of inline marks, we need to manage some "mark chains", * of (yet unresolved) openers. This structure holds start/end of the chain. 
@@ -163,6 +163,20 @@ struct MD_CTX_tag { void** ref_def_hashtable; int ref_def_hashtable_size; + /* Heading definitions. */ + MD_HEADING_DEF* heading_defs; + int n_heading_defs; + int alloc_heading_defs; + void** heading_def_hashtable; + int heading_def_hashtable_size; + /* autogenerated identifiers for heading */ + CHAR* identifiers; + SZ identifiers_size; + SZ alloc_identifiers; + + /* Toc informations */ + int toc_found; + /* Stack of inline/span markers. * This is only used for parsing a single block contents but by storing it * here we may reuse the stack for subsequent blocks; i.e. we have fewer @@ -245,7 +259,8 @@ enum MD_LINETYPE_tag { MD_LINE_HTML, MD_LINE_TEXT, MD_LINE_TABLE, - MD_LINE_TABLEUNDERLINE + MD_LINE_TABLEUNDERLINE, + MD_LINE_TOC }; typedef enum MD_LINETYPE_tag MD_LINETYPE; @@ -292,6 +307,7 @@ struct MD_VERBATIMLINE_tag { #define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f'))) #define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127) #define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126)) +#define ISSYMBOL_(ch) (ISANYOF3_(ch, _T('+'), _T('|'), _T('~')) || ISIN_(ch, 60, 62)) #define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z'))) #define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z'))) #define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch)) @@ -308,6 +324,7 @@ struct MD_VERBATIMLINE_tag { #define ISWHITESPACE(off) ISWHITESPACE_(CH(off)) #define ISCNTRL(off) ISCNTRL_(CH(off)) #define ISPUNCT(off) ISPUNCT_(CH(off)) +#define ISSYMBOL(off) ISSYMBOL_(CH(off)) #define ISUPPER(off) ISUPPER_(CH(off)) #define ISLOWER(off) ISLOWER_(CH(off)) #define ISALPHA(off) ISALPHA_(CH(off)) @@ -611,6 +628,64 @@ struct MD_UNICODE_FOLD_INFO_tag { return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0); } + static int + md_is_unicode_symbol__(unsigned codepoint) + { +#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000) +#define S(cp) (cp) + /* Unicode 
"Sm", "Sc", "Sk", "So" categories. + * (generated by scripts/build_symbol_map.py) */ + static const unsigned SYMBOL_MAP[] = { + S(0x0024), S(0x002b), R(0x003c,0x003e), S(0x005e), S(0x0060), S(0x007c), S(0x007e), R(0x00a2,0x00a6), + R(0x00a8,0x00a9), S(0x00ac), R(0x00ae,0x00b1), S(0x00b4), S(0x00b8), S(0x00d7), S(0x00f7), + R(0x02c2,0x02c5), R(0x02d2,0x02df), R(0x02e5,0x02eb), S(0x02ed), R(0x02ef,0x02ff), S(0x0375), + R(0x0384,0x0385), S(0x03f6), S(0x0482), R(0x058d,0x058f), R(0x0606,0x0608), S(0x060b), R(0x060e,0x060f), + S(0x06de), S(0x06e9), R(0x06fd,0x06fe), S(0x07f6), R(0x07fe,0x07ff), R(0x09f2,0x09f3), R(0x09fa,0x09fb), + S(0x0af1), S(0x0b70), R(0x0bf3,0x0bfa), S(0x0c7f), S(0x0d4f), S(0x0d79), S(0x0e3f), R(0x0f01,0x0f03), + S(0x0f13), R(0x0f15,0x0f17), R(0x0f1a,0x0f1f), S(0x0f34), S(0x0f36), S(0x0f38), R(0x0fbe,0x0fc5), + R(0x0fc7,0x0fcc), R(0x0fce,0x0fcf), R(0x0fd5,0x0fd8), R(0x109e,0x109f), R(0x1390,0x1399), S(0x166d), + S(0x17db), S(0x1940), R(0x19de,0x19ff), R(0x1b61,0x1b6a), R(0x1b74,0x1b7c), S(0x1fbd), R(0x1fbf,0x1fc1), + R(0x1fcd,0x1fcf), R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), S(0x2044), S(0x2052), + R(0x207a,0x207c), R(0x208a,0x208c), R(0x20a0,0x20bf), R(0x2100,0x2101), R(0x2103,0x2106), + R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), S(0x2129), + S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b), + R(0x2190,0x2307), R(0x230c,0x2328), R(0x232b,0x2426), R(0x2440,0x244a), R(0x249c,0x24e9), + R(0x2500,0x2767), R(0x2794,0x27c4), R(0x27c7,0x27e5), R(0x27f0,0x2982), R(0x2999,0x29d7), + R(0x29dc,0x29fb), R(0x29fe,0x2b73), R(0x2b76,0x2b95), R(0x2b97,0x2bff), R(0x2ce5,0x2cea), + R(0x2e50,0x2e51), R(0x2e80,0x2e99), R(0x2e9b,0x2ef3), R(0x2f00,0x2fd5), R(0x2ff0,0x2ffb), S(0x3004), + R(0x3012,0x3013), S(0x3020), R(0x3036,0x3037), R(0x303e,0x303f), R(0x309b,0x309c), R(0x3190,0x3191), + R(0x3196,0x319f), R(0x31c0,0x31e3), R(0x3200,0x321e), R(0x322a,0x3247), S(0x3250), 
R(0x3260,0x327f), + R(0x328a,0x32b0), R(0x32c0,0x33ff), R(0x4dc0,0x4dff), R(0xa490,0xa4c6), R(0xa700,0xa716), + R(0xa720,0xa721), R(0xa789,0xa78a), R(0xa828,0xa82b), R(0xa836,0xa839), R(0xaa77,0xaa79), S(0xab5b), + R(0xab6a,0xab6b), S(0xfb29), R(0xfbb2,0xfbc1), R(0xfdfc,0xfdfd), S(0xfe62), R(0xfe64,0xfe66), S(0xfe69), + S(0xff04), S(0xff0b), R(0xff1c,0xff1e), S(0xff3e), S(0xff40), S(0xff5c), S(0xff5e), R(0xffe0,0xffe6), + R(0xffe8,0xffee), R(0xfffc,0xfffd), R(0x10137,0x1013f), R(0x10179,0x10189), R(0x1018c,0x1018e), + R(0x10190,0x1019c), S(0x101a0), R(0x101d0,0x101fc), R(0x10877,0x10878), S(0x10ac8), S(0x1173f), + R(0x11fd5,0x11ff1), R(0x16b3c,0x16b3f), S(0x16b45), S(0x1bc9c), R(0x1d000,0x1d0f5), R(0x1d100,0x1d126), + R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184), R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1e8), + R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1), S(0x1d6db), S(0x1d6fb), S(0x1d715), + S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9), S(0x1d7c3), R(0x1d800,0x1d9ff), + R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83), R(0x1da85,0x1da86), S(0x1e14f), S(0x1e2ff), + S(0x1ecac), S(0x1ecb0), S(0x1ed2e), R(0x1eef0,0x1eef1), R(0x1f000,0x1f02b), R(0x1f030,0x1f093), + R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf), R(0x1f0c1,0x1f0cf), R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad), + R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b), R(0x1f240,0x1f248), R(0x1f250,0x1f251), R(0x1f260,0x1f265), + R(0x1f300,0x1f6d7), R(0x1f6e0,0x1f6ec), R(0x1f6f0,0x1f6fc), R(0x1f700,0x1f773), R(0x1f780,0x1f7d8), + R(0x1f7e0,0x1f7eb), R(0x1f800,0x1f80b), R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887), + R(0x1f890,0x1f8ad), R(0x1f8b0,0x1f8b1), R(0x1f900,0x1f978), R(0x1f97a,0x1f9cb), R(0x1f9cd,0x1fa53), + R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa74), R(0x1fa78,0x1fa7a), R(0x1fa80,0x1fa86), R(0x1fa90,0x1faa8), + R(0x1fab0,0x1fab6), R(0x1fac0,0x1fac2), R(0x1fad0,0x1fad6), R(0x1fb00,0x1fb92), R(0x1fb94,0x1fbca) + }; + +#undef R +#undef S + + /* The ASCII ones are the 
most frequently used ones. */ + if(codepoint <= 0x7f) + return ISSYMBOL_(codepoint); + + return (md_unicode_bsearch__(codepoint, SYMBOL_MAP, SIZEOF_ARRAY(SYMBOL_MAP)) >= 0); + } + static void md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info) { @@ -850,6 +925,36 @@ struct MD_UNICODE_FOLD_INFO_tag { return (unsigned) str[0]; } +/* + * encode a codepoint into the corresponding utf8 byte sequence + * the string buffer passed must be large enough + * return the number of bytes written to the buffer + */ + static unsigned + md_encode_utf8__(unsigned codepoint, CHAR* str ) + { + if(codepoint <= 0x7f){ + *str++ = (char)codepoint; + return 1; + } else if (codepoint <= 0x7FF){ + *str++ = 0xc0 | (codepoint >> 6); + *str++ = 0x80 | ((codepoint >> 0) & 0x3f); + return 2; + } else if ( codepoint <= 0xFFFF) { + *str++ = 0xe0 | (codepoint >> 12); + *str++ = 0x80 | ((codepoint >> 6 ) & 0x3f); + *str++ = 0x80 | ((codepoint >> 0 ) & 0x3f); + return 3; + } else if ( codepoint <= 0x10FFFF) { + *str++ = 0xf0 | (codepoint >> 18); + *str++ = 0x80 | ((codepoint >> 12) & 0x3f); + *str++ = 0x80 | ((codepoint >> 6 ) & 0x3f); + *str++ = 0x80 | ((codepoint >> 0 ) & 0x3f); + return 4; + } + return 0; + } + static unsigned md_decode_utf8_before__(MD_CTX* ctx, OFF off) { @@ -877,22 +982,34 @@ struct MD_UNICODE_FOLD_INFO_tag { #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL)) #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off)) + #define ISUNICODEPUNCT_(codepoint) md_is_unicode_punct__(codepoint) #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL)) #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off)) + #define ISUNICODESYMBOL_(codepoint) md_is_unicode_symbol__(codepoint) + static inline unsigned md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size) { return 
md_decode_utf8__(str+off, str_size-off, p_char_size); } + + static inline unsigned + md_encode_unicode(unsigned codepoint, CHAR* str ) + { + return md_encode_utf8__(codepoint, str); + } #else #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint) #define ISUNICODEWHITESPACE(off) ISWHITESPACE(off) #define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1) + #define ISUNICODEPUNCT_(codepoint) ISPUNCT_(codepoint) #define ISUNICODEPUNCT(off) ISPUNCT(off) #define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1) + #define ISUNICODESYMBOL_(codepoint) ISSYMBOL_(codepoint) + static inline void md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info) { @@ -902,6 +1019,13 @@ struct MD_UNICODE_FOLD_INFO_tag { info->n_codepoints = 1; } + static unsigned + md_encode_unicode(unsigned codepoint, CHAR* str ) + { + *str = codepoint; + return 1; + } + static inline unsigned md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size) { @@ -1417,11 +1541,107 @@ md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build) if(build->substr_alloc > 0) { free(build->text); - free(build->substr_types); - free(build->substr_offsets); + if( build->substr_types != build->trivial_types) + free(build->substr_types); + if( build->substr_offsets != build->trivial_offsets) + free(build->substr_offsets); } } +static int +md_build_trivial_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, + MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build) +{ + MD_UNUSED(ctx); + memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD)); + build->substr_types = build->trivial_types; + build->substr_offsets = build->trivial_offsets; + build->substr_count = 1; + build->substr_alloc = 0; + build->trivial_types[0] = MD_TEXT_NORMAL; + build->trivial_offsets[0] = 0; + build->trivial_offsets[1] = raw_size; + + attr->text = (CHAR*) (raw_size ? 
raw_text : NULL); + attr->size = raw_size; + attr->substr_offsets = build->substr_offsets; + attr->substr_types = build->substr_types; + return 0; +} + +/* Convert a 16 bits unsigned word to a string +* the dest buffer must be at least 5 char long +* It does not NUL-terminate the string +* Return the number of characters used by the string +*/ +static int +md_int16_to_str(unsigned short n, CHAR* dest){ + char count = 5; + + if(n <10 ){ + static const CHAR numbers[] = _T("0123456789"); + *dest = numbers[n]; + return 1; + } + while(1){ + if(n< 100){ count = 2; break;} + if(n< 1000){ count = 3; break;} + if(n< 10000){ count = 4; break;} + break; + } + // start from end + dest += count; + while (n) { + *--dest = '0' + ( n % 10); + n /= 10; + } + return count; +} +
+/* Build an attribute whose text is raw_text followed by '-' and the decimal
+ * form of postfix (values above 0xffff are clamped to 0xffff).
+ *
+ * The text is stored in a freshly malloc()ed build->text; the substring
+ * tables point at the build's own trivial_* arrays. substr_alloc is set
+ * non-zero so that md_free_attribute() releases build->text.
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+static int
+md_build_attribute_postfix(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+              unsigned postfix, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
+{
+    OFF off;
+    const SZ MAX_POSTFIX_SIZE = 5;  /* Digits only; the '-' separator adds one more. */
+
+    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
+    build->substr_types = build->trivial_types;
+    build->substr_offsets = build->trivial_offsets;
+    build->substr_count = 1;
+    build->substr_alloc = 1;
+    build->trivial_types[0] = MD_TEXT_NORMAL;
+    build->trivial_offsets[0] = 0;
+    off = raw_size;
+    if (postfix > 0xffff) {
+        /* The postfix must fit in 16 bits (at most 65535 = 2^16 - 1), i.e. 5 digits. */
+        postfix = 0xffff;
+    }
+
+    build->text = (CHAR*) malloc((raw_size + MAX_POSTFIX_SIZE+1) * sizeof(CHAR));
+    if(build->text == NULL) {
+        MD_LOG("malloc() failed.");
+        goto abort;
+    }
+
+    /* Copy the original text. memcpy() takes a byte count, so scale by
+     * sizeof(CHAR) exactly as the malloc() above does; otherwise builds
+     * with a wide CHAR would copy only part of the text. */
+    memcpy(build->text, raw_text, raw_size * sizeof(CHAR));
+    /* Append the postfix. */
+    build->text[off++] = _T('-');
+    off+= md_int16_to_str(postfix, &build->text[off]);
+
+    attr->text = build->text;
+    build->trivial_offsets[1] = off;
+    attr->size = off;
+    attr->substr_offsets = build->substr_offsets;
+    attr->substr_types = build->substr_types;
+    return 0;
+
+abort:
+    md_free_attribute(ctx, build);
+    return 
-1; +} + static int md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build) @@ -1507,7 +1727,6 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, return -1; } - /********************************************* *** Dictionary of Reference Definitions *** *********************************************/ @@ -1534,11 +1753,11 @@ md_fnv1a(unsigned base, const void* data, size_t n) struct MD_REF_DEF_tag { CHAR* label; CHAR* title; + CHAR* dest; unsigned hash; SZ label_size; SZ title_size; - OFF dest_beg; - OFF dest_end; + SZ dest_size; unsigned char label_needs_free : 1; unsigned char title_needs_free : 1; }; @@ -1872,8 +2091,8 @@ md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size) typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR; struct MD_LINK_ATTR_tag { - OFF dest_beg; - OFF dest_end; + CHAR* dest; + SZ dest_size; CHAR* title; SZ title_size; @@ -1955,7 +2174,7 @@ md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, static int md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, - OFF* p_contents_beg, OFF* p_contents_end) + CHAR** p_contents, SZ* p_contents_size) { OFF off = beg; @@ -1974,8 +2193,8 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, if(CH(off) == _T('>')) { /* Success. */ - *p_contents_beg = beg+1; - *p_contents_end = off; + *p_contents = (CHAR*)STR(beg+1); + *p_contents_size = off - (beg+1); *p_end = off+1; return TRUE; } @@ -1988,7 +2207,7 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, static int md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, - OFF* p_contents_beg, OFF* p_contents_end) + CHAR** p_contents, SZ* p_contents_size) { OFF off = beg; int parenthesis_level = 0; @@ -2022,20 +2241,20 @@ md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, return FALSE; /* Success. 
*/ - *p_contents_beg = beg; - *p_contents_end = off; + *p_contents = (CHAR*)STR(beg); + *p_contents_size = off - beg; *p_end = off; return TRUE; } static inline int md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, - OFF* p_contents_beg, OFF* p_contents_end) + CHAR** p_contents, SZ* p_contents_size) { if(CH(beg) == _T('<')) - return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); + return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents, p_contents_size); else - return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); + return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents, p_contents_size); } static int @@ -2098,6 +2317,26 @@ md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, return FALSE; } +static int +md_push_ref_def(MD_CTX* ctx) +{ + if(ctx->n_ref_defs >= ctx->alloc_ref_defs) { + MD_REF_DEF* new_defs; + + ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 + ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2 + : 16); + new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF)); + if(new_defs == NULL) { + MD_LOG("realloc() failed."); + return -1; + } + + ctx->ref_defs = new_defs; + } + return 0; +} + /* Returns 0 if it is not a reference definition. * * Returns N > 0 if it is a reference definition. N then corresponds to the @@ -2113,8 +2352,8 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) OFF label_contents_end; int label_contents_line_index = -1; int label_is_multiline = FALSE; - OFF dest_contents_beg; - OFF dest_contents_end; + CHAR* dest_contents; + SZ dest_contents_size; OFF title_contents_beg; OFF title_contents_end; int title_contents_line_index; @@ -2149,7 +2388,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* Link destination. 
*/ if(!md_is_link_destination(ctx, off, lines[line_index].end, - &off, &dest_contents_beg, &dest_contents_end)) + &off, &dest_contents, &dest_contents_size)) return FALSE; /* (Optional) title. Note we interpret it as an title only if nothing @@ -2175,20 +2414,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) return FALSE; /* So, it _is_ a reference definition. Remember it. */ - if(ctx->n_ref_defs >= ctx->alloc_ref_defs) { - MD_REF_DEF* new_defs; - - ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 - ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2 - : 16); - new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF)); - if(new_defs == NULL) { - MD_LOG("realloc() failed."); - goto abort; - } - - ctx->ref_defs = new_defs; - } + MD_CHECK(md_push_ref_def(ctx)); def = &ctx->ref_defs[ctx->n_ref_defs]; memset(def, 0, sizeof(MD_REF_DEF)); @@ -2212,8 +2438,8 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) def->title_size = title_contents_end - title_contents_beg; } - def->dest_beg = dest_contents_beg; - def->dest_end = dest_contents_end; + def->dest = dest_contents; + def->dest_size = dest_contents_size; /* Success. */ ctx->n_ref_defs++; @@ -2259,8 +2485,8 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines, def = md_lookup_ref_def(ctx, label, label_size); if(def != NULL) { - attr->dest_beg = def->dest_beg; - attr->dest_end = def->dest_end; + attr->dest = def->dest; + attr->dest_size = def->dest_size; attr->title = def->title; attr->title_size = def->title_size; attr->title_needs_free = FALSE; @@ -2306,8 +2532,8 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, /* Link destination may be omitted, but only when not also having a title. 
*/ if(off < ctx->size && CH(off) == _T(')')) { - attr->dest_beg = off; - attr->dest_end = off; + attr->dest = (CHAR*)STR(off); + attr->dest_size = 0; attr->title = NULL; attr->title_size = 0; attr->title_needs_free = FALSE; @@ -2318,7 +2544,7 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, /* Link destination. */ if(!md_is_link_destination(ctx, off, lines[line_index].end, - &off, &attr->dest_beg, &attr->dest_end)) + &off, &attr->dest, &attr->dest_size)) return FALSE; /* (Optional) title. */ @@ -2389,6 +2615,249 @@ md_free_ref_defs(MD_CTX* ctx) free(ctx->ref_defs); } +/********************************************* + *** Dictionary of Heading Definitions *** + *********************************************/ + +struct MD_HEADING_DEF_tag { + CHAR* heading; + SZ heading_size; + CHAR* identifier; // only valid after all heading are known + unsigned hash; + OFF ident_beg; + SZ ident_size; + unsigned postfix; + unsigned level:8; +}; + +static int +md_push_heading_def(MD_CTX* ctx) +{ + if(ctx->n_heading_defs >= ctx->alloc_heading_defs) { + MD_HEADING_DEF* new_defs; + + ctx->alloc_heading_defs = (ctx->alloc_heading_defs > 0 + ? ctx->alloc_heading_defs + ctx->alloc_heading_defs / 2 + : 16); + new_defs = (MD_HEADING_DEF*) realloc(ctx->heading_defs, ctx->alloc_heading_defs * sizeof(MD_HEADING_DEF)); + if(new_defs == NULL) { + MD_LOG("realloc() failed."); + return -1; + } + + ctx->heading_defs = new_defs; + } + return 0; +} +
+/* Make sure ctx->identifiers has room for def->ident_size more CHARs and
+ * record in def->ident_beg the offset at which the new identifier starts.
+ *
+ * The capacity starts at 512 CHARs and grows by half until it is strictly
+ * larger than the space needed, so a single long heading cannot overrun
+ * the buffer. When the buffer moves, every reference definition whose dest
+ * points into the old buffer is re-based onto the new one.
+ *
+ * Returns 0 on success, -1 if the allocation fails (ctx is then unchanged).
+ */
+static int
+md_alloc_identifiers(MD_CTX *ctx, MD_HEADING_DEF* def)
+{
+    SZ needed = ctx->identifiers_size + def->ident_size;
+
+    if(needed >= ctx->alloc_identifiers) {
+        CHAR* new_identifiers;
+        SZ new_alloc = (ctx->alloc_identifiers > 0 ? ctx->alloc_identifiers : 512);
+        int i;
+
+        /* One growth step may not be enough for a long heading. The
+         * capacity is never below 512, so each step makes progress. */
+        while(new_alloc <= needed)
+            new_alloc += new_alloc / 2;
+
+        /* Use a fresh allocation instead of realloc() so that the old
+         * buffer is still valid while pointers into it are re-based. */
+        new_identifiers = (CHAR*) malloc(sizeof(CHAR) * new_alloc);
+        if(new_identifiers == NULL) {
+            MD_LOG("malloc() failed.");
+            return -1;
+        }
+
+        if(ctx->identifiers != NULL) {
+            memcpy(new_identifiers, ctx->identifiers, ctx->identifiers_size * sizeof(CHAR));
+
+            /* Rebuild all ref_defs pointing into the identifiers buffer.
+             * The range is half-open: the very first identifier starts
+             * exactly at ctx->identifiers and must be re-based too. */
+            for(i = 0; i < ctx->n_ref_defs; i++) {
+                MD_REF_DEF* ref_def = &ctx->ref_defs[i];
+                if(ref_def->dest >= ctx->identifiers
+                   && ref_def->dest < ctx->identifiers + ctx->identifiers_size)
+                {
+                    ref_def->dest = new_identifiers + (ref_def->dest - ctx->identifiers);
+                }
+            }
+            free(ctx->identifiers);
+        }
+
+        /* Commit the new buffer and capacity only once everything succeeded. */
+        ctx->identifiers = new_identifiers;
+        ctx->alloc_identifiers = new_alloc;
+    }
+
+    def->ident_beg = ctx->identifiers_size;
+    return 0;
+}
+ +/** forward declaration */ +static int +md_heading_build_ident(MD_CTX* ctx, MD_HEADING_DEF* def, MD_LINE* lines, int n_lines, int level); + +typedef struct MD_HEADING_DEF_LIST_tag MD_HEADING_DEF_LIST; +struct MD_HEADING_DEF_LIST_tag { + int n_heading_defs; + int alloc_heading_defs; + MD_HEADING_DEF* heading_defs[]; /* Valid items always point into ctx->heading_defs[] */ +}; + +static int +md_heading_def_cmp(const void* a, const void* b) +{ + const MD_HEADING_DEF* a_ref = *(const MD_HEADING_DEF**)a; + const MD_HEADING_DEF* b_ref = *(const MD_HEADING_DEF**)b; + + if(a_ref->hash < b_ref->hash) + return -1; + else if(a_ref->hash > b_ref->hash) + return +1; + else + return md_link_label_cmp(a_ref->identifier, a_ref->ident_size, + b_ref->identifier, b_ref->ident_size); +} + +static int +md_heading_def_cmp_for_sort(const void* a, const void* b) +{ + int cmp; + + cmp = md_heading_def_cmp(a, b); + + /* Ensure stability of the sorting. 
*/ + if(cmp == 0) { + const MD_HEADING_DEF* a_ref = *(const MD_HEADING_DEF**)a; + const MD_HEADING_DEF* b_ref = *(const MD_HEADING_DEF**)b; + + if(a_ref < b_ref) + cmp = -1; + else if(a_ref > b_ref) + cmp = +1; + else + cmp = 0; + } + + return cmp; +} + +static int +md_build_heading_def_hashtable(MD_CTX* ctx) +{ + int i, j; + + if(ctx->n_heading_defs == 0) + return 0; + + ctx->heading_def_hashtable_size = (ctx->n_heading_defs * 5) / 4; + ctx->heading_def_hashtable = malloc(ctx->heading_def_hashtable_size * sizeof(void*)); + if(ctx->heading_def_hashtable == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + memset(ctx->heading_def_hashtable, 0, ctx->heading_def_hashtable_size * sizeof(void*)); + + /* Each member of ctx->heading_def_hashtable[] can be: + * -- NULL, + * -- pointer to the MD_HEADING_DEF in ctx->heading_defs[], or + * -- pointer to a MD_HEADING_DEF_LIST, which holds multiple pointers to + * such MD_HEADING_DEFs. + */ + for(i = 0; i < ctx->n_heading_defs; i++) { + MD_HEADING_DEF* def = &ctx->heading_defs[i]; + void* bucket; + MD_HEADING_DEF_LIST* list; + + // compute identifier hash reusing the link label hash function + def->identifier = &ctx->identifiers[def->ident_beg]; + def->hash = md_link_label_hash(def->identifier, def->ident_size); + bucket = ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size]; + + if(bucket == NULL) { + /* The bucket is empty. Make it just point to the def. */ + ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = def; + continue; + } + + if(ctx->heading_defs <= (MD_HEADING_DEF*) bucket && (MD_HEADING_DEF*) bucket < ctx->heading_defs + ctx->n_heading_defs) { + /* The bucket already contains one heading def.*/ + MD_HEADING_DEF* old_def = (MD_HEADING_DEF*) bucket; + + /* Make the bucket complex, i.e. able to hold more heading defs. 
*/ + list = (MD_HEADING_DEF_LIST*) malloc(sizeof(MD_HEADING_DEF_LIST) + 2 * sizeof(MD_HEADING_DEF*)); + if(list == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + list->heading_defs[0] = old_def; + list->heading_defs[1] = def; + list->n_heading_defs = 2; + list->alloc_heading_defs = 2; + ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = list; + continue; + } + + /* Append the def to the complex bucket list. */ + list = (MD_HEADING_DEF_LIST*) bucket; + if(list->n_heading_defs >= list->alloc_heading_defs) { + int alloc_heading_defs = list->alloc_heading_defs + list->alloc_heading_defs / 2; + MD_HEADING_DEF_LIST* list_tmp = (MD_HEADING_DEF_LIST*) realloc(list, + sizeof(MD_HEADING_DEF_LIST) + alloc_heading_defs * sizeof(MD_HEADING_DEF*)); + if(list_tmp == NULL) { + MD_LOG("realloc() failed."); + goto abort; + } + list = list_tmp; + list->alloc_heading_defs = alloc_heading_defs; + ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = list; + } + + list->heading_defs[list->n_heading_defs] = def; + list->n_heading_defs++; + } + + /* Sort the complex buckets so we can use bsearch() with them. 
*/ + for(i = 0; i < ctx->heading_def_hashtable_size; i++) { + void* bucket = ctx->heading_def_hashtable[i]; + MD_HEADING_DEF_LIST* list; + + if(bucket == NULL) + continue; + if(ctx->heading_defs <= (MD_HEADING_DEF*) bucket && (MD_HEADING_DEF*) bucket < ctx->heading_defs + ctx->n_heading_defs) + continue; + + list = (MD_HEADING_DEF_LIST*) bucket; + qsort(list->heading_defs, list->n_heading_defs, sizeof(MD_HEADING_DEF*), md_heading_def_cmp_for_sort); + + for(j = 1; j < list->n_heading_defs; j++) { + if(md_heading_def_cmp(&list->heading_defs[j-1], &list->heading_defs[j]) == 0) + list->heading_defs[j]->postfix = list->heading_defs[j-1]->postfix + 1; + } + } + + return 0; + +abort: + return -1; +} + +static void +md_free_heading_def_hashtable(MD_CTX* ctx) +{ + if(ctx->heading_def_hashtable != NULL) { + int i; + + for(i = 0; i < ctx->heading_def_hashtable_size; i++) { + void* bucket = ctx->heading_def_hashtable[i]; + if(bucket == NULL) + continue; + if(ctx->heading_defs <= (MD_HEADING_DEF*) bucket && (MD_HEADING_DEF*) bucket < ctx->heading_defs + ctx->n_heading_defs) + continue; + free(bucket); + } + + free(ctx->heading_def_hashtable); + } +} + +static void +md_free_heading_defs(MD_CTX* ctx) +{ + free(ctx->heading_defs); +} /****************************************** *** Processing Inlines (a.k.a Spans) *** @@ -3627,8 +4096,8 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* If it is a link, we store the destination and title in the two * dummy marks after the opener. 
*/ MD_ASSERT(ctx->marks[opener_index+1].ch == 'D'); - ctx->marks[opener_index+1].beg = attr.dest_beg; - ctx->marks[opener_index+1].end = attr.dest_end; + md_mark_store_ptr(ctx, opener_index+1, attr.dest); + ctx->marks[opener_index+1].prev = attr.dest_size; MD_ASSERT(ctx->marks[opener_index+2].ch == 'D'); md_mark_store_ptr(ctx, opener_index+2, attr.title); @@ -4148,6 +4617,8 @@ md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ targ return ret; } +/** forward declaration */ +static int md_output_toc(MD_CTX *ctx); /* Render the output, accordingly to the analyzed ctx->marks. */ static int @@ -4295,7 +4766,8 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'), (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), - STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE, + md_mark_get_ptr(ctx, (int)(dest_mark - ctx->marks)), + dest_mark->prev, FALSE, md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)), title_mark->prev)); @@ -4617,6 +5089,10 @@ struct MD_BLOCK_tag { * MD_BLOCK_OL: Start item number. */ unsigned n_lines; + /* MD_BLOCK_H: reference definition index + */ + unsigned heading_def; // todo rename me to heading_idx ? + }; struct MD_CONTAINER_tag { @@ -4746,6 +5222,27 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE return ret; } +static int +md_setup_H_identifier(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_H_DETAIL* det, + MD_ATTRIBUTE_BUILD* id_build) +{ + + int ret = 0; + + /* Build info string attribute. 
*/ + + MD_HEADING_DEF * heading = &ctx->heading_defs[block->heading_def]; + if(heading->postfix == 0) { + MD_CHECK(md_build_trivial_attribute(ctx, &ctx->identifiers[heading->ident_beg]+1, + heading->ident_size-1, &det->identifier, id_build)); + } else { + MD_CHECK(md_build_attribute_postfix(ctx, &ctx->identifiers[heading->ident_beg]+1, + heading->ident_size-1, heading->postfix, &det->identifier, id_build)); + } +abort: + return ret; +} + static int md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) { @@ -4754,6 +5251,8 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) MD_BLOCK_CODE_DETAIL code; MD_BLOCK_TABLE_DETAIL table; } det; + MD_ATTRIBUTE_BUILD identifier_build; + int clean_header_detail = FALSE; MD_ATTRIBUTE_BUILD info_build; MD_ATTRIBUTE_BUILD lang_build; int is_in_tight_list; @@ -4770,7 +5269,11 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) switch(block->type) { case MD_BLOCK_H: det.header.level = block->data; - break; + if (ctx->parser.flags & MD_FLAG_HEADINGAUTOID){ + clean_header_detail = TRUE; + MD_CHECK(md_setup_H_identifier(ctx, block, &det.header, &identifier_build )); + } + break; case MD_BLOCK_CODE: /* For fenced code block, we may need to set the info string. 
*/ @@ -4816,6 +5319,10 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) (const MD_LINE*)(block + 1), block->n_lines)); break; + case MD_BLOCK_NAV: + MD_CHECK(md_output_toc(ctx)); + break; + default: MD_CHECK(md_process_normal_block_contents(ctx, (const MD_LINE*)(block + 1), block->n_lines)); @@ -4826,6 +5333,9 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) MD_LEAVE_BLOCK(block->type, (void*) &det); abort: + if(clean_header_detail) { + md_free_attribute(ctx, &identifier_build); + } if(clean_fence_code_detail) { md_free_attribute(ctx, &info_build); md_free_attribute(ctx, &lang_build); @@ -4986,6 +5496,10 @@ md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line) block->type = MD_BLOCK_HTML; break; + case MD_LINE_TOC: + block->type = MD_BLOCK_NAV; + break; + case MD_LINE_BLANK: case MD_LINE_SETEXTUNDERLINE: case MD_LINE_TABLEUNDERLINE: @@ -5053,6 +5567,47 @@ md_consume_link_reference_definitions(MD_CTX* ctx) return 0; } +/* Build the identifier for this heading and remember them so we can + * resolve any link referring to them. + * + */ +static int +md_make_heading(MD_CTX* ctx) +{ + int ret = 0; + + MD_BLOCK* block = ctx->current_block; + MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1); + + MD_HEADING_DEF * def = NULL; + MD_REF_DEF * rdef = NULL; + MD_CHECK(md_push_heading_def(ctx)); + def = &ctx->heading_defs[ctx->n_heading_defs]; + memset(def, 0, sizeof(MD_HEADING_DEF)); + + // filling of the heading def + MD_CHECK(md_heading_build_ident(ctx, def, lines, block->n_lines, block->data)); + block->heading_def = ctx->n_heading_defs; + ctx->n_heading_defs++; + + // remember the heading as a reference definition + MD_CHECK(md_push_ref_def(ctx)); + rdef = &ctx->ref_defs[ctx->n_ref_defs]; + memset(rdef, 0, sizeof(MD_REF_DEF)); + rdef->label = def->heading; + rdef->label_size = def->heading_size; + + rdef->dest = &ctx->identifiers[def->ident_beg]; + rdef->dest_size = def->ident_size; + + + /* Success. 
*/ + ctx->n_ref_defs++; + +abort: + return ret; +} + static int md_end_current_block(MD_CTX* ctx) { @@ -5090,6 +5645,10 @@ md_end_current_block(MD_CTX* ctx) } } + if(ctx->current_block->type == MD_BLOCK_H && (ctx->parser.flags & MD_FLAG_HEADINGAUTOID)){ + MD_CHECK(md_make_heading(ctx)); + } + /* Mark we are not building any block anymore. */ ctx->current_block = NULL; @@ -5284,6 +5843,32 @@ md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count) return TRUE; } +static int +md_is_toc_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end) +{ + OFF off = beg; + const CHAR * toc = ctx->parser.toc_options.toc_placeholder; + + // allow for blank chars before the TOC mark + while(off < ctx->size && ISBLANK(off)) + off++; + + if(off < ctx->size && ISNEWLINE(off)) + return FALSE; + + while(off < ctx->size && '\0' != *toc){ + if(CH(off) != *toc) + return FALSE; + toc++; + off++; + } + if('\0' == *toc){ + *p_beg = off; + *p_end = off; + } + return '\0' == *toc; +} + static int md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end) { @@ -5671,8 +6256,9 @@ md_leave_child_containers(MD_CTX* ctx, int n_keep) static int md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container) { - OFF off = beg; OFF max_end; + OFF off = beg; + if(off >= ctx->size || indent >= ctx->code_indent_offset) return FALSE; @@ -5726,6 +6312,152 @@ md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTA return FALSE; } +static int +md_heading_build_ident(MD_CTX* ctx, MD_HEADING_DEF* def, MD_LINE* lines, int n_lines, int level) +{ + MD_MARK* mark; + CHAR* ptr; + int ret = 0; + + const MD_LINE* line = lines; + OFF beg = lines[0].beg; + OFF off = beg; + OFF end = lines[n_lines-1].end; + + /* store the heading */ + def->heading = (CHAR*)STR(beg); + def->heading_size = end-beg; + /* store the heading level */ + def->level = level; + + /* Reset the previously collected stack of marks. 
*/ + ctx->n_marks = 0; + + MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE)); + + /* Find first resolved mark. Note there is always at least one resolved + * mark, the dummy last one after the end of the latest line we actually + * never really reach. This saves us of a lot of special checks and cases + * in this function. */ + mark = ctx->marks; + while(!(mark->flags & MD_MARK_RESOLVED)) + mark++; + + /* The identifier will not be bigger than the heading + '#' */ + def->ident_size = end - beg + 1; + MD_CHECK(md_alloc_identifiers(ctx, def)); + + /* copy the ident and transform as needed */ + ptr = &ctx->identifiers[def->ident_beg]; + *ptr++ = _T('#'); // start with a '#' + while(1) { + + OFF line_end = line->end; + /* Process the text up to the next mark or end-of-line. */ + OFF tmp = (line->end < mark->beg ? line->end : mark->beg); + if(end < line_end) + line_end = end; + + while(off < tmp) { + unsigned codepoint; + SZ char_size; + + if( CH(off) == _T('-') ){ // '-' are not replaced + *ptr++ = _T('-'); + off++; + continue; + } + + codepoint = md_decode_unicode(ctx->text, off, line_end, &char_size); + if(ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE(off)) {// replace white spaces by '-' + *ptr++ = _T('-'); + off = md_skip_unicode_whitespace(ctx->text, off, line_end); + } else if (ISUNICODEPUNCT_(codepoint) || ISUNICODESYMBOL_(codepoint)) { // skip ponctuation and symbols + off += char_size; + continue; + } else { // make lower case + MD_UNICODE_FOLD_INFO fold_info; + md_get_unicode_fold_info(codepoint, &fold_info); + for (unsigned i = 0; i < fold_info.n_codepoints; i++) { + SZ n = md_encode_unicode(fold_info.codepoints[i], ptr); + ptr += n; + } + off += char_size; + } + } + /* If reached the mark, process it and move to next one. */ + if(off >= mark->beg) { + switch(mark->ch) { + + case '[': /* Link, wiki link, image. */ + case '!': + case ']': + { + const MD_MARK* opener = (mark->ch != ']' ? 
mark : &ctx->marks[mark->prev]); + const MD_MARK* closer = &ctx->marks[opener->next]; + const MD_MARK* dest_mark; + const MD_MARK* title_mark; + + if ((opener->ch == '[' && closer->ch == ']') && + opener->end - opener->beg >= 2 && + closer->end - closer->beg >= 2) + { + break; + } + + dest_mark = opener+1; + MD_ASSERT(dest_mark->ch == 'D'); + title_mark = opener+2; + if (title_mark->ch != 'D') break; + + /* link/image closer may span multiple lines. */ + if(mark->ch == ']') { + while(mark->end > line->end) + line++; + } + + break; + } + } + + off = mark->end; + + /* Move to next resolved mark. But not past the last mark */ + if(mark < &ctx->marks[ctx->n_marks]) + mark++; + while((mark < &ctx->marks[ctx->n_marks]) && + ( !(mark->flags & MD_MARK_RESOLVED) || mark->beg < off)) + { + mark++; + } + } + + /* If reached end of line, move to next one. */ + if(off >= line->end) { + /* If it is the last line, we are done. */ + if(off >= end) { + // update real identifier size + def->ident_size = (MD_SIZE)(ptr - &ctx->identifiers[def->ident_beg]); + break; + } + + *ptr = _T('-'); // end of line + ptr++; + + /* Move to the next line. */ + line++; + off = line->beg; + } + } + // update used identifier buffer size + ctx->identifiers_size += def->ident_size; + + return 0; +abort: + + return -1; +} + static unsigned md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end) { @@ -6104,6 +6836,15 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, } } + /* check for TOC mark */ + if(ctx->parser.toc_options.toc_placeholder != NULL && !ctx->toc_found && + md_is_toc_line(ctx, off, &line->beg, &off)) + { + line->type = MD_LINE_TOC; + ctx->toc_found = TRUE; + break; + } + /* By default, we are normal text line. 
*/ line->type = MD_LINE_TEXT; if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) { @@ -6303,6 +7044,69 @@ md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANAL return ret; } +static int +md_output_toc(MD_CTX *ctx) +{ + MD_HEADING_DEF *hd; + MD_BLOCK_LI_DETAIL li_det = {0}; + + MD_ATTRIBUTE_BUILD href_build = {0}; + MD_ATTRIBUTE_BUILD title_build = {0}; + MD_SPAN_A_DETAIL a_det; + int ret = 0; + int level = 0; + int i; + + for (i = 0; i < ctx->n_heading_defs; ++i){ + hd = &ctx->heading_defs[i]; + while (hd->level > level){ + ++level; + if (level <= ctx->parser.toc_options.depth) + MD_ENTER_BLOCK(MD_BLOCK_UL, NULL); + } + while (hd->level < level){ + if (level <= ctx->parser.toc_options.depth) + MD_LEAVE_BLOCK(MD_BLOCK_UL, NULL); + --level; + } + + if (level <= ctx->parser.toc_options.depth){ + MD_ENTER_BLOCK(MD_BLOCK_LI, &li_det); + memset(&a_det, 0, sizeof(MD_SPAN_A_DETAIL)); + if (hd->postfix == 0){ + MD_CHECK(md_build_attribute(ctx, hd->identifier, hd->ident_size, + MD_BUILD_ATTR_NO_ESCAPES, + &a_det.href, &href_build)); + } else { + MD_CHECK(md_build_attribute_postfix(ctx, + hd->identifier, hd->ident_size, + hd->postfix, &a_det.href, &href_build)); + } + + MD_CHECK(md_build_attribute(ctx, NULL, 0, 0, &a_det.title, &title_build)); + + MD_ENTER_SPAN(MD_SPAN_A, &a_det); + + MD_TEXT(MD_TEXT_NORMAL, hd->heading, hd->heading_size); + MD_LEAVE_SPAN(MD_SPAN_A, NULL); + MD_LEAVE_BLOCK(MD_BLOCK_LI, NULL); + } + + } + + // close remaining opened level + while (level > 0){ + if (level <= ctx->parser.toc_options.depth) + MD_LEAVE_BLOCK(MD_BLOCK_UL, NULL); + --level; + } + +abort: + md_free_attribute(ctx, &href_build); + md_free_attribute(ctx, &title_build); + return ret; +} + static int md_process_doc(MD_CTX *ctx) { @@ -6324,8 +7128,18 @@ md_process_doc(MD_CTX *ctx) md_end_current_block(ctx); + if(ctx->parser.flags & MD_FLAG_HEADINGAUTOID) { + MD_CHECK(md_build_heading_def_hashtable(ctx)); + } 
MD_CHECK(md_build_ref_def_hashtable(ctx)); + /* Output the TOC */ + if(ctx->parser.toc_options.depth > 0 && !ctx->toc_found) { + MD_ENTER_BLOCK(MD_BLOCK_NAV, NULL); + MD_CHECK(md_output_toc(ctx)); + MD_LEAVE_BLOCK(MD_BLOCK_NAV, NULL); + } + /* Process all blocks. */ MD_CHECK(md_leave_child_containers(ctx, 0)); MD_CHECK(md_process_all_blocks(ctx)); @@ -6353,6 +7167,19 @@ md_process_doc(MD_CTX *ctx) sprintf(buffer, "Alloced %u bytes for aux. buffer.", (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR))); MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for reference definition buffer.", + (unsigned)(ctx->alloc_ref_defs * sizeof(MD_REF_DEF))); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for identifiers buffer.", + (unsigned)(ctx->alloc_identifiers * sizeof(MD_CHAR))); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for heading definition buffer.", + (unsigned)(ctx->alloc_heading_defs * sizeof(MD_HEADING_DEF))); + MD_LOG(buffer); + } #endif @@ -6371,7 +7198,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd int i; int ret; - if(parser->abi_version != 0) { + if(parser->abi_version != 1) { if(parser->debug_log != NULL) parser->debug_log("Unsupported abi_version.", userdata); return -1; @@ -6399,6 +7226,9 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd ret = md_process_doc(&ctx); /* Clean-up. */ + md_free_heading_defs(&ctx); + md_free_heading_def_hashtable(&ctx); + free(ctx.identifiers); md_free_ref_defs(&ctx); md_free_ref_def_hashtable(&ctx); free(ctx.buffer); diff --git a/src/md4c.h b/src/md4c.h index 95f78f9b..8dbd417f 100644 --- a/src/md4c.h +++ b/src/md4c.h @@ -99,7 +99,8 @@ typedef enum MD_BLOCKTYPE { MD_BLOCK_TBODY, MD_BLOCK_TR, MD_BLOCK_TH, - MD_BLOCK_TD + MD_BLOCK_TD, + MD_BLOCK_NAV } MD_BLOCKTYPE; /* Span represents an in-line piece of a document which should be rendered with @@ -259,6 +260,7 @@ typedef struct MD_BLOCK_LI_DETAIL { /* Detailed info for MD_BLOCK_H. 
*/ typedef struct MD_BLOCK_H_DETAIL { unsigned level; /* Header level (1 - 6) */ + MD_ATTRIBUTE identifier; /* identifier, eg {#some-id} or autogenerated from the heading text*/ } MD_BLOCK_H_DETAIL; /* Detailed info for MD_BLOCK_CODE. */ @@ -316,6 +318,7 @@ typedef struct MD_SPAN_WIKILINK { #define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */ #define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */ #define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */ +#define MD_FLAG_HEADINGAUTOID 0x8000 /* Enable header auto identifiers like github. */ #define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS) #define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS) @@ -330,12 +333,28 @@ typedef struct MD_SPAN_WIKILINK { * extensions, bringing the dialect closer to the original, are implemented. */ #define MD_DIALECT_COMMONMARK 0 -#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS) +#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS | MD_FLAG_HEADINGAUTOID) + +/* Table of content option structure + */ +typedef struct MD_TOC_OPTIONS { + /* Specify the maximum level of heading to include in the table of contents. + * a value of 0 disable Table of content generation + */ + int depth; + + /* Specify a table of content placeholder. + * + * Providing a empty or NULL placeholder will output the TOC at document start. + */ + const MD_CHAR* toc_placeholder; + +} MD_TOC_OPTIONS; /* Parser structure. */ typedef struct MD_PARSER { - /* Reserved. Set to zero. + /* Reserved. Set to 1. 
*/ unsigned abi_version; @@ -375,6 +394,12 @@ typedef struct MD_PARSER { */ void (*debug_log)(const char* /*msg*/, void* /*userdata*/); + /* Table of content parameters + * + * + */ + MD_TOC_OPTIONS toc_options; + /* Reserved. Set to NULL. */ void (*syntax)(void); diff --git a/test/heading-auto-identifier.txt b/test/heading-auto-identifier.txt new file mode 100644 index 00000000..09969626 --- /dev/null +++ b/test/heading-auto-identifier.txt @@ -0,0 +1,163 @@ + +# Heading auto identifiers + +With the flag `MD_FLAG_HEADINGAUTOID`, MD4C generate an identifier for a heading. + +```````````````````````````````` example +# heading +. +

heading

+```````````````````````````````` + +Spaces are replaced by `-` and uppercase are replaced by lower case + +```````````````````````````````` example +# The Heading +. +

The Heading

+```````````````````````````````` + +Unicode characters can also be put lower case + +```````````````````````````````` example +# ĀĄŁŇŢŰŽבあИЯ𐒰 +. +

ĀĄŁŇŢŰŽבあИЯ𐒰

+```````````````````````````````` + + +The non-alphanumeric characters are discarded except for `-`. + +```````````````````````````````` example +# The %@!= stupid _ heading ! +. +

The %@!= stupid _ heading !

+```````````````````````````````` + +As a result, you can get some empty heading with no identifier. + +```````````````````````````````` example +# ! +. +

!

+```````````````````````````````` + +Heading starting with numbers are not treated differently + +```````````````````````````````` example +# 1.1 The start +. +

1.1 The start

+```````````````````````````````` + +Heading can contain link inside + +```````````````````````````````` example +# Title with a [link](hidden) inside +. +

Title with a link inside

+```````````````````````````````` + +Headings can contain a wiki link inside, but this requires the MD_FLAG_WIKILINKS flag + +```````````````````````````````` example +# Title with a [[hidden-wiki|link]] inside +. +

Title with a [[hidden-wiki|link]] inside

+```````````````````````````````` + +Heading can contain formatting + +```````````````````````````````` example +# Title with *emphasis* inside +. +

Title with emphasis inside

+```````````````````````````````` + +Headings can contain emoji codes like :emoji:; they are treated as normal text + +```````````````````````````````` example +# emoji1 :+1: +# emoji2 :-1: +# emoji3 :100: +. +

emoji1 :+1:

+

emoji2 :-1:

+

emoji3 :100:

+```````````````````````````````` + +But unicode emoji characters are stripped + +```````````````````````````````` example +# emoji4 👍 +# emoji5 💯 +# the + sign +. +

emoji4 👍

+

emoji5 💯

+

the + sign

+```````````````````````````````` + +Identical headings get a numeric suffix. + +```````````````````````````````` example +# title +# title +## title +### title +# Title +# title +# ti!tle +# title +# title +# title +# title +# title +. +

title

+

title

+

title

+

title

+

Title

+

title

+

ti!tle

+

title

+

title

+

title

+

title

+

title

+```````````````````````````````` + +# Coverage + +additional test to improve test coverage. + +No heading in a document + +```````````````````````````````` example +no heading +. +

no heading

+```````````````````````````````` + +Multi line heading require a link so it can contain a new line. + +```````````````````````````````` example +Title with a [multi +line +link](link) inside +====================== +. +

Title with a multi +line +link inside

+ +```````````````````````````````` + +We need to be able to parse an empty title +```````````````````````````````` example +# +. +

+```````````````````````````````` + diff --git a/test/pathological_auto_ident_tests.py b/test/pathological_auto_ident_tests.py new file mode 100755 index 00000000..269fb917 --- /dev/null +++ b/test/pathological_auto_ident_tests.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import re +import argparse +import sys +import platform +from cmark import CMark +from timeit import default_timer as timer + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Run cmark tests.') + parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, + help='program to test') + parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') + args = parser.parse_args(sys.argv[1:]) + +cmark = CMark(prog=args.program, library_dir=args.library_dir) + +# list of pairs consisting of input and a regex that must match the output. +pathological = { + # note - some pythons have limit of 65535 for {num-matches} in re. + + "many identical heading": + (("# a\n" * (50000+1)), + re.compile("^

a

\n(

a

\n){50000}$")), + "too many identical heading": + (("# a\n" * (70000+2)), + re.compile("^

a

\n(

a

\n){70000}(

a

\n)$")), + "heading realocation": + (("# A long title to trigger a reallocation\n"*(300+1)), + re.compile("^

A long title to trigger a reallocation

\n(

A long title to trigger a reallocation

\n){300}$")) +} + +whitespace_re = re.compile('/s+/') +passed = 0 +errored = 0 +failed = 0 + +#print("Testing pathological cases:") +for description in pathological: + (inp, regex) = pathological[description] + start = timer() + [rc, actual, err] = cmark.to_html(inp) + end = timer() + if rc != 0: + errored += 1 + print('{:35} [ERRORED (return code %d)]'.format(description, rc)) + print(err) + elif regex.search(actual): + print('{:35} [PASSED] {:.3f} secs'.format(description, end-start)) + passed += 1 + else: + print('{:35} [FAILED]'.format(description)) + print(repr(actual)) + failed += 1 + +print("%d passed, %d failed, %d errored" % (passed, failed, errored)) +if (failed == 0 and errored == 0): + exit(0) +else: + exit(1) diff --git a/test/toc-mark.txt b/test/toc-mark.txt new file mode 100644 index 00000000..68283728 --- /dev/null +++ b/test/toc-mark.txt @@ -0,0 +1,85 @@ +# Table of content mark + +The TOC mark allow to place the toc where you need it. +Run the example with --toc=[[__TOC__]] + +```````````````````````````````` example +# title +# table of content +[[__TOC__]] +# some chapter +. +

title

+

table of content

+ +

some chapter

+```````````````````````````````` + +Only the first mark is replaced by the TOC + + + +```````````````````````````````` example +# title +[[__TOC__]] +[[__TOC__]] +. +

title

+ +

[[TOC]]

+```````````````````````````````` + +The TOC mark must be alone at start of a line or it is invalid: + +```````````````````````````````` example +# title +invalid [[__TOC__]] mark +. + +

title

+

invalid [[TOC]] mark

+ +```````````````````````````````` + +But you can have space at start of a line: + +```````````````````````````````` example +# title + [[__TOC__]] mark +. +

title

+ +```````````````````````````````` + +The text after the TOC mark is discarded: + +```````````````````````````````` example +# title +[[__TOC__]] discarded text +. +

title

+ +```````````````````````````````` + diff --git a/test/toc.txt b/test/toc.txt new file mode 100644 index 00000000..a21fc788 --- /dev/null +++ b/test/toc.txt @@ -0,0 +1,104 @@ +# Table of content + +With the option `--table-of-content`, MD4C enables extension for output of +toc. + +Basic toc may look as follows: + +```````````````````````````````` example +# title +. + +

title

+```````````````````````````````` + +By default, the toc-depth is limited to heading of level 3 + +```````````````````````````````` example +# title level 1 +## title level 2 +### title level 3 +#### title level 4 +##### title level 5 +. + +

title level 1

+

title level 2

+

title level 3

+

title level 4

+
title level 5
+```````````````````````````````` + +The toc can skip some level + +```````````````````````````````` example +### title level 3 +# title level 1 +## title level 2 +##### title level 5 +### title level 3 again +. + +

title level 3

+

title level 1

+

title level 2

+
title level 5
+

title level 3 again

+```````````````````````````````` + +# Coverage + +Additional test to improve test coverage. + +This sample will output TOC with heading suffix numbers. + + +```````````````````````````````` example +# title +## title +### title +. + +

title

+

title

+

title

+```````````````````````````````` \ No newline at end of file