diff --git a/README.md b/README.md
index 9abe987e..35d26cd4 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,10 @@ extensions:
* With the flag `MD_FLAG_UNDERLINE`, underscore (`_`) denotes an underline
instead of an ordinary emphasis or strong emphasis.
+* With the flag `MD_FLAG_HEADINGAUTOID`, unique identifiers are generated for
+  headings. The HTML renderer outputs them as the `id` attribute of the heading
+  tag, for example `<h1 id="title">Title</h1>`.
+
Few features of CommonMark (those some people see as mis-features) may be
disabled with the following flags:
diff --git a/md2html/md2html.c b/md2html/md2html.c
index 06b2b74b..139a6eda 100644
--- a/md2html/md2html.c
+++ b/md2html/md2html.c
@@ -42,8 +42,10 @@ static unsigned parser_flags = 0;
#endif
static int want_fullhtml = 0;
static int want_xhtml = 0;
+static int want_toc = 0;
static int want_stat = 0;
+MD_TOC_OPTIONS toc_options = { 0, NULL};
/*********************************
*** Simple grow-able buffer ***
@@ -142,7 +144,7 @@ process_file(FILE* in, FILE* out)
t0 = clock();
ret = md_html(buf_in.data, (MD_SIZE)buf_in.size, process_output, (void*) &buf_out,
- parser_flags, renderer_flags);
+ parser_flags, renderer_flags, &toc_options);
t1 = clock();
if(ret != 0) {
@@ -200,6 +202,9 @@ static const CMDLINE_OPTION cmdline_options[] = {
{ 'o', "output", 'o', CMDLINE_OPTFLAG_REQUIREDARG },
{ 'f', "full-html", 'f', 0 },
{ 'x', "xhtml", 'x', 0 },
+ { 't', "table-of-content", 't', CMDLINE_OPTFLAG_OPTIONALARG },
+ { 0, "toc", 't', CMDLINE_OPTFLAG_OPTIONALARG },
+ { 0, "toc-depth", 'd', CMDLINE_OPTFLAG_REQUIREDARG },
{ 's', "stat", 's', 0 },
{ 'h', "help", 'h', 0 },
{ 'v', "version", 'v', 0 },
@@ -220,6 +225,7 @@ static const CMDLINE_OPTION cmdline_options[] = {
{ 0, "funderline", '_', 0 },
{ 0, "fverbatim-entities", 'E', 0 },
{ 0, "fwiki-links", 'K', 0 },
+ { 0, "fheading-auto-id", '#', 0 },
{ 0, "fno-html-blocks", 'F', 0 },
{ 0, "fno-html-spans", 'G', 0 },
@@ -240,6 +246,11 @@ usage(void)
" -o --output=FILE Output file (default is standard output)\n"
" -f, --full-html Generate full HTML document, including header\n"
" -x, --xhtml Generate XHTML instead of HTML\n"
+ " -t, --table-of-content=MARK, --toc=MARK\n"
+ " Generate a table of content in place of MARK line\n"
+ " If no MARK is given, the toc is generated at start\n"
+ " --toc-depth=D Set the maximum level of heading in the table\n"
+ " of content. 1 to 6. Default is 3\n"
" -s, --stat Measure time of input parsing\n"
" -h, --help Display this help and exit\n"
" -v, --version Display version and exit\n"
@@ -269,6 +280,8 @@ usage(void)
" --ftasklists Enable task lists\n"
" --funderline Enable underline spans\n"
" --fwiki-links Enable wiki links\n"
+ " --fheading-auto-id\n"
+ " Enable heading auto identifier\n"
"\n"
"Markdown suppression options:\n"
" --fno-html-blocks\n"
@@ -295,6 +308,12 @@ version(void)
static const char* input_path = NULL;
static const char* output_path = NULL;
+/* Parse the argument of --toc-depth into toc_options.depth.
+ *
+ * The value must be exactly one decimal digit in the range 1-6. Anything
+ * else (an empty string, or trailing characters as in "10" or "3x") is
+ * rejected and toc_options.depth is left at -1.
+ *
+ * Returns non-zero if the value is valid, zero otherwise.
+ */
+static int parse_toc_depth(char const* value){
+    toc_options.depth = -1;
+    /* Looking at the first character only would silently accept "10" as 1. */
+    if(value != NULL && value[0] >= '1' && value[0] <= '6' && value[1] == '\0')
+        toc_options.depth = value[0] - '0';
+    return (toc_options.depth>0 && toc_options.depth <= 6);
+}
+
static int
cmdline_callback(int opt, char const* value, void* data)
{
@@ -311,6 +330,20 @@ cmdline_callback(int opt, char const* value, void* data)
case 'o': output_path = value; break;
case 'f': want_fullhtml = 1; break;
case 'x': want_xhtml = 1; renderer_flags |= MD_HTML_FLAG_XHTML; break;
+ case 't':
+ want_toc = 1;
+ parser_flags |= MD_FLAG_HEADINGAUTOID;
+ toc_options.toc_placeholder = value;
+ if(toc_options.depth == 0)
+ toc_options.depth = 3;
+ break;
+ case 'd':
+ if(!parse_toc_depth(value)){
+ fprintf(stderr, "Invalid toc-depth: %s\n", value);
+ fprintf(stderr, "Must be a number in the range 1-6\n");
+ exit(1);
+ }
+ break;
case 's': want_stat = 1; break;
case 'h': usage(); exit(0); break;
case 'v': version(); exit(0); break;
@@ -335,6 +368,7 @@ cmdline_callback(int opt, char const* value, void* data)
case 'K': parser_flags |= MD_FLAG_WIKILINKS; break;
case 'X': parser_flags |= MD_FLAG_TASKLISTS; break;
case '_': parser_flags |= MD_FLAG_UNDERLINE; break;
+ case '#': parser_flags |= MD_FLAG_HEADINGAUTOID; break;
default:
fprintf(stderr, "Illegal option: %s\n", value);
diff --git a/scripts/build_symbol_map.py b/scripts/build_symbol_map.py
new file mode 100644
index 00000000..bd19f5a5
--- /dev/null
+++ b/scripts/build_symbol_map.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""Generate the SYMBOL_MAP table (Unicode categories Sm, Sc, Sk, So).
+
+Reads unicode/DerivedGeneralCategory.txt located next to this script and
+writes a C array of R(min,max) / S(codepoint) records to standard output.
+"""
+
+import os
+import sys
+import textwrap
+
+
+self_path = os.path.dirname(os.path.realpath(__file__))
+data_path = os.path.join(self_path, "unicode", "DerivedGeneralCategory.txt")
+
+codepoint_list = []
+category_list = ("Sm", "Sc", "Sk", "So")
+
+# Filter codepoints falling in the right category. The context manager makes
+# sure the file is closed even when a malformed line raises.
+with open(data_path, "r", encoding="utf-8") as f:
+    for line in f:
+        # Drop the trailing comment; skip lines with no data left.
+        line = line.partition("#")[0].strip()
+        if not line:
+            continue
+
+        char_range, category = (field.strip() for field in line.split(";"))
+        if category not in category_list:
+            continue
+
+        # Either a "XXXX..YYYY" range or a single "XXXX" codepoint.
+        first, delim, last = char_range.partition("..")
+        codepoint0 = int(first, 16)
+        codepoint1 = int(last, 16) if delim else codepoint0
+        codepoint_list.extend(range(codepoint0, codepoint1 + 1))
+
+
+codepoint_list.sort()
+
+
+# Collapse runs of consecutive codepoints into R(...) records; isolated
+# codepoints become S(...) records.
+index0 = 0
+count = len(codepoint_list)
+
+records = []
+while index0 < count:
+    index1 = index0 + 1
+    while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1:
+        index1 += 1
+
+    if index1 - index0 > 1:
+        # Range of codepoints
+        records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
+    else:
+        # Single codepoint
+        records.append("S(0x{:04x})".format(codepoint_list[index0]))
+
+    index0 = index1
+
+sys.stdout.write("static const unsigned SYMBOL_MAP[] = {\n")
+sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
+                 initial_indent = " ", subsequent_indent=" ")))
+sys.stdout.write("\n};\n\n")
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
index c00b36a9..6ed95ffa 100755
--- a/scripts/run-tests.sh
+++ b/scripts/run-tests.sh
@@ -70,6 +70,22 @@ echo
echo "Underline extension:"
$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/underline.txt" -p "$PROGRAM --funderline"
+echo
+echo "Heading auto identifiers extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/heading-auto-identifier.txt" -p "$PROGRAM --fheading-auto-id"
+
echo
echo "Pathological input:"
$PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM"
+
+echo
+echo "Heading auto identifiers pathological input:"
+$PYTHON "$TEST_DIR/pathological_auto_ident_tests.py" -p "$PROGRAM --fheading-auto-id"
+
+echo
+echo "Table of content extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/toc.txt" -p "$PROGRAM --table-of-content"
+
+echo
+echo "Table of content placement extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/toc-mark.txt" -p "$PROGRAM --table-of-content=[[__TOC__]]"
diff --git a/src/md4c-html.c b/src/md4c-html.c
index d604aecb..b6cdf7af 100644
--- a/src/md4c-html.c
+++ b/src/md4c-html.c
@@ -309,6 +309,20 @@ render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
RENDER_VERBATIM(r, ">");
}
+static void
+render_header_block(MD_HTML* r, const MD_BLOCK_H_DETAIL* det)
+{
+ static const MD_CHAR* head[6] = { "
\n"); break;
case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break;
case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break;
+ case MD_BLOCK_NAV: RENDER_VERBATIM(r, "
\n"); break;
case MD_BLOCK_TH: RENDER_VERBATIM(r, "\n"); break;
case MD_BLOCK_TD: RENDER_VERBATIM(r, "\n"); break;
+ case MD_BLOCK_NAV: RENDER_VERBATIM(r, "\n"); break;
}
return 0;
@@ -531,13 +546,14 @@ debug_log_callback(const char* msg, void* userdata)
int
md_html(const MD_CHAR* input, MD_SIZE input_size,
void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
- void* userdata, unsigned parser_flags, unsigned renderer_flags)
+ void* userdata, unsigned parser_flags, unsigned renderer_flags,
+ MD_TOC_OPTIONS* toc_options)
{
MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
int i;
MD_PARSER parser = {
- 0,
+ 1,
parser_flags,
enter_block_callback,
leave_block_callback,
@@ -545,6 +561,7 @@ md_html(const MD_CHAR* input, MD_SIZE input_size,
leave_span_callback,
text_callback,
debug_log_callback,
+ *toc_options,
NULL
};
diff --git a/src/md4c-html.h b/src/md4c-html.h
index 23d3f739..aeac7f52 100644
--- a/src/md4c-html.h
+++ b/src/md4c-html.h
@@ -52,13 +52,16 @@
* Param userdata is just propagated back to process_output() callback.
* Param parser_flags are flags from md4c.h propagated to md_parse().
* Param render_flags is bitmask of MD_HTML_FLAG_xxxx.
+ * Param toc_options is a pointer to toc options from md4c.h propagated to md_parse().
*
* Returns -1 on error (if md_parse() fails.)
* Returns 0 on success.
*/
int md_html(const MD_CHAR* input, MD_SIZE input_size,
void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
- void* userdata, unsigned parser_flags, unsigned renderer_flags);
+ void* userdata, unsigned parser_flags, unsigned renderer_flags,
+ MD_TOC_OPTIONS* toc_options
+ );
#ifdef __cplusplus
diff --git a/src/md4c.c b/src/md4c.c
index 3677c0e0..89f4642c 100644
--- a/src/md4c.c
+++ b/src/md4c.c
@@ -128,7 +128,7 @@ typedef struct MD_MARK_tag MD_MARK;
typedef struct MD_BLOCK_tag MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
typedef struct MD_REF_DEF_tag MD_REF_DEF;
-
+typedef struct MD_HEADING_DEF_tag MD_HEADING_DEF;
/* During analyzes of inline marks, we need to manage some "mark chains",
* of (yet unresolved) openers. This structure holds start/end of the chain.
@@ -163,6 +163,20 @@ struct MD_CTX_tag {
void** ref_def_hashtable;
int ref_def_hashtable_size;
+ /* Heading definitions. */
+ MD_HEADING_DEF* heading_defs;
+ int n_heading_defs;
+ int alloc_heading_defs;
+ void** heading_def_hashtable;
+ int heading_def_hashtable_size;
+ /* autogenerated identifiers for heading */
+ CHAR* identifiers;
+ SZ identifiers_size;
+ SZ alloc_identifiers;
+
+ /* TOC (table of contents) information */
+ int toc_found;
+
/* Stack of inline/span markers.
* This is only used for parsing a single block contents but by storing it
* here we may reuse the stack for subsequent blocks; i.e. we have fewer
@@ -245,7 +259,8 @@ enum MD_LINETYPE_tag {
MD_LINE_HTML,
MD_LINE_TEXT,
MD_LINE_TABLE,
- MD_LINE_TABLEUNDERLINE
+ MD_LINE_TABLEUNDERLINE,
+ MD_LINE_TOC
};
typedef enum MD_LINETYPE_tag MD_LINETYPE;
@@ -292,6 +307,7 @@ struct MD_VERBATIMLINE_tag {
#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
+#define ISSYMBOL_(ch) (ISANYOF3_(ch, _T('+'), _T('|'), _T('~')) || ISIN_(ch, 60, 62))
#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z')))
#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z')))
#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch))
@@ -308,6 +324,7 @@ struct MD_VERBATIMLINE_tag {
#define ISWHITESPACE(off) ISWHITESPACE_(CH(off))
#define ISCNTRL(off) ISCNTRL_(CH(off))
#define ISPUNCT(off) ISPUNCT_(CH(off))
+#define ISSYMBOL(off) ISSYMBOL_(CH(off))
#define ISUPPER(off) ISUPPER_(CH(off))
#define ISLOWER(off) ISLOWER_(CH(off))
#define ISALPHA(off) ISALPHA_(CH(off))
@@ -611,6 +628,64 @@ struct MD_UNICODE_FOLD_INFO_tag {
return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);
}
+ /* Tell whether the given codepoint is a Unicode symbol, i.e. whether it
+  * belongs to one of the general categories "Sm", "Sc", "Sk" or "So".
+  *
+  * Returns non-zero if it is a symbol, zero otherwise.
+  */
+ static int
+ md_is_unicode_symbol__(unsigned codepoint)
+ {
+ /* Table encoding: R() tags the two ends of a codepoint range with the
+  * 0x40000000 / 0x80000000 bits, S() is a single untagged codepoint. */
+#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
+#define S(cp) (cp)
+ /* Unicode "Sm", "Sc", "Sk", "So" categories.
+ * (generated by scripts/build_symbol_map.py) */
+ static const unsigned SYMBOL_MAP[] = {
+ S(0x0024), S(0x002b), R(0x003c,0x003e), S(0x005e), S(0x0060), S(0x007c), S(0x007e), R(0x00a2,0x00a6),
+ R(0x00a8,0x00a9), S(0x00ac), R(0x00ae,0x00b1), S(0x00b4), S(0x00b8), S(0x00d7), S(0x00f7),
+ R(0x02c2,0x02c5), R(0x02d2,0x02df), R(0x02e5,0x02eb), S(0x02ed), R(0x02ef,0x02ff), S(0x0375),
+ R(0x0384,0x0385), S(0x03f6), S(0x0482), R(0x058d,0x058f), R(0x0606,0x0608), S(0x060b), R(0x060e,0x060f),
+ S(0x06de), S(0x06e9), R(0x06fd,0x06fe), S(0x07f6), R(0x07fe,0x07ff), R(0x09f2,0x09f3), R(0x09fa,0x09fb),
+ S(0x0af1), S(0x0b70), R(0x0bf3,0x0bfa), S(0x0c7f), S(0x0d4f), S(0x0d79), S(0x0e3f), R(0x0f01,0x0f03),
+ S(0x0f13), R(0x0f15,0x0f17), R(0x0f1a,0x0f1f), S(0x0f34), S(0x0f36), S(0x0f38), R(0x0fbe,0x0fc5),
+ R(0x0fc7,0x0fcc), R(0x0fce,0x0fcf), R(0x0fd5,0x0fd8), R(0x109e,0x109f), R(0x1390,0x1399), S(0x166d),
+ S(0x17db), S(0x1940), R(0x19de,0x19ff), R(0x1b61,0x1b6a), R(0x1b74,0x1b7c), S(0x1fbd), R(0x1fbf,0x1fc1),
+ R(0x1fcd,0x1fcf), R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), S(0x2044), S(0x2052),
+ R(0x207a,0x207c), R(0x208a,0x208c), R(0x20a0,0x20bf), R(0x2100,0x2101), R(0x2103,0x2106),
+ R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), S(0x2129),
+ S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b),
+ R(0x2190,0x2307), R(0x230c,0x2328), R(0x232b,0x2426), R(0x2440,0x244a), R(0x249c,0x24e9),
+ R(0x2500,0x2767), R(0x2794,0x27c4), R(0x27c7,0x27e5), R(0x27f0,0x2982), R(0x2999,0x29d7),
+ R(0x29dc,0x29fb), R(0x29fe,0x2b73), R(0x2b76,0x2b95), R(0x2b97,0x2bff), R(0x2ce5,0x2cea),
+ R(0x2e50,0x2e51), R(0x2e80,0x2e99), R(0x2e9b,0x2ef3), R(0x2f00,0x2fd5), R(0x2ff0,0x2ffb), S(0x3004),
+ R(0x3012,0x3013), S(0x3020), R(0x3036,0x3037), R(0x303e,0x303f), R(0x309b,0x309c), R(0x3190,0x3191),
+ R(0x3196,0x319f), R(0x31c0,0x31e3), R(0x3200,0x321e), R(0x322a,0x3247), S(0x3250), R(0x3260,0x327f),
+ R(0x328a,0x32b0), R(0x32c0,0x33ff), R(0x4dc0,0x4dff), R(0xa490,0xa4c6), R(0xa700,0xa716),
+ R(0xa720,0xa721), R(0xa789,0xa78a), R(0xa828,0xa82b), R(0xa836,0xa839), R(0xaa77,0xaa79), S(0xab5b),
+ R(0xab6a,0xab6b), S(0xfb29), R(0xfbb2,0xfbc1), R(0xfdfc,0xfdfd), S(0xfe62), R(0xfe64,0xfe66), S(0xfe69),
+ S(0xff04), S(0xff0b), R(0xff1c,0xff1e), S(0xff3e), S(0xff40), S(0xff5c), S(0xff5e), R(0xffe0,0xffe6),
+ R(0xffe8,0xffee), R(0xfffc,0xfffd), R(0x10137,0x1013f), R(0x10179,0x10189), R(0x1018c,0x1018e),
+ R(0x10190,0x1019c), S(0x101a0), R(0x101d0,0x101fc), R(0x10877,0x10878), S(0x10ac8), S(0x1173f),
+ R(0x11fd5,0x11ff1), R(0x16b3c,0x16b3f), S(0x16b45), S(0x1bc9c), R(0x1d000,0x1d0f5), R(0x1d100,0x1d126),
+ R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184), R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1e8),
+ R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1), S(0x1d6db), S(0x1d6fb), S(0x1d715),
+ S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9), S(0x1d7c3), R(0x1d800,0x1d9ff),
+ R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83), R(0x1da85,0x1da86), S(0x1e14f), S(0x1e2ff),
+ S(0x1ecac), S(0x1ecb0), S(0x1ed2e), R(0x1eef0,0x1eef1), R(0x1f000,0x1f02b), R(0x1f030,0x1f093),
+ R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf), R(0x1f0c1,0x1f0cf), R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad),
+ R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b), R(0x1f240,0x1f248), R(0x1f250,0x1f251), R(0x1f260,0x1f265),
+ R(0x1f300,0x1f6d7), R(0x1f6e0,0x1f6ec), R(0x1f6f0,0x1f6fc), R(0x1f700,0x1f773), R(0x1f780,0x1f7d8),
+ R(0x1f7e0,0x1f7eb), R(0x1f800,0x1f80b), R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887),
+ R(0x1f890,0x1f8ad), R(0x1f8b0,0x1f8b1), R(0x1f900,0x1f978), R(0x1f97a,0x1f9cb), R(0x1f9cd,0x1fa53),
+ R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa74), R(0x1fa78,0x1fa7a), R(0x1fa80,0x1fa86), R(0x1fa90,0x1faa8),
+ R(0x1fab0,0x1fab6), R(0x1fac0,0x1fac2), R(0x1fad0,0x1fad6), R(0x1fb00,0x1fb92), R(0x1fb94,0x1fbca)
+ };
+
+#undef R
+#undef S
+
+ /* The ASCII ones are the most frequently used ones. */
+ /* NOTE(review): ASCII codepoints never reach SYMBOL_MAP; they are decided
+  * by ISSYMBOL_() alone, which does not list 0x24, 0x5e and 0x60 although
+  * the table above does -- confirm this difference is intended. */
+ if(codepoint <= 0x7f)
+ return ISSYMBOL_(codepoint);
+
+ /* A non-negative result of the binary search means the codepoint is listed. */
+ return (md_unicode_bsearch__(codepoint, SYMBOL_MAP, SIZEOF_ARRAY(SYMBOL_MAP)) >= 0);
+ }
+
static void
md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
{
@@ -850,6 +925,36 @@ struct MD_UNICODE_FOLD_INFO_tag {
return (unsigned) str[0];
}
+/*
+ * Write the UTF-8 encoding of a codepoint into str.
+ * The caller must provide room for up to 4 units. Nothing is written and 0
+ * is returned for codepoints above 0x10FFFF.
+ * Returns the number of units written (1 to 4).
+ */
+    static unsigned
+    md_encode_utf8__(unsigned codepoint, CHAR* str )
+    {
+        static const unsigned lead_marks[] = { 0xc0, 0xe0, 0xf0 };
+        unsigned n_units;
+        unsigned i;
+
+        if(codepoint > 0x10FFFF)
+            return 0;
+
+        if(codepoint <= 0x7f) {
+            str[0] = (char)codepoint;
+            return 1;
+        }
+
+        n_units = (codepoint <= 0x7FF) ? 2 : ((codepoint <= 0xFFFF) ? 3 : 4);
+
+        /* Continuation units carry 6 payload bits each; fill them from the end. */
+        for(i = n_units - 1; i > 0; i--) {
+            str[i] = 0x80 | (codepoint & 0x3f);
+            codepoint >>= 6;
+        }
+
+        /* Whatever is left of the codepoint goes into the lead unit. */
+        str[0] = lead_marks[n_units - 2] | codepoint;
+        return n_units;
+    }
+
static unsigned
md_decode_utf8_before__(MD_CTX* ctx, OFF off)
{
@@ -877,22 +982,34 @@ struct MD_UNICODE_FOLD_INFO_tag {
#define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
+ #define ISUNICODEPUNCT_(codepoint) md_is_unicode_punct__(codepoint)
#define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
+ #define ISUNICODESYMBOL_(codepoint) md_is_unicode_symbol__(codepoint)
+
static inline unsigned
md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
{
return md_decode_utf8__(str+off, str_size-off, p_char_size);
}
+
+ /* Encode a codepoint into str by delegating to md_encode_utf8__().
+  * Returns whatever md_encode_utf8__() returns (the number of units written). */
+ static inline unsigned
+ md_encode_unicode(unsigned codepoint, CHAR* str )
+ {
+ return md_encode_utf8__(codepoint, str);
+ }
#else
#define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
#define ISUNICODEWHITESPACE(off) ISWHITESPACE(off)
#define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1)
+ #define ISUNICODEPUNCT_(codepoint) ISPUNCT_(codepoint)
#define ISUNICODEPUNCT(off) ISPUNCT(off)
#define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1)
+ #define ISUNICODESYMBOL_(codepoint) ISSYMBOL_(codepoint)
+
static inline void
md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
{
@@ -902,6 +1019,13 @@ struct MD_UNICODE_FOLD_INFO_tag {
info->n_codepoints = 1;
}
+ /* Fallback encoder: stores the codepoint into a single CHAR without any
+  * transformation and reports one unit written. */
+ static unsigned
+ md_encode_unicode(unsigned codepoint, CHAR* str )
+ {
+ *str = codepoint;
+ return 1;
+ }
+
static inline unsigned
md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
{
@@ -1417,11 +1541,107 @@ md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
if(build->substr_alloc > 0) {
free(build->text);
- free(build->substr_types);
- free(build->substr_offsets);
+ if( build->substr_types != build->trivial_types)
+ free(build->substr_types);
+ if( build->substr_offsets != build->trivial_offsets)
+ free(build->substr_offsets);
}
}
+/* Set up an attribute consisting of a single MD_TEXT_NORMAL substring which
+ * refers directly to raw_text. Nothing is allocated (build->substr_alloc
+ * stays 0); the substring arrays live inside the build structure itself.
+ *
+ * Always returns 0.
+ */
+static int
+md_build_trivial_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+                           MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
+{
+    MD_UNUSED(ctx);
+
+    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
+
+    /* One substring covering the whole text. */
+    build->trivial_types[0] = MD_TEXT_NORMAL;
+    build->trivial_offsets[0] = 0;
+    build->trivial_offsets[1] = raw_size;
+    build->substr_count = 1;
+    build->substr_alloc = 0;
+    build->substr_types = build->trivial_types;
+    build->substr_offsets = build->trivial_offsets;
+
+    /* An empty attribute carries no text pointer. */
+    if(raw_size)
+        attr->text = (CHAR*) raw_text;
+    else
+        attr->text = NULL;
+    attr->size = raw_size;
+    attr->substr_types = build->substr_types;
+    attr->substr_offsets = build->substr_offsets;
+    return 0;
+}
+
+/* Write the decimal representation of a 16-bit unsigned value into dest.
+ * The dest buffer must have room for at least 5 characters.
+ * The output is not NUL-terminated.
+ * Returns the number of characters written.
+ */
+static int
+md_int16_to_str(unsigned short n, CHAR* dest){
+    unsigned short rest = n;
+    int n_digits = 0;
+    int i;
+
+    /* Count the digits first; zero still takes one digit. */
+    do {
+        n_digits++;
+        rest /= 10;
+    } while(rest != 0);
+
+    /* Emit the digits from the least significant one backwards. */
+    for(i = n_digits - 1; i >= 0; i--) {
+        dest[i] = '0' + (n % 10);
+        n /= 10;
+    }
+    return n_digits;
+}
+
+/* Build an attribute made of raw_text followed by '-' and the decimal value
+ * of postfix (e.g. "title" and 2 give "title-2").
+ *
+ * The text is copied into a freshly allocated buffer stored in build->text;
+ * build->substr_alloc is set non-zero so that md_free_attribute() releases
+ * it. The postfix is clamped to 65535 so it never needs more than 5 digits.
+ *
+ * Returns 0 on success, -1 if the allocation fails.
+ */
+static int
+md_build_attribute_postfix(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+                           unsigned postfix, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
+{
+    OFF off;
+    const SZ MAX_POSTFIX_SIZE = 5;  /* Digits only; one more CHAR is reserved for the '-'. */
+
+    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
+    build->substr_types = build->trivial_types;
+    build->substr_offsets = build->trivial_offsets;
+    build->substr_count = 1;
+    build->substr_alloc = 1;
+    build->trivial_types[0] = MD_TEXT_NORMAL;
+    build->trivial_offsets[0] = 0;
+    off = raw_size;
+    if (postfix > 0xffff) {
+        /* Clamp to 16 bits so the postfix never exceeds 5 digits. */
+        postfix = 0xffff;
+    }
+
+    build->text = (CHAR*) malloc((raw_size + MAX_POSTFIX_SIZE+1) * sizeof(CHAR));
+    if(build->text == NULL) {
+        MD_LOG("malloc() failed.");
+        goto abort;
+    }
+
+    /* Copy the original text. raw_size counts CHARs while memcpy() wants
+     * bytes, so scale it; otherwise wide-CHAR builds copy only a part. */
+    if(raw_size > 0)
+        memcpy(build->text, raw_text, raw_size * sizeof(CHAR));
+    /* Append the postfix. */
+    build->text[off++] = _T('-');
+    off += md_int16_to_str((unsigned short) postfix, &build->text[off]);
+
+    attr->text = build->text;
+    build->trivial_offsets[1] = off;
+    attr->size = off;
+    attr->substr_offsets = build->substr_offsets;
+    attr->substr_types = build->substr_types;
+    return 0;
+
+abort:
+    md_free_attribute(ctx, build);
+    return -1;
+}
+
static int
md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
@@ -1507,7 +1727,6 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
return -1;
}
-
/*********************************************
*** Dictionary of Reference Definitions ***
*********************************************/
@@ -1534,11 +1753,11 @@ md_fnv1a(unsigned base, const void* data, size_t n)
struct MD_REF_DEF_tag {
CHAR* label;
CHAR* title;
+ CHAR* dest;
unsigned hash;
SZ label_size;
SZ title_size;
- OFF dest_beg;
- OFF dest_end;
+ SZ dest_size;
unsigned char label_needs_free : 1;
unsigned char title_needs_free : 1;
};
@@ -1872,8 +2091,8 @@ md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
struct MD_LINK_ATTR_tag {
- OFF dest_beg;
- OFF dest_end;
+ CHAR* dest;
+ SZ dest_size;
CHAR* title;
SZ title_size;
@@ -1955,7 +2174,7 @@ md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
static int
md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end)
+ CHAR** p_contents, SZ* p_contents_size)
{
OFF off = beg;
@@ -1974,8 +2193,8 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
if(CH(off) == _T('>')) {
/* Success. */
- *p_contents_beg = beg+1;
- *p_contents_end = off;
+ *p_contents = (CHAR*)STR(beg+1);
+ *p_contents_size = off - (beg+1);
*p_end = off+1;
return TRUE;
}
@@ -1988,7 +2207,7 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
static int
md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end)
+ CHAR** p_contents, SZ* p_contents_size)
{
OFF off = beg;
int parenthesis_level = 0;
@@ -2022,20 +2241,20 @@ md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
return FALSE;
/* Success. */
- *p_contents_beg = beg;
- *p_contents_end = off;
+ *p_contents = (CHAR*)STR(beg);
+ *p_contents_size = off - beg;
*p_end = off;
return TRUE;
}
static inline int
md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end)
+ CHAR** p_contents, SZ* p_contents_size)
{
if(CH(beg) == _T('<'))
- return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
+ return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents, p_contents_size);
else
- return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
+ return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents, p_contents_size);
}
static int
@@ -2098,6 +2317,26 @@ md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
return FALSE;
}
+/* Make sure ctx->ref_defs[] has room for one more record at index
+ * ctx->n_ref_defs. The array grows by half of its capacity (16 records
+ * initially).
+ *
+ * Returns 0 on success, -1 if the reallocation fails; in that case the
+ * array and its recorded capacity are left untouched.
+ */
+static int
+md_push_ref_def(MD_CTX* ctx)
+{
+    if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
+        MD_REF_DEF* new_defs;
+        int new_alloc;
+
+        new_alloc = (ctx->alloc_ref_defs > 0
+                ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
+                : 16);
+        new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, new_alloc * sizeof(MD_REF_DEF));
+        if(new_defs == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        /* Commit the new capacity only once the buffer really has it. */
+        ctx->ref_defs = new_defs;
+        ctx->alloc_ref_defs = new_alloc;
+    }
+    return 0;
+}
+
/* Returns 0 if it is not a reference definition.
*
* Returns N > 0 if it is a reference definition. N then corresponds to the
@@ -2113,8 +2352,8 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
OFF label_contents_end;
int label_contents_line_index = -1;
int label_is_multiline = FALSE;
- OFF dest_contents_beg;
- OFF dest_contents_end;
+ CHAR* dest_contents;
+ SZ dest_contents_size;
OFF title_contents_beg;
OFF title_contents_end;
int title_contents_line_index;
@@ -2149,7 +2388,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
/* Link destination. */
if(!md_is_link_destination(ctx, off, lines[line_index].end,
- &off, &dest_contents_beg, &dest_contents_end))
+ &off, &dest_contents, &dest_contents_size))
return FALSE;
/* (Optional) title. Note we interpret it as an title only if nothing
@@ -2175,20 +2414,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
return FALSE;
/* So, it _is_ a reference definition. Remember it. */
- if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
- MD_REF_DEF* new_defs;
-
- ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
- ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
- : 16);
- new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
- if(new_defs == NULL) {
- MD_LOG("realloc() failed.");
- goto abort;
- }
-
- ctx->ref_defs = new_defs;
- }
+ MD_CHECK(md_push_ref_def(ctx));
def = &ctx->ref_defs[ctx->n_ref_defs];
memset(def, 0, sizeof(MD_REF_DEF));
@@ -2212,8 +2438,8 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
def->title_size = title_contents_end - title_contents_beg;
}
- def->dest_beg = dest_contents_beg;
- def->dest_end = dest_contents_end;
+ def->dest = dest_contents;
+ def->dest_size = dest_contents_size;
/* Success. */
ctx->n_ref_defs++;
@@ -2259,8 +2485,8 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
def = md_lookup_ref_def(ctx, label, label_size);
if(def != NULL) {
- attr->dest_beg = def->dest_beg;
- attr->dest_end = def->dest_end;
+ attr->dest = def->dest;
+ attr->dest_size = def->dest_size;
attr->title = def->title;
attr->title_size = def->title_size;
attr->title_needs_free = FALSE;
@@ -2306,8 +2532,8 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
/* Link destination may be omitted, but only when not also having a title. */
if(off < ctx->size && CH(off) == _T(')')) {
- attr->dest_beg = off;
- attr->dest_end = off;
+ attr->dest = (CHAR*)STR(off);
+ attr->dest_size = 0;
attr->title = NULL;
attr->title_size = 0;
attr->title_needs_free = FALSE;
@@ -2318,7 +2544,7 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
/* Link destination. */
if(!md_is_link_destination(ctx, off, lines[line_index].end,
- &off, &attr->dest_beg, &attr->dest_end))
+ &off, &attr->dest, &attr->dest_size))
return FALSE;
/* (Optional) title. */
@@ -2389,6 +2615,249 @@ md_free_ref_defs(MD_CTX* ctx)
free(ctx->ref_defs);
}
+/*********************************************
+ *** Dictionary of Heading Definitions ***
+ *********************************************/
+
+/* One record per heading, stored in ctx->heading_defs[]. */
+struct MD_HEADING_DEF_tag {
+ /* NOTE(review): presumably the heading text and its length; not set in
+  * the code visible here -- confirm. */
+ CHAR* heading;
+ SZ heading_size;
+ /* Points into ctx->identifiers[]; assigned from ident_beg when the hash
+  * table is built. */
+ CHAR* identifier; // only valid after all heading are known
+ /* md_link_label_hash() of the identifier. */
+ unsigned hash;
+ /* Offset and length of the identifier within ctx->identifiers[]. */
+ OFF ident_beg;
+ SZ ident_size;
+ /* 0 when the identifier is used as is; otherwise the number appended to
+  * tell headings with equal identifiers apart. */
+ unsigned postfix;
+ unsigned level:8;
+};
+
+/* Make sure ctx->heading_defs[] has room for one more record at index
+ * ctx->n_heading_defs. The array grows by half of its capacity (16 records
+ * initially).
+ *
+ * Returns 0 on success, -1 if the reallocation fails; in that case the
+ * array and its recorded capacity are left untouched.
+ */
+static int
+md_push_heading_def(MD_CTX* ctx)
+{
+    if(ctx->n_heading_defs >= ctx->alloc_heading_defs) {
+        MD_HEADING_DEF* new_defs;
+        int new_alloc;
+
+        new_alloc = (ctx->alloc_heading_defs > 0
+                ? ctx->alloc_heading_defs + ctx->alloc_heading_defs / 2
+                : 16);
+        new_defs = (MD_HEADING_DEF*) realloc(ctx->heading_defs, new_alloc * sizeof(MD_HEADING_DEF));
+        if(new_defs == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        /* Commit the new capacity only once the buffer really has it. */
+        ctx->heading_defs = new_defs;
+        ctx->alloc_heading_defs = new_alloc;
+    }
+    return 0;
+}
+
+/* Reserve room in ctx->identifiers[] for the identifier of the given heading
+ * and record where it starts (def->ident_beg).
+ *
+ * def->ident_size is read here, so the caller has to set it beforehand.
+ * When the buffer moves, every reference definition whose destination
+ * points into the old buffer is re-based onto the new one.
+ *
+ * Returns 0 on success, -1 if the reallocation fails; in that case the
+ * buffer and its recorded capacity are left untouched.
+ */
+static int
+md_alloc_identifiers(MD_CTX *ctx, MD_HEADING_DEF* def)
+{
+    if (ctx->identifiers_size + def->ident_size >= ctx->alloc_identifiers)
+    {
+        CHAR *new_identifiers;
+        SZ new_alloc = ctx->alloc_identifiers;
+
+        /* A single growth step may be smaller than the identifier itself,
+         * so keep growing until the request really fits. */
+        do {
+            new_alloc = (new_alloc > 0 ? new_alloc + new_alloc / 2 : 512);
+        } while (ctx->identifiers_size + def->ident_size >= new_alloc);
+
+        new_identifiers = (CHAR *)realloc(ctx->identifiers, sizeof(CHAR) * new_alloc);
+        if (new_identifiers == NULL)
+        {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+        if (ctx->identifiers != NULL && ctx->identifiers != new_identifiers){
+            /* Re-base all ref. defs pointing into the old buffer. The range
+             * is inclusive at both ends so that a destination at the very
+             * start of the buffer is not left dangling. */
+            int i;
+            for(i = 0; i < ctx->n_ref_defs; i++) {
+                MD_REF_DEF* ref_def = &ctx->ref_defs[i];
+                if (ref_def->dest >= ctx->identifiers
+                    && ref_def->dest <= ctx->identifiers+ctx->identifiers_size ){
+                    ref_def->dest = new_identifiers + (ref_def->dest - ctx->identifiers);
+                }
+            }
+        }
+        ctx->identifiers = new_identifiers;
+        ctx->alloc_identifiers = new_alloc;
+    }
+
+    def->ident_beg = ctx->identifiers_size;
+    return 0;
+}
+
+/** forward declaration */
+static int
+md_heading_build_ident(MD_CTX* ctx, MD_HEADING_DEF* def, MD_LINE* lines, int n_lines, int level);
+
+/* Hash table bucket able to hold several heading definitions; used once
+ * more than one definition lands in the same bucket. */
+typedef struct MD_HEADING_DEF_LIST_tag MD_HEADING_DEF_LIST;
+struct MD_HEADING_DEF_LIST_tag {
+ /* Number of used items and number of allocated items in heading_defs[]. */
+ int n_heading_defs;
+ int alloc_heading_defs;
+ MD_HEADING_DEF* heading_defs[]; /* Valid items always point into ctx->heading_defs[] */
+};
+
+/* Three-way comparison of two heading definitions, each passed as a pointer
+ * to MD_HEADING_DEF* (the element type of MD_HEADING_DEF_LIST::heading_defs).
+ * Orders by hash first; equal hashes are resolved by comparing the
+ * identifiers with md_link_label_cmp().
+ */
+static int
+md_heading_def_cmp(const void* a, const void* b)
+{
+    const MD_HEADING_DEF* lhs = *(const MD_HEADING_DEF**)a;
+    const MD_HEADING_DEF* rhs = *(const MD_HEADING_DEF**)b;
+
+    if(lhs->hash != rhs->hash)
+        return (lhs->hash < rhs->hash) ? -1 : +1;
+
+    return md_link_label_cmp(lhs->identifier, lhs->ident_size,
+                             rhs->identifier, rhs->ident_size);
+}
+
+/* qsort() comparator: same ordering as md_heading_def_cmp(), with ties
+ * broken by the addresses of the records so that the result of the sort
+ * is deterministic.
+ */
+static int
+md_heading_def_cmp_for_sort(const void* a, const void* b)
+{
+    const MD_HEADING_DEF* lhs;
+    const MD_HEADING_DEF* rhs;
+    int order = md_heading_def_cmp(a, b);
+
+    if(order != 0)
+        return order;
+
+    /* Ensure stability of the sorting. */
+    lhs = *(const MD_HEADING_DEF**)a;
+    rhs = *(const MD_HEADING_DEF**)b;
+    if(lhs < rhs)
+        return -1;
+    return (lhs > rhs) ? +1 : 0;
+}
+
+/* Build ctx->heading_def_hashtable[] from ctx->heading_defs[] and number
+ * the headings whose identifiers compare equal.
+ *
+ * For every heading the identifier pointer and its hash are computed here.
+ * After the complex buckets are sorted, a definition comparing equal to its
+ * predecessor gets the predecessor's postfix plus one.
+ *
+ * Returns 0 on success (also when there are no headings), -1 when an
+ * allocation fails.
+ */
+static int
+md_build_heading_def_hashtable(MD_CTX* ctx)
+{
+ int i, j;
+
+ if(ctx->n_heading_defs == 0)
+ return 0;
+
+ /* The table gets 25 % more slots than there are headings. */
+ ctx->heading_def_hashtable_size = (ctx->n_heading_defs * 5) / 4;
+ ctx->heading_def_hashtable = malloc(ctx->heading_def_hashtable_size * sizeof(void*));
+ if(ctx->heading_def_hashtable == NULL) {
+ MD_LOG("malloc() failed.");
+ goto abort;
+ }
+ memset(ctx->heading_def_hashtable, 0, ctx->heading_def_hashtable_size * sizeof(void*));
+
+ /* Each member of ctx->heading_def_hashtable[] can be:
+ * -- NULL,
+ * -- pointer to the MD_HEADING_DEF in ctx->heading_defs[], or
+ * -- pointer to a MD_HEADING_DEF_LIST, which holds multiple pointers to
+ * such MD_HEADING_DEFs.
+ */
+ for(i = 0; i < ctx->n_heading_defs; i++) {
+ MD_HEADING_DEF* def = &ctx->heading_defs[i];
+ void* bucket;
+ MD_HEADING_DEF_LIST* list;
+
+ // compute identifier hash reusing the link label hash function
+ def->identifier = &ctx->identifiers[def->ident_beg];
+ def->hash = md_link_label_hash(def->identifier, def->ident_size);
+ bucket = ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size];
+
+ if(bucket == NULL) {
+ /* The bucket is empty. Make it just point to the def. */
+ ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = def;
+ continue;
+ }
+
+ /* A bucket pointing inside ctx->heading_defs[] is a single definition;
+  * any other non-NULL bucket is a MD_HEADING_DEF_LIST. */
+ if(ctx->heading_defs <= (MD_HEADING_DEF*) bucket && (MD_HEADING_DEF*) bucket < ctx->heading_defs + ctx->n_heading_defs) {
+ /* The bucket already contains one heading def.*/
+ MD_HEADING_DEF* old_def = (MD_HEADING_DEF*) bucket;
+
+ /* Make the bucket complex, i.e. able to hold more heading defs. */
+ list = (MD_HEADING_DEF_LIST*) malloc(sizeof(MD_HEADING_DEF_LIST) + 2 * sizeof(MD_HEADING_DEF*));
+ if(list == NULL) {
+ MD_LOG("malloc() failed.");
+ goto abort;
+ }
+ list->heading_defs[0] = old_def;
+ list->heading_defs[1] = def;
+ list->n_heading_defs = 2;
+ list->alloc_heading_defs = 2;
+ ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = list;
+ continue;
+ }
+
+ /* Append the def to the complex bucket list. */
+ list = (MD_HEADING_DEF_LIST*) bucket;
+ if(list->n_heading_defs >= list->alloc_heading_defs) {
+ int alloc_heading_defs = list->alloc_heading_defs + list->alloc_heading_defs / 2;
+ MD_HEADING_DEF_LIST* list_tmp = (MD_HEADING_DEF_LIST*) realloc(list,
+ sizeof(MD_HEADING_DEF_LIST) + alloc_heading_defs * sizeof(MD_HEADING_DEF*));
+ if(list_tmp == NULL) {
+ MD_LOG("realloc() failed.");
+ goto abort;
+ }
+ list = list_tmp;
+ list->alloc_heading_defs = alloc_heading_defs;
+ /* realloc() may have moved the list; store the new address back. */
+ ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = list;
+ }
+
+ list->heading_defs[list->n_heading_defs] = def;
+ list->n_heading_defs++;
+ }
+
+ /* Sort the complex buckets so we can use bsearch() with them. */
+ for(i = 0; i < ctx->heading_def_hashtable_size; i++) {
+ void* bucket = ctx->heading_def_hashtable[i];
+ MD_HEADING_DEF_LIST* list;
+
+ if(bucket == NULL)
+ continue;
+ if(ctx->heading_defs <= (MD_HEADING_DEF*) bucket && (MD_HEADING_DEF*) bucket < ctx->heading_defs + ctx->n_heading_defs)
+ continue;
+
+ list = (MD_HEADING_DEF_LIST*) bucket;
+ qsort(list->heading_defs, list->n_heading_defs, sizeof(MD_HEADING_DEF*), md_heading_def_cmp_for_sort);
+
+ /* Neighbours comparing equal share the same identifier: number them
+  * consecutively, continuing from the predecessor's postfix. */
+ for(j = 1; j < list->n_heading_defs; j++) {
+ if(md_heading_def_cmp(&list->heading_defs[j-1], &list->heading_defs[j]) == 0)
+ list->heading_defs[j]->postfix = list->heading_defs[j-1]->postfix + 1;
+ }
+ }
+
+ return 0;
+
+abort:
+ /* NOTE(review): nothing is released here; presumably the caller cleans up
+  * via md_free_heading_def_hashtable() -- confirm. */
+ return -1;
+}
+
+/* Release the heading hash table: every complex bucket
+ * (MD_HEADING_DEF_LIST) and then the table itself. Buckets pointing
+ * straight into ctx->heading_defs[] are left alone. Does nothing when the
+ * table was never allocated.
+ */
+static void
+md_free_heading_def_hashtable(MD_CTX* ctx)
+{
+    int i;
+
+    if(ctx->heading_def_hashtable == NULL)
+        return;
+
+    for(i = 0; i < ctx->heading_def_hashtable_size; i++) {
+        void* entry = ctx->heading_def_hashtable[i];
+
+        /* Only entries which are neither empty nor a plain definition
+         * inside ctx->heading_defs[] were allocated as lists. */
+        if(entry != NULL
+           && !(ctx->heading_defs <= (MD_HEADING_DEF*) entry
+                && (MD_HEADING_DEF*) entry < ctx->heading_defs + ctx->n_heading_defs))
+            free(entry);
+    }
+
+    free(ctx->heading_def_hashtable);
+}
+
+/* Release the array of heading definitions. Only ctx->heading_defs itself
+ * is freed here. */
+static void
+md_free_heading_defs(MD_CTX* ctx)
+{
+ free(ctx->heading_defs);
+}
/******************************************
*** Processing Inlines (a.k.a Spans) ***
@@ -3627,8 +4096,8 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
/* If it is a link, we store the destination and title in the two
* dummy marks after the opener. */
MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
- ctx->marks[opener_index+1].beg = attr.dest_beg;
- ctx->marks[opener_index+1].end = attr.dest_end;
+ md_mark_store_ptr(ctx, opener_index+1, attr.dest);
+ ctx->marks[opener_index+1].prev = attr.dest_size;
MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
md_mark_store_ptr(ctx, opener_index+2, attr.title);
@@ -4148,6 +4617,8 @@ md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ targ
return ret;
}
+/** forward declaration */
+static int md_output_toc(MD_CTX *ctx);
/* Render the output, accordingly to the analyzed ctx->marks. */
static int
@@ -4295,7 +4766,8 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
(opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
- STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
+ md_mark_get_ptr(ctx, (int)(dest_mark - ctx->marks)),
+ dest_mark->prev, FALSE,
md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)),
title_mark->prev));
@@ -4617,6 +5089,10 @@ struct MD_BLOCK_tag {
* MD_BLOCK_OL: Start item number.
*/
unsigned n_lines;
+ /* MD_BLOCK_H: reference definition index
+ */
+ unsigned heading_def; // todo rename me to heading_idx ?
+
};
struct MD_CONTAINER_tag {
@@ -4746,6 +5222,27 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE
return ret;
}
+/* Fill det->identifier with the identifier of the heading block `block`.
+ *
+ * The heading definition is looked up through block->heading_def. Its
+ * identifier text lives in ctx->identifiers[] and starts with a '#', which
+ * is skipped here (hence the +1 / -1 adjustments). A definition with a zero
+ * postfix is built with md_build_trivial_attribute(); otherwise
+ * md_build_attribute_postfix() is used and receives the postfix number.
+ *
+ * The build state is stored into *id_build; the caller is expected to
+ * release it with md_free_attribute().
+ *
+ * Returns 0 on success, or the value MD_CHECK stored into `ret` on failure.
+ */
+static int
+md_setup_H_identifier(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_H_DETAIL* det,
+                      MD_ATTRIBUTE_BUILD* id_build)
+{
+    MD_HEADING_DEF* hdef = ctx->heading_defs + block->heading_def;
+    int ret = 0;
+
+    if(hdef->postfix != 0) {
+        MD_CHECK(md_build_attribute_postfix(ctx, ctx->identifiers + hdef->ident_beg + 1,
+                    hdef->ident_size - 1, hdef->postfix, &det->identifier, id_build));
+    } else {
+        MD_CHECK(md_build_trivial_attribute(ctx, ctx->identifiers + hdef->ident_beg + 1,
+                    hdef->ident_size - 1, &det->identifier, id_build));
+    }
+
+abort:
+    return ret;
+}
+
+
static int
md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
{
@@ -4754,6 +5251,8 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
MD_BLOCK_CODE_DETAIL code;
MD_BLOCK_TABLE_DETAIL table;
} det;
+ MD_ATTRIBUTE_BUILD identifier_build;
+ int clean_header_detail = FALSE;
MD_ATTRIBUTE_BUILD info_build;
MD_ATTRIBUTE_BUILD lang_build;
int is_in_tight_list;
@@ -4770,7 +5269,11 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
switch(block->type) {
case MD_BLOCK_H:
det.header.level = block->data;
- break;
+ if (ctx->parser.flags & MD_FLAG_HEADINGAUTOID){
+ clean_header_detail = TRUE;
+ MD_CHECK(md_setup_H_identifier(ctx, block, &det.header, &identifier_build ));
+ }
+ break;
case MD_BLOCK_CODE:
/* For fenced code block, we may need to set the info string. */
@@ -4816,6 +5319,10 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
(const MD_LINE*)(block + 1), block->n_lines));
break;
+ case MD_BLOCK_NAV:
+ MD_CHECK(md_output_toc(ctx));
+ break;
+
default:
MD_CHECK(md_process_normal_block_contents(ctx,
(const MD_LINE*)(block + 1), block->n_lines));
@@ -4826,6 +5333,9 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
MD_LEAVE_BLOCK(block->type, (void*) &det);
abort:
+ if(clean_header_detail) {
+ md_free_attribute(ctx, &identifier_build);
+ }
if(clean_fence_code_detail) {
md_free_attribute(ctx, &info_build);
md_free_attribute(ctx, &lang_build);
@@ -4986,6 +5496,10 @@ md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
block->type = MD_BLOCK_HTML;
break;
+ case MD_LINE_TOC:
+ block->type = MD_BLOCK_NAV;
+ break;
+
case MD_LINE_BLANK:
case MD_LINE_SETEXTUNDERLINE:
case MD_LINE_TABLEUNDERLINE:
@@ -5053,6 +5567,47 @@ md_consume_link_reference_definitions(MD_CTX* ctx)
return 0;
}
+/* Register the heading block which is currently being built
+ * (ctx->current_block).
+ *
+ * Two records are appended:
+ *  - a heading definition (ctx->heading_defs[]), filled by
+ *    md_heading_build_ident(); its index is remembered in the block so the
+ *    identifier can be found again when the block is rendered;
+ *  - a reference definition (ctx->ref_defs[]) whose label is the heading
+ *    text and whose destination is the identifier, so that links can refer
+ *    to the heading.
+ *
+ * Returns 0 on success, or the value MD_CHECK stored into `ret` on failure.
+ */
+static int
+md_make_heading(MD_CTX* ctx)
+{
+    MD_BLOCK* heading_block = ctx->current_block;
+    /* The lines of the block are stored right after the block header. */
+    MD_LINE* heading_lines = (MD_LINE*) (heading_block + 1);
+    MD_HEADING_DEF* hdef;
+    MD_REF_DEF* ref;
+    int ret = 0;
+
+    /* Append and fill the heading definition. The slot is counted only
+     * after its identifier has been built successfully. */
+    MD_CHECK(md_push_heading_def(ctx));
+    hdef = ctx->heading_defs + ctx->n_heading_defs;
+    memset(hdef, 0, sizeof(*hdef));
+    MD_CHECK(md_heading_build_ident(ctx, hdef, heading_lines,
+                heading_block->n_lines, heading_block->data));
+    heading_block->heading_def = ctx->n_heading_defs++;
+
+    /* Remember the heading as a reference definition. */
+    MD_CHECK(md_push_ref_def(ctx));
+    ref = ctx->ref_defs + ctx->n_ref_defs;
+    memset(ref, 0, sizeof(*ref));
+    ref->label = hdef->heading;
+    ref->label_size = hdef->heading_size;
+    /* NOTE(review): this pointer refers into ctx->identifiers; if that
+     * buffer can be reallocated when later headings are added, the pointer
+     * would dangle -- confirm against md_alloc_identifiers(). */
+    ref->dest = &ctx->identifiers[hdef->ident_beg];
+    ref->dest_size = hdef->ident_size;
+
+    /* Success. */
+    ctx->n_ref_defs++;
+
+abort:
+    return ret;
+}
+
+
static int
md_end_current_block(MD_CTX* ctx)
{
@@ -5090,6 +5645,10 @@ md_end_current_block(MD_CTX* ctx)
}
}
+ if(ctx->current_block->type == MD_BLOCK_H && (ctx->parser.flags & MD_FLAG_HEADINGAUTOID)){
+ MD_CHECK(md_make_heading(ctx));
+ }
+
/* Mark we are not building any block anymore. */
ctx->current_block = NULL;
@@ -5284,6 +5843,32 @@ md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
return TRUE;
}
+/* Check whether the line starting at offset `beg` begins with the table of
+ * contents placeholder (ctx->parser.toc_options.toc_placeholder), optionally
+ * preceded by blank characters.
+ *
+ * On a match, both *p_beg and *p_end are set to the offset just past the
+ * placeholder and TRUE is returned. Otherwise FALSE is returned and the
+ * output parameters are left untouched.
+ *
+ * A NULL or empty placeholder never matches. Without this check an empty
+ * placeholder would "match" the first non-blank line of the document and
+ * swallow it; rejecting it lets the caller fall back to emitting the TOC at
+ * the start of the document, as documented for MD_TOC_OPTIONS.
+ */
+static int
+md_is_toc_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end)
+{
+    OFF off = beg;
+    const CHAR* toc = ctx->parser.toc_options.toc_placeholder;
+
+    if(toc == NULL  ||  *toc == '\0')
+        return FALSE;
+
+    /* Allow blank characters before the TOC mark. */
+    while(off < ctx->size  &&  ISBLANK(off))
+        off++;
+
+    /* A blank line cannot hold the mark. */
+    if(off < ctx->size  &&  ISNEWLINE(off))
+        return FALSE;
+
+    /* Compare the input with the placeholder, character by character. */
+    while(off < ctx->size  &&  *toc != '\0') {
+        if(CH(off) != *toc)
+            return FALSE;
+        toc++;
+        off++;
+    }
+
+    /* The input ended before the whole placeholder was seen. */
+    if(*toc != '\0')
+        return FALSE;
+
+    *p_beg = off;
+    *p_end = off;
+    return TRUE;
+}
+
+
static int
md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
{
@@ -5671,8 +6256,9 @@ md_leave_child_containers(MD_CTX* ctx, int n_keep)
static int
md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
{
- OFF off = beg;
OFF max_end;
+ OFF off = beg;
+
if(off >= ctx->size || indent >= ctx->code_indent_offset)
return FALSE;
@@ -5726,6 +6312,152 @@ md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTA
return FALSE;
}
+/* Build the identifier of a heading and store it into ctx->identifiers[].
+ *
+ * `lines`/`n_lines` are the lines of the heading block and `level` is its
+ * heading level. The function fills def->heading, def->heading_size,
+ * def->level and def->ident_size; def->ident_beg is expected to be set by
+ * md_alloc_identifiers().
+ *
+ * The identifier is derived from the heading text as follows:
+ *  - it starts with a '#';
+ *  - '-' is kept as is;
+ *  - a run of Unicode whitespace (or a new line) becomes a single '-';
+ *  - Unicode punctuation and symbols are dropped;
+ *  - any other character is replaced by its case folding;
+ *  - text covered by resolved inline marks is skipped;
+ *  - a line break between two lines of the heading becomes '-'.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+md_heading_build_ident(MD_CTX* ctx, MD_HEADING_DEF* def, MD_LINE* lines, int n_lines, int level)
+{
+    MD_MARK* mark;
+    CHAR* ptr;
+    int ret = 0;
+
+    const MD_LINE* line = lines;
+    OFF beg = lines[0].beg;
+    OFF off = beg;
+    OFF end = lines[n_lines-1].end;
+
+    /* store the heading */
+    def->heading = (CHAR*)STR(beg);
+    def->heading_size = end-beg;
+    /* store the heading level */
+    def->level = level;
+
+    /* Reset the previously collected stack of marks. */
+    ctx->n_marks = 0;
+
+    MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
+
+    /* Find first resolved mark. Note there is always at least one resolved
+     * mark, the dummy last one after the end of the latest line we actually
+     * never really reach. This saves us of a lot of special checks and cases
+     * in this function. */
+    mark = ctx->marks;
+    while(!(mark->flags & MD_MARK_RESOLVED))
+        mark++;
+
+    /* Reserve room for the worst case. The identifier can be LONGER than
+     * the heading: case folding may replace one character with several
+     * code points, and a folded code point may need more code units than
+     * the character it came from. Reserving only (end - beg + 1) units
+     * therefore allows writing past the reservation.
+     *
+     * The bound below assumes a folding yields at most 3 code points and
+     * md_encode_unicode() writes at most 4 code units per code point
+     * (TODO confirm against MD_UNICODE_FOLD_INFO and md_encode_unicode()),
+     * i.e. at most 12 output units per input unit, plus the leading '#'.
+     * Only the units really used are accounted for at the end of this
+     * function, so the over-reservation is transient. */
+    def->ident_size = (end - beg) * 12 + 1;
+    MD_CHECK(md_alloc_identifiers(ctx, def));
+
+    /* copy the ident and transform as needed */
+    ptr = &ctx->identifiers[def->ident_beg];
+    *ptr++ = _T('#'); // start with a '#'
+    while(1) {
+
+        OFF line_end = line->end;
+        /* Process the text up to the next mark or end-of-line. */
+        OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
+        if(end < line_end)
+            line_end = end;
+
+        while(off < tmp) {
+            unsigned codepoint;
+            SZ char_size;
+
+            if( CH(off) == _T('-') ){ // '-' are not replaced
+                *ptr++ = _T('-');
+                off++;
+                continue;
+            }
+
+            codepoint = md_decode_unicode(ctx->text, off, line_end, &char_size);
+            if(ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE(off)) {// replace white spaces by '-'
+                *ptr++ = _T('-');
+                off = md_skip_unicode_whitespace(ctx->text, off, line_end);
+            } else if (ISUNICODEPUNCT_(codepoint) || ISUNICODESYMBOL_(codepoint)) { // skip punctuation and symbols
+                off += char_size;
+                continue;
+            } else { // make lower case
+                MD_UNICODE_FOLD_INFO fold_info;
+                md_get_unicode_fold_info(codepoint, &fold_info);
+                for (unsigned i = 0; i < fold_info.n_codepoints; i++) {
+                    SZ n = md_encode_unicode(fold_info.codepoints[i], ptr);
+                    ptr += n;
+                }
+                off += char_size;
+            }
+        }
+        /* If reached the mark, process it and move to next one. */
+        if(off >= mark->beg) {
+            switch(mark->ch) {
+
+                case '[': /* Link, wiki link, image. */
+                case '!':
+                case ']':
+                {
+                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
+                    const MD_MARK* closer = &ctx->marks[opener->next];
+                    const MD_MARK* dest_mark;
+                    const MD_MARK* title_mark;
+
+                    if ((opener->ch == '[' && closer->ch == ']') &&
+                        opener->end - opener->beg >= 2 &&
+                        closer->end - closer->beg >= 2)
+                    {
+                        break;
+                    }
+
+                    dest_mark = opener+1;
+                    MD_ASSERT(dest_mark->ch == 'D');
+                    title_mark = opener+2;
+                    if (title_mark->ch != 'D') break;
+
+                    /* link/image closer may span multiple lines. */
+                    if(mark->ch == ']') {
+                        while(mark->end > line->end)
+                            line++;
+                    }
+
+                    break;
+                }
+            }
+
+            off = mark->end;
+
+            /* Move to next resolved mark. But not past the last mark */
+            if(mark < &ctx->marks[ctx->n_marks])
+                mark++;
+            while((mark < &ctx->marks[ctx->n_marks]) &&
+                  ( !(mark->flags & MD_MARK_RESOLVED) || mark->beg < off))
+            {
+                mark++;
+            }
+        }
+
+        /* If reached end of line, move to next one. */
+        if(off >= line->end) {
+            /* If it is the last line, we are done. */
+            if(off >= end) {
+                // update real identifier size
+                def->ident_size = (MD_SIZE)(ptr - &ctx->identifiers[def->ident_beg]);
+                break;
+            }
+
+            *ptr = _T('-'); // end of line
+            ptr++;
+
+            /* Move to the next line. */
+            line++;
+            off = line->beg;
+        }
+    }
+    // update used identifier buffer size
+    ctx->identifiers_size += def->ident_size;
+
+    return 0;
+abort:
+
+    return -1;
+}
+
+
static unsigned
md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
{
@@ -6104,6 +6836,15 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
}
}
+ /* check for TOC mark */
+ if(ctx->parser.toc_options.toc_placeholder != NULL && !ctx->toc_found &&
+ md_is_toc_line(ctx, off, &line->beg, &off))
+ {
+ line->type = MD_LINE_TOC;
+ ctx->toc_found = TRUE;
+ break;
+ }
+
/* By default, we are normal text line. */
line->type = MD_LINE_TEXT;
if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) {
@@ -6303,6 +7044,69 @@ md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANAL
return ret;
}
+/* Emit the table of contents through the renderer callbacks.
+ *
+ * The headings collected in ctx->heading_defs[] are walked in document
+ * order. Nested MD_BLOCK_UL blocks are opened and closed so that the
+ * nesting follows the heading levels, and every heading whose level does
+ * not exceed ctx->parser.toc_options.depth becomes an MD_BLOCK_LI holding
+ * an MD_SPAN_A. The link target is the heading identifier (built with the
+ * postfix when the heading has one); the link text is the raw heading text.
+ *
+ * Returns 0 on success, or the value stored into `ret` when a callback or
+ * helper fails.
+ */
+static int
+md_output_toc(MD_CTX *ctx)
+{
+    MD_HEADING_DEF *hd;
+    MD_BLOCK_LI_DETAIL li_det = {0};
+
+    MD_ATTRIBUTE_BUILD href_build = {0};
+    MD_ATTRIBUTE_BUILD title_build = {0};
+    MD_SPAN_A_DETAIL a_det;
+    int ret = 0;
+    int level = 0;
+    int i;
+
+    for (i = 0; i < ctx->n_heading_defs; ++i){
+        hd = &ctx->heading_defs[i];
+
+        /* Going deeper: open the lists leading to the heading level. */
+        while (hd->level > level){
+            ++level;
+            if (level <= ctx->parser.toc_options.depth)
+                MD_ENTER_BLOCK(MD_BLOCK_UL, NULL);
+        }
+        /* Going back up: close the lists which are deeper than the heading. */
+        while (hd->level < level){
+            if (level <= ctx->parser.toc_options.depth)
+                MD_LEAVE_BLOCK(MD_BLOCK_UL, NULL);
+            --level;
+        }
+
+        if (level <= ctx->parser.toc_options.depth){
+            MD_ENTER_BLOCK(MD_BLOCK_LI, &li_det);
+            memset(&a_det, 0, sizeof(MD_SPAN_A_DETAIL));
+            if (hd->postfix == 0){
+                MD_CHECK(md_build_attribute(ctx, hd->identifier, hd->ident_size,
+                                            MD_BUILD_ATTR_NO_ESCAPES,
+                                            &a_det.href, &href_build));
+            } else {
+                MD_CHECK(md_build_attribute_postfix(ctx,
+                                            hd->identifier, hd->ident_size,
+                                            hd->postfix, &a_det.href, &href_build));
+            }
+
+            MD_CHECK(md_build_attribute(ctx, NULL, 0, 0, &a_det.title, &title_build));
+
+            MD_ENTER_SPAN(MD_SPAN_A, &a_det);
+
+            MD_TEXT(MD_TEXT_NORMAL, hd->heading, hd->heading_size);
+            MD_LEAVE_SPAN(MD_SPAN_A, NULL);
+            MD_LEAVE_BLOCK(MD_BLOCK_LI, NULL);
+
+            /* The builds are filled again for the next item, so release
+             * what this item may have allocated (presumably only the
+             * postfix variant allocates -- TODO confirm); otherwise only
+             * the last item would be released at `abort`. Resetting them
+             * to the all-zero initial state keeps the clean-up below safe. */
+            md_free_attribute(ctx, &href_build);
+            md_free_attribute(ctx, &title_build);
+            memset(&href_build, 0, sizeof(href_build));
+            memset(&title_build, 0, sizeof(title_build));
+        }
+
+    }
+
+    /* Close the lists which are still open. */
+    while (level > 0){
+        if (level <= ctx->parser.toc_options.depth)
+            MD_LEAVE_BLOCK(MD_BLOCK_UL, NULL);
+        --level;
+    }
+
+abort:
+    md_free_attribute(ctx, &href_build);
+    md_free_attribute(ctx, &title_build);
+    return ret;
+}
+
+
static int
md_process_doc(MD_CTX *ctx)
{
@@ -6324,8 +7128,18 @@ md_process_doc(MD_CTX *ctx)
md_end_current_block(ctx);
+ if(ctx->parser.flags & MD_FLAG_HEADINGAUTOID) {
+ MD_CHECK(md_build_heading_def_hashtable(ctx));
+ }
MD_CHECK(md_build_ref_def_hashtable(ctx));
+ /* Output the TOC */
+ if(ctx->parser.toc_options.depth > 0 && !ctx->toc_found) {
+ MD_ENTER_BLOCK(MD_BLOCK_NAV, NULL);
+ MD_CHECK(md_output_toc(ctx));
+ MD_LEAVE_BLOCK(MD_BLOCK_NAV, NULL);
+ }
+
/* Process all blocks. */
MD_CHECK(md_leave_child_containers(ctx, 0));
MD_CHECK(md_process_all_blocks(ctx));
@@ -6353,6 +7167,19 @@ md_process_doc(MD_CTX *ctx)
sprintf(buffer, "Alloced %u bytes for aux. buffer.",
(unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
MD_LOG(buffer);
+
+ sprintf(buffer, "Alloced %u bytes for reference definition buffer.",
+ (unsigned)(ctx->alloc_ref_defs * sizeof(MD_REF_DEF)));
+ MD_LOG(buffer);
+
+ sprintf(buffer, "Alloced %u bytes for identifiers buffer.",
+ (unsigned)(ctx->alloc_identifiers * sizeof(MD_CHAR)));
+ MD_LOG(buffer);
+
+ sprintf(buffer, "Alloced %u bytes for heading definition buffer.",
+ (unsigned)(ctx->alloc_heading_defs * sizeof(MD_HEADING_DEF)));
+ MD_LOG(buffer);
+
}
#endif
@@ -6371,7 +7198,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
int i;
int ret;
- if(parser->abi_version != 0) {
+ if(parser->abi_version != 1) {
if(parser->debug_log != NULL)
parser->debug_log("Unsupported abi_version.", userdata);
return -1;
@@ -6399,6 +7226,9 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
ret = md_process_doc(&ctx);
/* Clean-up. */
+ md_free_heading_defs(&ctx);
+ md_free_heading_def_hashtable(&ctx);
+ free(ctx.identifiers);
md_free_ref_defs(&ctx);
md_free_ref_def_hashtable(&ctx);
free(ctx.buffer);
diff --git a/src/md4c.h b/src/md4c.h
index 95f78f9b..8dbd417f 100644
--- a/src/md4c.h
+++ b/src/md4c.h
@@ -99,7 +99,8 @@ typedef enum MD_BLOCKTYPE {
MD_BLOCK_TBODY,
MD_BLOCK_TR,
MD_BLOCK_TH,
- MD_BLOCK_TD
+ MD_BLOCK_TD,
+ MD_BLOCK_NAV
} MD_BLOCKTYPE;
/* Span represents an in-line piece of a document which should be rendered with
@@ -259,6 +260,7 @@ typedef struct MD_BLOCK_LI_DETAIL {
/* Detailed info for MD_BLOCK_H. */
typedef struct MD_BLOCK_H_DETAIL {
unsigned level; /* Header level (1 - 6) */
+ MD_ATTRIBUTE identifier; /* Heading identifier, e.g. {#some-id}, or auto-generated from the heading text. */
} MD_BLOCK_H_DETAIL;
/* Detailed info for MD_BLOCK_CODE. */
@@ -316,6 +318,7 @@ typedef struct MD_SPAN_WIKILINK {
#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */
+#define MD_FLAG_HEADINGAUTOID 0x8000 /* Enable header auto identifiers like github. */
#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
@@ -330,12 +333,28 @@ typedef struct MD_SPAN_WIKILINK {
* extensions, bringing the dialect closer to the original, are implemented.
*/
#define MD_DIALECT_COMMONMARK 0
-#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS)
+#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS | MD_FLAG_HEADINGAUTOID)
+
+/* Table of contents options.
+ */
+typedef struct MD_TOC_OPTIONS {
+ /* Maximum heading level to include in the table of contents.
+ * A value of 0 disables the generation of the table of contents.
+ */
+ int depth;
+
+ /* Placeholder marking where the table of contents is to be output.
+ *
+ * Providing an empty or NULL placeholder outputs the table of contents
+ * at the start of the document.
+ */
+ const MD_CHAR* toc_placeholder;
+
+} MD_TOC_OPTIONS;
/* Parser structure.
*/
typedef struct MD_PARSER {
- /* Reserved. Set to zero.
+ /* Reserved. Set to 1.
*/
unsigned abi_version;
@@ -375,6 +394,12 @@ typedef struct MD_PARSER {
*/
void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
+ /* Table of content parameters
+ *
+ *
+ */
+ MD_TOC_OPTIONS toc_options;
+
/* Reserved. Set to NULL.
*/
void (*syntax)(void);
diff --git a/test/heading-auto-identifier.txt b/test/heading-auto-identifier.txt
new file mode 100644
index 00000000..09969626
--- /dev/null
+++ b/test/heading-auto-identifier.txt
@@ -0,0 +1,163 @@
+
+# Heading auto identifiers
+
+With the flag `MD_FLAG_HEADINGAUTOID`, MD4C generates an identifier for each heading.
+
+```````````````````````````````` example
+# heading
+.
+
heading
+````````````````````````````````
+
+Spaces are replaced by `-` and uppercase letters are converted to lower case.
+
+```````````````````````````````` example
+# The Heading
+.
+
The Heading
+````````````````````````````````
+
+Unicode characters are converted to lower case as well.
+
+```````````````````````````````` example
+# ĀĄŁŇŢŰŽבあИЯ𐒰
+.
+
ĀĄŁŇŢŰŽבあИЯ𐒰
+````````````````````````````````
+
+
+Non-alphanumeric characters are discarded, except for `-`.
+
+```````````````````````````````` example
+# The %@!= stupid _ heading !
+.
+
The %@!= stupid _ heading !
+````````````````````````````````
+
+As a result, some headings may end up with no identifier.
+
+```````````````````````````````` example
+# !
+.
+
!
+````````````````````````````````
+
+Headings starting with numbers are not treated differently.
+
+```````````````````````````````` example
+# 1.1 The start
+.
+
1.1 The start
+````````````````````````````````
+
+Headings can contain links.
+
+```````````````````````````````` example
+# Title with a [link](hidden) inside
+.
+
+````````````````````````````````
+
+Headings can contain wiki links, but this requires the flag `MD_FLAG_WIKILINKS`.
+
+```````````````````````````````` example
+# Title with a [[hidden-wiki|link]] inside
+.
+
Title with a [[hidden-wiki|link]] inside
+````````````````````````````````
+
+Heading can contain formatting
+
+```````````````````````````````` example
+# Title with *emphasis* inside
+.
+
Title with emphasis inside
+````````````````````````````````
+
+Heading can contain some emoji code like :emoji:, they are treated as normal text
+
+```````````````````````````````` example
+# emoji1 :+1:
+# emoji2 :-1:
+# emoji3 :100:
+.
+
emoji1 :+1:
+
emoji2 :-1:
+
emoji3 :100:
+````````````````````````````````
+
+But unicode emoji characters are stripped
+
+```````````````````````````````` example
+# emoji4 👍
+# emoji5 💯
+# the + sign
+.
+
emoji4 👍
+
emoji5 💯
+
the + sign
+````````````````````````````````
+
+Identical headings get a numeric suffix.
+
+```````````````````````````````` example
+# title
+# title
+## title
+### title
+# Title
+# title
+# ti!tle
+# title
+# title
+# title
+# title
+# title
+.
+
title
+
title
+
title
+
title
+
Title
+
title
+
ti!tle
+
title
+
title
+
title
+
title
+
title
+````````````````````````````````
+
+# Coverage
+
+Additional tests to improve test coverage.
+
+No heading in a document
+
+```````````````````````````````` example
+no heading
+.
+
no heading
+````````````````````````````````
+
+A multi-line heading requires a link so that it can contain a new line.
+
+```````````````````````````````` example
+Title with a [multi
+line
+link](link) inside
+======================
+.
+
\n)$")),
+ "heading realocation":
+ (("# A long title to trigger a reallocation\n"*(300+1)),
+ re.compile("^
A long title to trigger a reallocation
\n(
A long title to trigger a reallocation
\n){300}$"))
+}
+
+whitespace_re = re.compile('/s+/')
+passed = 0
+errored = 0
+failed = 0
+
+#print("Testing pathological cases:")
+for description in pathological:
+ (inp, regex) = pathological[description]
+ start = timer()
+ [rc, actual, err] = cmark.to_html(inp)
+ end = timer()
+ if rc != 0:
+ errored += 1
+ print('{:35} [ERRORED (return code %d)]'.format(description, rc))
+ print(err)
+ elif regex.search(actual):
+ print('{:35} [PASSED] {:.3f} secs'.format(description, end-start))
+ passed += 1
+ else:
+ print('{:35} [FAILED]'.format(description))
+ print(repr(actual))
+ failed += 1
+
+print("%d passed, %d failed, %d errored" % (passed, failed, errored))
+if (failed == 0 and errored == 0):
+ exit(0)
+else:
+ exit(1)
diff --git a/test/toc-mark.txt b/test/toc-mark.txt
new file mode 100644
index 00000000..68283728
--- /dev/null
+++ b/test/toc-mark.txt
@@ -0,0 +1,85 @@
+# Table of content mark
+
+The TOC mark allows you to place the TOC where you need it.
+Run the example with --toc=[[__TOC__]]
+
+```````````````````````````````` example
+# title
+# table of content
+[[__TOC__]]
+# some chapter
+.
+
title
+
table of content
+
+
some chapter
+````````````````````````````````
+
+Only the first mark is replaced by the TOC
+
+
+
+```````````````````````````````` example
+# title
+[[__TOC__]]
+[[__TOC__]]
+.
+
title
+
+
[[TOC]]
+````````````````````````````````
+
+The TOC mark must be at the start of a line, otherwise it is not recognized:
+
+```````````````````````````````` example
+# title
+invalid [[__TOC__]] mark
+.
+
+
title
+
invalid [[TOC]] mark
+
+````````````````````````````````
+
+But the line may start with spaces:
+
+```````````````````````````````` example
+# title
+ [[__TOC__]] mark
+.
+
title
+
+````````````````````````````````
+
+The text after the TOC mark is discarded:
+
+```````````````````````````````` example
+# title
+[[__TOC__]] discarded text
+.
+
title
+
+````````````````````````````````
+
diff --git a/test/toc.txt b/test/toc.txt
new file mode 100644
index 00000000..a21fc788
--- /dev/null
+++ b/test/toc.txt
@@ -0,0 +1,104 @@
+# Table of content
+
+With the option `--table-of-content`, MD4C enables the extension which outputs
+a table of contents (TOC).
+
+Basic toc may look as follows:
+
+```````````````````````````````` example
+# title
+.
+
+
title
+````````````````````````````````
+
+By default, the TOC depth is limited to headings of level 3.
+
+```````````````````````````````` example
+# title level 1
+## title level 2
+### title level 3
+#### title level 4
+##### title level 5
+.
+
+
title level 1
+
title level 2
+
title level 3
+
title level 4
+
title level 5
+````````````````````````````````
+
+The TOC can skip some levels.
+
+```````````````````````````````` example
+### title level 3
+# title level 1
+## title level 2
+##### title level 5
+### title level 3 again
+.
+
+
title level 3
+
title level 1
+
title level 2
+
title level 5
+
title level 3 again
+````````````````````````````````
+
+# Coverage
+
+Additional test to improve test coverage.
+
+This sample will output TOC with heading suffix numbers.
+
+
+```````````````````````````````` example
+# title
+## title
+### title
+.
+
+
title
+
title
+
title
+````````````````````````````````
\ No newline at end of file