Skip to content

Commit 4aa6133

Browse files
add ConvertOptions type to enable custom cmark options/extensions
1 parent d491147 commit 4aa6133

File tree

5 files changed

+192
-21
lines changed

5 files changed

+192
-21
lines changed

Sources/Markdown/Base/Document.swift

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,32 +38,52 @@ public extension Document {
3838
/// Parse a string into a `Document`.
3939
///
4040
/// - parameter string: the input Markdown text to parse.
41-
/// - parameter options: options for parsing Markdown text.
41+
/// - parameter options: options for parsing Markdown text, including
42+
/// CommonMark-specific options and extensions.
4243
/// - parameter source: an explicit source URL from which the input `string` came for marking source locations.
4344
/// This need not be a file URL.
44-
init(parsing string: String, source: URL? = nil, options: ParseOptions = []) {
45-
if options.contains(.parseBlockDirectives) {
45+
init(parsing string: String, source: URL? = nil, convertOptions options: ConvertOptions) {
46+
if options.parseOptions.contains(.parseBlockDirectives) {
4647
self = BlockDirectiveParser.parse(string, source: source,
4748
options: options)
4849
} else {
4950
self = MarkupParser.parseString(string, source: source, options: options)
5051
}
5152
}
5253

54+
/// Parse a string into a `Document`.
55+
///
56+
/// - parameter string: the input Markdown text to parse.
57+
/// - parameter options: options for parsing Markdown text.
58+
/// - parameter source: an explicit source URL from which the input `string` came for marking source locations.
59+
/// This need not be a file URL.
60+
init(parsing string: String, source: URL? = nil, options: ParseOptions = []) {
61+
self.init(parsing: string, source: source, convertOptions: .init(fromParseOptions: options))
62+
}
63+
5364
/// Parse a file's contents into a `Document`.
5465
///
5566
/// - parameter file: a file URL from which to load Markdown text to parse.
56-
/// - parameter options: options for parsing Markdown text.
57-
init(parsing file: URL, options: ParseOptions = []) throws {
67+
/// - parameter options: options for parsing Markdown text, including
68+
/// CommonMark-specific options and extensions.
69+
init(parsing file: URL, convertOptions options: ConvertOptions) throws {
5870
let string = try String(contentsOf: file)
59-
if options.contains(.parseBlockDirectives) {
71+
if options.parseOptions.contains(.parseBlockDirectives) {
6072
self = BlockDirectiveParser.parse(string, source: file,
6173
options: options)
6274
} else {
6375
self = MarkupParser.parseString(string, source: file, options: options)
6476
}
6577
}
6678

79+
/// Parse a file's contents into a `Document`.
80+
///
81+
/// - parameter file: a file URL from which to load Markdown text to parse.
82+
/// - parameter options: options for parsing Markdown text.
83+
init(parsing file: URL, options: ParseOptions = []) throws {
84+
try self.init(parsing: file, convertOptions: .init(fromParseOptions: options))
85+
}
86+
6787
/// Create a document from a sequence of block markup elements.
6888
init<Children: Sequence>(_ children: Children) where Children.Element == BlockMarkup {
6989
try! self.init(.document(parsedRange: nil, children.map { $0.raw.markup }))

Sources/Markdown/Parser/BlockDirectiveParser.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,7 @@ private enum ParseContainer: CustomStringConvertible {
615615
/// Convert this container to the corresponding ``RawMarkup`` node.
616616
func convertToRawMarkup(ranges: inout RangeTracker,
617617
parent: ParseContainer?,
618-
options: ParseOptions) -> [RawMarkup] {
618+
options: ConvertOptions) -> [RawMarkup] {
619619
switch self {
620620
case let .root(children):
621621
let rawChildren = children.flatMap {
@@ -945,7 +945,7 @@ extension Document {
945945
///
946946
/// - Precondition: The `rootContainer` must be the `.root` case.
947947
fileprivate init(converting rootContainer: ParseContainer, from source: URL?,
948-
options: ParseOptions) {
948+
options: ConvertOptions) {
949949
guard case .root = rootContainer else {
950950
fatalError("Tried to convert a non-root container to a `Document`")
951951
}
@@ -968,14 +968,14 @@ extension Document {
968968
}
969969

970970
struct BlockDirectiveParser {
971-
static func parse(_ input: URL, options: ParseOptions = []) throws -> Document {
971+
static func parse(_ input: URL, options: ConvertOptions = .init()) throws -> Document {
972972
let string = try String(contentsOf: input, encoding: .utf8)
973973
return parse(string, source: input, options: options)
974974
}
975975

976976
/// Parse the input.
977977
static func parse(_ input: String, source: URL?,
978-
options: ParseOptions = []) -> Document {
978+
options: ConvertOptions = .init()) -> Document {
979979
// Phase 0: Split the input into lines lazily, keeping track of
980980
// line numbers, consecutive blank lines, and start positions on each line where indentation ends.
981981
// These trim points may be used to adjust the indentation seen by the CommonMark parser when

Sources/Markdown/Parser/CommonMarkConverter.swift

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -578,22 +578,18 @@ struct MarkupParser {
578578
return MarkupConversion(state: childConversion.state.next(), result: .tableCell(parsedRange: parsedRange, colspan: colspan, rowspan: rowspan, childConversion.result))
579579
}
580580

581-
static func parseString(_ string: String, source: URL?, options: ParseOptions) -> Document {
581+
static func parseString(_ string: String, source: URL?, options: ConvertOptions) -> Document {
582582
cmark_gfm_core_extensions_ensure_registered()
583583

584-
var cmarkOptions = CMARK_OPT_TABLE_SPANS
585-
if !options.contains(.disableSmartOpts) {
586-
cmarkOptions |= CMARK_OPT_SMART
584+
let parser = cmark_parser_new(options.commonmarkOptions.rawValue)
585+
586+
for ext in options.commonmarkExtensions {
587+
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension(ext))
587588
}
588-
589-
let parser = cmark_parser_new(cmarkOptions)
590-
591-
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("table"))
592-
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough"))
593-
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("tasklist"))
589+
594590
cmark_parser_feed(parser, string, string.utf8.count)
595591
let rawDocument = cmark_parser_finish(parser)
596-
let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options, headerSeen: false, pendingTableBody: nil).next()
592+
let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options.parseOptions, headerSeen: false, pendingTableBody: nil).next()
597593
precondition(initialState.event == CMARK_EVENT_ENTER)
598594
precondition(initialState.nodeType == .document)
599595
let conversion = convertAnyElement(initialState)
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/*
2+
This source file is part of the Swift.org open source project
3+
4+
Copyright (c) 2021 Apple Inc. and the Swift project authors
5+
Licensed under Apache License v2.0 with Runtime Library Exception
6+
7+
See https://swift.org/LICENSE.txt for license information
8+
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
9+
*/
10+
11+
import cmark_gfm
12+
13+
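/// Options to use when converting Markdown: the library's own parse options together with the cmark option flags and the names of the cmark syntax extensions to attach to the parser.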
14+
public struct ConvertOptions {
15+
public let parseOptions: ParseOptions
16+
public let commonmarkOptions: CommonmarkOptions
17+
public let commonmarkExtensions: [String]
18+
19+
public init(parseOptions: ParseOptions, commonmarkOptions: CommonmarkOptions, extensions: [String]) {
20+
self.parseOptions = parseOptions
21+
self.commonmarkOptions = commonmarkOptions
22+
self.commonmarkExtensions = extensions
23+
}
24+
25+
public init(fromParseOptions options: ParseOptions) {
26+
var commonmarkOptions = ConvertOptions.defaultCommonmarkOptions
27+
if options.contains(.disableSmartOpts) {
28+
commonmarkOptions.remove(.smart)
29+
}
30+
self.init(
31+
parseOptions: options,
32+
commonmarkOptions: commonmarkOptions,
33+
extensions: ConvertOptions.defaultCommonmarkExtensions
34+
)
35+
}
36+
37+
public init() {
38+
self.init(fromParseOptions: ConvertOptions.defaultParseOptions)
39+
}
40+
41+
public static let defaultParseOptions: ParseOptions = []
42+
public static let defaultCommonmarkOptions: CommonmarkOptions = [
43+
.smart,
44+
.tableSpans,
45+
]
46+
public static let defaultCommonmarkExtensions: [String] = [
47+
"table",
48+
"strikethrough",
49+
"tasklist",
50+
]
51+
}
52+
53+
/// Option flags given to the CommonMark (cmark) parser; the raw value is passed through to the parser unchanged.
54+
public struct CommonmarkOptions: OptionSet {
55+
public var rawValue: Int32
56+
57+
public init(rawValue: Int32) {
58+
self.rawValue = rawValue
59+
}
60+
61+
/// The default CommonMark behavior: no special options set.
62+
public static let `default` = CommonmarkOptions(rawValue: CMARK_OPT_DEFAULT)
63+
64+
/// Include a `data-sourcepos` attribute on all block elements.
65+
public static let sourcepos = CommonmarkOptions(rawValue: CMARK_OPT_SOURCEPOS)
66+
67+
/// Render `softbreak` elements as hard line breaks.
68+
public static let hardBreaks = CommonmarkOptions(rawValue: CMARK_OPT_HARDBREAKS)
69+
70+
/// Render raw HTML and unsafe links.
71+
///
72+
/// Unsafe links are `javascript:`, `vbscript:`, `file:`, and
73+
/// `data:`, except for `image/png`, `image/gif`, `image/jpeg`
74+
/// or `image/webp` MIME types. Without this option, raw HTML
75+
/// is replaced by a placeholder HTML comment. Unsafe links
76+
/// are replaced by empty strings.
77+
public static let unsafe = CommonmarkOptions(rawValue: CMARK_OPT_UNSAFE)
78+
79+
/// Render `softbreak` elements as spaces.
80+
public static let noBreaks = CommonmarkOptions(rawValue: CMARK_OPT_NOBREAKS)
81+
82+
/// Validate UTF-8 in the input before parsing, replacing illegal
83+
/// sequences with the replacement character `U+FFFD`.
84+
public static let validateUtf8 = CommonmarkOptions(rawValue: CMARK_OPT_VALIDATE_UTF8)
85+
86+
/// Convert straight quotes to curly, `---` to em dashes, `--` to en dashes.
87+
public static let smart = CommonmarkOptions(rawValue: CMARK_OPT_SMART)
88+
89+
/// Use GitHub-style `<pre lang="x">` tags for code blocks instead of
90+
/// `<pre><code class="language-x">`.
91+
public static let githubPreLang = CommonmarkOptions(rawValue: CMARK_OPT_GITHUB_PRE_LANG)
92+
93+
/// Be liberal in interpreting inline HTML tags.
94+
public static let liberalHtmlTag = CommonmarkOptions(rawValue: CMARK_OPT_LIBERAL_HTML_TAG)
95+
96+
/// Parse footnotes.
97+
public static let footnotes = CommonmarkOptions(rawValue: CMARK_OPT_FOOTNOTES)
98+
99+
/// Only parse strikethroughs if surrounded by exactly 2 tildes.
100+
///
101+
/// Strikethroughs are still only parsed when the `"strikethrough"`
102+
/// extension is enabled.
103+
public static let strikethroughDoubleTilde = CommonmarkOptions(rawValue: CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE)
104+
105+
/// Use style attributes to align table cells instead of align attributes.
106+
public static let tablePreferStyleAttributes = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES)
107+
108+
/// Include the remainder of the info string in code blocks in
109+
/// a separate attribute.
110+
public static let fullInfoString = CommonmarkOptions(rawValue: CMARK_OPT_FULL_INFO_STRING)
111+
112+
/// Parse only inline markdown directives. Block directives will not be
113+
/// parsed (their literal representations will remain in the output).
114+
public static let inlineOnly = CommonmarkOptions(rawValue: CMARK_OPT_INLINE_ONLY)
115+
116+
/// Parse the markdown input without removing preceding/trailing whitespace and
117+
/// without converting newline characters to breaks.
118+
///
119+
/// Using this option also enables the `CMARK_OPT_INLINE_ONLY` option.
120+
// FIXME: the original `CMARK_OPT_PRESERVE_WHITESPACE` does not appear to be visible to the Swift compiler, so its value is written out by hand below.
121+
public static let preserveWhitespace = CommonmarkOptions(rawValue: (1 << 19) | CMARK_OPT_INLINE_ONLY)
122+
123+
/// Enable the row- and column-span syntax in the tables extension.
124+
public static let tableSpans = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_SPANS)
125+
126+
/// Use a "ditto mark" (`"`) instead of a caret (`^`) to indicate row-spans in the tables extension.
127+
public static let tableRowspanDitto = CommonmarkOptions(rawValue: CMARK_OPT_TABLE_ROWSPAN_DITTO)
128+
}

Tests/MarkdownTests/Parsing/CommonMarkConverterTests.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,31 @@ class CommonMarkConverterTests: XCTestCase {
3333
let document = Document(parsing: text, source: nil, options: [.parseBlockDirectives, .parseSymbolLinks])
3434
XCTAssertEqual(expectedDump, document.debugDescription(options: .printSourceLocations))
3535
}
36+
37+
/// Test using a custom set of Commonmark options to convert Markdown.
38+
func testCustomOpts() {
39+
let text = "~This is not strikethrough~ -- but ~~this is strikethrough~~."
40+
41+
// Because the "smart" option is not set, the `--` should not be converted
42+
// to an en-dash. With `.strikethroughDoubleTilde` set, the single-tilde span also stays plain text.
43+
let expectedDump = """
44+
Document @1:1-1:62
45+
└─ Paragraph @1:1-1:62
46+
├─ Text @1:1-1:36 "~This is not strikethrough~ -- but "
47+
├─ Strikethrough @1:36-1:61
48+
│ └─ Text @1:38-1:59 "this is strikethrough"
49+
└─ Text @1:61-1:62 "."
50+
"""
51+
52+
let document = Document(
53+
parsing: text,
54+
source: nil,
55+
convertOptions: .init(
56+
parseOptions: ConvertOptions.defaultParseOptions,
57+
commonmarkOptions: .strikethroughDoubleTilde,
58+
extensions: ConvertOptions.defaultCommonmarkExtensions
59+
)
60+
)
61+
XCTAssertEqual(expectedDump, document.debugDescription(options: .printSourceLocations))
62+
}
3663
}

0 commit comments

Comments
 (0)