From d997d4078deb63ccc9a7a05d273617d63d7b3fc1 Mon Sep 17 00:00:00 2001
From: Evgeny Gagauz <evgenij.gagauz@gmail.com>
Date: Mon, 1 Aug 2016 23:37:00 +0300
Subject: [PATCH 1/3] Empty lines do not break options sections.

There are several reasons why an options section may be split by an
empty line, among them:
- logical grouping;
- aesthetic reasons.

Such a style is used, for example, in the 'man' program.

An example:
    Options:
      --before-empty-lines  An option before empty lines.

      --after-empty-lines   An option after empty lines.
---
 docopt.cpp       |  4 ++--
 testcases.docopt | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/docopt.cpp b/docopt.cpp
index e875d2f..67d222d 100644
--- a/docopt.cpp
+++ b/docopt.cpp
@@ -168,8 +168,8 @@ static std::vector<std::string> parse_section(std::string const& name, std::stri
 	std::regex const re_section_pattern {
 		"(?:^|\\n)"  // anchored at a linebreak (or start of string)
 		"("
-		   "[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the name
-		   "(?:\\n[ \\t].*?(?=\\n|$))*"         // followed by any number of lines that are indented
+		   "[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the section name
+		   "(?:\\n+[ \\t].*?(?=\\n|$))*"        // followed by any number of indented or empty lines
 		")",
 		std::regex::icase
 	};
diff --git a/testcases.docopt b/testcases.docopt
index efe9a07..3954d33 100644
--- a/testcases.docopt
+++ b/testcases.docopt
@@ -955,3 +955,21 @@ other options:
 """
 $ prog --baz --egg
 {"--foo": false, "--baz": true, "--bar": false, "--egg": true, "--spam": false}
+
+
+# An empty line must not break an options section.
+r"""
+Usage: prog [options]
+
+Options:
+  --before-empty-lines  An option before empty lines.
+
+
+  --after-empty-lines   An option after empty lines.
+"""
+
+$ prog --before-empty-lines
+{"--before-empty-lines": true, "--after-empty-lines": false}
+
+$ prog --after-empty-lines
+{"--before-empty-lines": false, "--after-empty-lines": true}

From fb64b5c7c712a14e481eca2a3ce5b0b9fab71084 Mon Sep 17 00:00:00 2001
From: Evgeny Gagauz <evgenij.gagauz@gmail.com>
Date: Fri, 5 Aug 2016 00:48:14 +0300
Subject: [PATCH 2/3] Regex in parse_section() is aligned with proposal for
 docopt.

The proposed changes for docopt can be found in the pull request
[339](https://github.com/docopt/docopt/pull/339).
---
 docopt.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/docopt.cpp b/docopt.cpp
index 67d222d..554bcc2 100644
--- a/docopt.cpp
+++ b/docopt.cpp
@@ -161,16 +161,20 @@ std::vector<T*> flat_filter(Pattern& pattern) {
 }
 
 static std::vector<std::string> parse_section(std::string const& name, std::string const& source) {
+	// There is no multiline-string concept in std::regex, therefore the symbols `^` and `$` match
+	// only once at the start and at the end of a string, even if this string contains new line
+	// characters. For this reason, following constructions are used instead:
+	// (?:^|\\n) - start of a line;
+	// (?=\\n|$) - end of a line.
 	// ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have
 	// a newline to anchor our matching, we have to avoid matching the final newline of each grouping.
-	// Therefore, our regex is adjusted from the docopt Python one to use ?= to match the newlines before
-	// the following lines, rather than after.
 	std::regex const re_section_pattern {
-		"(?:^|\\n)"  // anchored at a linebreak (or start of string)
-		"("
-		   "[^\\n]*" + name + "[^\\n]*(?=\\n?)" // a line that contains the section name
-		   "(?:\\n+[ \\t].*?(?=\\n|$))*"        // followed by any number of indented or empty lines
-		")",
+		"(?:^|\\n)("          // A section begins at start of a line and consists of:
+		  ".*" + name + ".*"  //  - a line that contains the section's name; and
+		  "(?:"               //  - several
+		    "\\n+[ \\t].*"    // indented lines possibly separated by empty lines.
+		  ")*"
+		")(?=\\n|$)",         // The section ends at the end of a line.
 		std::regex::icase
 	};
 

From 1c4b042c4ac0e37696ca608768cab94d21ea2e8f Mon Sep 17 00:00:00 2001
From: Evgeny Gagauz <evgenij.gagauz@gmail.com>
Date: Fri, 5 Aug 2016 02:31:01 +0300
Subject: [PATCH 3/3] Wildcards are replaced with `[^\\n]` as a workaround for
 Boost.Regex.

The wildcard `.` matches any single character, including the newline
character, in Boost.Regex, so the `[^\\n]` construction is used instead.
---
 docopt.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/docopt.cpp b/docopt.cpp
index 554bcc2..fcdc07e 100644
--- a/docopt.cpp
+++ b/docopt.cpp
@@ -166,15 +166,19 @@ static std::vector<std::string> parse_section(std::string const& name, std::stri
 	// characters. For this reason, following constructions are used instead:
 	// (?:^|\\n) - start of a line;
 	// (?=\\n|$) - end of a line.
+	//
 	// ECMAScript regex only has "?=" for a non-matching lookahead. In order to make sure we always have
 	// a newline to anchor our matching, we have to avoid matching the final newline of each grouping.
+	//
+	// The wildcard `.` matches any single character, including the newline character, in Boost.Regex,
+	// so the `[^\\n]` construction is used instead.
 	std::regex const re_section_pattern {
-		"(?:^|\\n)("          // A section begins at start of a line and consists of:
-		  ".*" + name + ".*"  //  - a line that contains the section's name; and
-		  "(?:"               //  - several
-		    "\\n+[ \\t].*"    // indented lines possibly separated by empty lines.
+		"(?:^|\\n)("                    // A section begins at start of a line and consists of:
+		  "[^\\n]*" + name + "[^\\n]*"  //  - a line that contains the section's name; and
+		  "(?:"                         //  - several
+		    "\\n+[ \\t][^\\n]*"         // indented lines possibly separated by empty lines.
 		  ")*"
-		")(?=\\n|$)",         // The section ends at the end of a line.
+		")(?=\\n|$)",                   // The section ends at the end of a line.
 		std::regex::icase
 	};