diff --git a/doc/settings.md b/doc/settings.md
index b09c9ae129..3cc9d9f349 100644
--- a/doc/settings.md
+++ b/doc/settings.md
@@ -5,21 +5,21 @@ graph TB
subgraph Settings loading order
direction LR
A1["1"] --- B1("Init hardcoded <include=.../> list")
- B1 --> C1("Take <include=.../> list from the $VTM_CONFIG value or file it referencing")
- C1 --> G1("Take <include=.../> list from the received DirectVT packet")
- G1 --> H1("Take <include=.../> list from the --config' CLI option value or file it referencing")
+ B1 --> C1("Take <include=.../> file list from the $VTM_CONFIG value or from the file it directly references")
+ C1 --> G1("Take <include=.../> file list from the received DirectVT packet")
+ G1 --> H1("Take <include=.../> file list from the --config CLI option value or from the file it directly references")
direction LR
- A2["2"] --- B2("Overlay <config/> subsections from the resultant <include=.../> list")
- B2 --> C2("Overlay <config/> subsection from the $VTM_CONFIG value or file it referencing")
- C2 --> G2("Overlay <config/> subsection from the received DirectVT packet")
- G2 --> H2("Overlay <config/> subsection from the --config' CLI option value or file it referencing")
+ A2["2"] --- B2("Overlay settings from the resultant <include=.../> file list")
+ B2 --> C2("Overlay settings from the $VTM_CONFIG value or from the file it directly references")
+ C2 --> G2("Overlay settings from the received DirectVT packet")
+ G2 --> H2("Overlay settings from the --config CLI option value or from the file it directly references")
end
```
## TL;DR
-The settings are stored in a Pure 'XML' which is an XML-like format, storing a hierarchical list of key=value pairs.
-See [`/src/vtm.xml`](../src/vtm.xml) for reference.
+The settings are stored in the "Pure XML" file format, which looks like classical XML but with dynamic element referencing and templating.
+See [`/src/vtm.xml`](../src/vtm.xml) for an example.
We call the text data in the settings file "plain XML data" even though our file format is not technically XML, but only visually resembles it.
@@ -30,11 +30,11 @@ There are two predefined settings source locations and this can be changed as ne
```
The process of loading settings consists of the following steps:
-- Build an ordered list of the setting source files by looking for the root `` subsections.
-- Overlay the XML data from the source files in the specified order.
-- Overlay the XML data from the value of the `$VTM_CONFIG` environment variable or from a settings file it references.
-- Overlay the XML data from the DirectVT config payload received from the parent process.
-- Overlay the XML data from the specified `--config <...>` CLI option value or from a settings file it referencing.
+- Build an ordered list of the setting source files by looking for the root `<include=.../>` elements.
+- Overlay settings from the source files in the specified order.
+- Overlay settings from the value of the `$VTM_CONFIG` environment variable or from a settings file it directly references.
+- Overlay settings from the DirectVT config payload received from the parent process.
+- Overlay settings from the specified `--config <...>` CLI option value or from a settings file it directly references.
The file list is built in the following order from the following sources:
- The settings file list from the hardcoded configuration containing a list of two files:
@@ -44,14 +44,14 @@ The file list is built in the following order from the following sources:
...
```
-- The settings file list from the `$VTM_CONFIG` environment variable value or from a settings file it referencing.
+- The settings file list from the `$VTM_CONFIG` environment variable value or from a settings file it directly references.
- A case with a plain XML-data:
- `$VTM_CONFIG=<include*/><include='/path/to/override_defaults.xml'/>...` - Clear the current file list and begin a new file list containing a single file '/path/to/override_defaults.xml'.
- `$VTM_CONFIG=<include='/path/to/first.xml'/><include='/path/to/second.xml'/>...` - Append the current file list with the files '/path/to/first.xml' and '/path/to/second.xml'.
- A case with a file reference:
- `$VTM_CONFIG='/path/to/override_defaults.xml'` - Take the file list from the '/path/to/override_defaults.xml'.
- The settings file list from the DirectVT config received from the parent process.
-- The settings file list from the specified `--config <...>` CLI option value or from a settings file it referencing.
+- The settings file list from the specified `--config <...>` CLI option value or from a settings file it directly references.
- A case with a plain XML-data:
- `./vtm --config "<include*/><include='/path/to/override_defaults.xml'/>..."` - Clear the current file list and begin a new file list containing a single file '/path/to/override_defaults.xml'.
- A case with a file reference:
@@ -59,9 +59,11 @@ The file list is built in the following order from the following sources:
## Pure XML
-### Key differences from classical XML
+### Differences from classical XML
- - Document encoding is UTF-8.
+Pure XML is based on the XML 1.1 standard, with the following exceptions:
+
+ - Document encoding is UTF-8 only.
- Any Unicode characters are allowed, including the U+0000 (null) character.
- There is no support for named XML character entities.
- The stored data forms a hierarchical list of `name=value` pairs.
@@ -71,21 +73,24 @@ The file list is built in the following order from the following sources:
- `<... name="value" />`, `<...> "value" `, and `<...> ` have the same meaning.
- The XML-attribute `param` in `` and the XML-element `param` in ` ` are semantically identical sub-elements of the `name` element.
- No spaces are allowed between the opening angle bracket and the element name:
- - `... < name ...`, `... <= ...`, `... << ...` are treated as parts of the element's value content.
+ - `... < name ...` should not be treated as an opening tag.
- Every element has its own text value.
- For example, `` - the `name` element has the text value `names_value`, and its `param` sub-element has the text value `params_value`.
- All stored values are strings (the data requester decides on its side how to interpret it):
- `name=2000` and `name="2000"` have the same meaning.
- All value strings, except those that begin with a decimal digit character (ASCII `0` - `9`), must be quoted with either double or single quotes (`"` U+0022 or `'` U+0027).
- The value string can be fragmented. Fragments can be located after the equal sign following the element name, as well as between the opening and closing tags.
- - The fragments located between the opening and closing tags can be either quoted or in raw form. The quoted form sets strict boundaries for the string value. The raw form pulls all characters between the opening and closing tags, including line breaks.
- - The following compact syntax for elements is allowed:
+ - The fragments located between the opening and closing tags can be either quoted or in raw form. The quoted form sets strict boundaries for the string value. The raw form pulls all characters between the opening and closing tags, excluding trailing whitespaces (whitespaces immediately before a nested opening tag or an element's closing tag).
+ - The following compact syntax for element declaration is allowed:
- `` and `` have the same meaning.
- - Elements can reference any element using relative and absolute references, in the form of an unquoted name or an XML path to the referenced element.
+ - Elements can reference any other elements using relative and absolute references, in the form of an unquoted name or an XML path to the referenced element.
- `thing2` refers to the value `/node1/thing1` in ``.
- `thing2` refers to the value `thing1` within the scope of ``.
+ - Each element forms its own namespace.
+ - The value of an element containing relative references is obtained by traversing the element's namespace and all its surrounding namespaces until the first hit.
+ - A recursive reference is a reference encountered during the resolving of another reference.
+ - All recursive references are resolved starting from the element's namespace, regardless of where the recursive references are encountered.
- Circular references are silently ignored.
- - //todo describe the reference resolution order.
- The element reference includes all of the element's contents, including the element's value and all nested elements.
- The element's content may include any number of substrings, as well as references to other elements, combined in the required order using the vertical bar character ASCII 0x7C `|`.
- `` and `` have the same meaning.
diff --git a/src/netxs/desktopio/application.hpp b/src/netxs/desktopio/application.hpp
index f13659ad5a..0ae936233c 100644
--- a/src/netxs/desktopio/application.hpp
+++ b/src/netxs/desktopio/application.hpp
@@ -22,7 +22,7 @@ namespace netxs::app
namespace netxs::app::shared
{
- static const auto version = "v2025.05.28";
+ static const auto version = "v2025.06.03";
static const auto repository = "https://github.com/directvt/vtm";
static const auto usr_config = "~/.config/vtm/settings.xml"s;
static const auto sys_config = "/etc/vtm/settings.xml"s;
@@ -699,35 +699,33 @@ namespace netxs::app::shared
}
return faux;
}
- auto attach_file_list(xml::document& defcfg, xml::document& cfg)
+ auto attach_file_list(txts& file_list, xml::document& src_cfg) // Collect the file paths declared by the "/include" elements of src_cfg into file_list.
{
- if (cfg)
+ auto file_ptr_list = src_cfg.take_ptr_list("/include");
+ if (file_ptr_list.size())
{
- auto file_list = cfg.take_ptr_list("/include");
- if (file_list.size())
+ log("%%Update settings source files from %src%", prompt::apps, src_cfg.page.file);
+ for (auto& file_ptr : file_ptr_list) // NOTE(review): file_ptr is dereferenced below without a null check (the replaced code tested it) - confirm take_ptr_list never yields null.
{
- log("%%Update settings source files from %src%", prompt::apps, cfg.page.file);
- for (auto& file : file_list)
+ if (file_ptr->base) // An element flagged as base restarts the list: everything collected so far is dropped.
{
- if (file && !file->base)
- {
- log("%%%file%", prompt::pads, file->_concat_values());
- }
+ file_list.clear();
+ }
+ auto file_path = file_ptr->_concat_values();
+ if (file_path.size()) // An element with an empty value adds no file (it may still have reset the list above).
+ {
+ log("%%%file%", prompt::pads, file_path);
+ file_list.emplace_back(std::move(file_path));
}
- defcfg.attach("/", file_list);
}
}
}
- auto overlay_config(xml::document& defcfg, xml::document& cfg)
+ auto overlay_config(xml::document& def_cfg, xml::document& src_cfg) // Merge the settings of src_cfg into def_cfg.
{
- if (cfg)
+ if (src_cfg) // Nothing is merged (or logged) when src_cfg converts to false.
{
- auto config_data = cfg.take_ptr_list("/");
- if (config_data.size())
- {
- log(prompt::pads, "Merging settings from ", cfg.page.file);
- defcfg.overlay(config_data.front(), "");
- }
+ log(prompt::pads, "Merging settings from ", src_cfg.page.file);
+ def_cfg.combine_item(src_cfg.root_ptr); // The whole source document is handed over via its root element.
}
}
auto settings(qiew cliopt, bool print = faux)
@@ -740,6 +738,7 @@ namespace netxs::app::shared
auto envcfg = xml::document{};
auto dvtcfg = xml::document{};
auto clicfg = xml::document{};
+ auto file_list = txts{};
auto show_cfg = [&](auto& cfg){ if (print && cfg) log("%source%:\n%config%", cfg.page.file, cfg.page.show()); };
@@ -781,16 +780,15 @@ namespace netxs::app::shared
show_cfg(clicfg);
}
- attach_file_list(defcfg, envcfg);
- attach_file_list(defcfg, dvtcfg);
- attach_file_list(defcfg, clicfg);
+ attach_file_list(file_list, defcfg);
+ attach_file_list(file_list, envcfg);
+ attach_file_list(file_list, dvtcfg);
+ attach_file_list(file_list, clicfg);
- auto config_sources = defcfg.take_ptr_list("/include");
- for (auto& file_rec : config_sources) if (file_rec && !file_rec->base) // Overlay configs from the specified sources if it is.
+ for (auto& file_path : file_list) // Overlay configs from the specified sources if it is.
{
- auto src_file = file_rec->_concat_values();
auto src_conf = xml::document{};
- load_from_file(src_conf, src_file);
+ load_from_file(src_conf, file_path);
show_cfg(src_conf);
overlay_config(defcfg, src_conf);
}
@@ -799,7 +797,7 @@ namespace netxs::app::shared
overlay_config(defcfg, dvtcfg);
overlay_config(defcfg, clicfg);
- auto resultant = xml::settings{ defcfg };
+ auto resultant = xml::settings{ std::move(defcfg) };
return resultant;
}
}
diff --git a/src/netxs/desktopio/utf.hpp b/src/netxs/desktopio/utf.hpp
index 93b62e4cba..7a59b9477e 100644
--- a/src/netxs/desktopio/utf.hpp
+++ b/src/netxs/desktopio/utf.hpp
@@ -18,7 +18,7 @@ namespace netxs
using txts = std::vector;
using namespace std::literals;
- static constexpr auto whitespaces = " \n\r\t"sv;
+ static constexpr auto whitespaces = " \t\r\n\v\f"sv;
static constexpr auto onlydigits = "0123456789"sv;
static constexpr auto alphabetic = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"sv;
static constexpr auto base64code = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -1164,6 +1164,21 @@ namespace netxs::utf
}
return from.substr(0, s_size);
}
+ void replace_all(view utf8, view what, view to, text& dest) // utf: Append utf8 to dest with every occurrence of 'what' replaced by 'to' (dest is not cleared first).
+ {
+ auto last = 0_sz; // Start of the part of utf8 that has not been copied yet.
+ if (!what.empty() && utf8.length() >= what.length()) // An empty pattern, or one longer than the input, is never searched for.
+ {
+ auto spot = 0_sz;
+ while ((spot = utf8.find(what, last)) != text::npos)
+ {
+ dest += utf8.substr(last, spot - last); // Copy the text preceding the match.
+ dest += to;
+ last = spot + what.size(); // Resume right after the match, so matches never overlap.
+ }
+ }
+ dest += utf8.substr(last); // Copy the remainder (the whole input if nothing matched).
+ }
void replace_all(text& utf8, auto const& from, auto const& to)
{
auto frag = view{ from };
@@ -1366,6 +1381,19 @@ namespace netxs::utf
return crop;
}
}
+ auto split2(view utf8, char delimiter, auto proc) // utf: Call proc(fragment, is_last) for each delimiter-separated fragment of utf8; stop and return faux as soon as proc returns a false value.
+ {
+ auto cur = 0_sz; // Start of the current fragment.
+ auto pos = 0_sz;
+ while ((pos = utf8.find(delimiter, cur)) != text::npos)
+ {
+ auto frag = view{ utf8.data() + cur, pos - cur }; // Text between the previous delimiter and this one (may be empty).
+ if (!proc(frag, faux)) return faux;
+ cur = pos + 1; // Step over the delimiter.
+ }
+ auto end = view{ utf8.data() + cur, utf8.size() - cur };
+ return proc(end, true); // The tail after the last delimiter is always reported, flagged as the last fragment.
+ }
template>>
auto split(view utf8, char delimiter, P proc)
{
@@ -1852,18 +1880,28 @@ namespace netxs::utf
{
utf::trim_back_if(utf8, [&](char c){ return delims.find(c) == text::npos; });
}
- // utf: Trim the utf8 front and return trims.
- auto pop_front_chars(view& utf8, view delims)
+ // utf: Trim the leading chars of utf8 while they are any of while_any_of, and return the trimmed part.
+ auto pop_front_chars(view& utf8, view while_any_of)
{
auto temp = utf8;
- utf::trim_front(utf8, delims);
+ utf::trim_front(utf8, while_any_of);
return temp.substr(0, temp.size() - utf8.size());
}
- // utf: Trim the utf8 back and return trims.
- auto pop_back_chars(view& utf8, view delims)
+ // utf: Trim the utf8 front until any of delims is found.
+ template
+ void pop_front_until(view& utf8, auto until_any_of)
+ {
+ auto head = utf8.begin();
+ auto tail = utf8.end();
+ auto stop = utf::find_char(head, tail, until_any_of);
+ auto prefix_len = stop - head;
+ utf8.remove_prefix(prefix_len);
+ }
+ // utf: Trim the trailing chars of utf8 while they are any of while_any_of, and return the trimmed part.
+ auto pop_back_chars(view& utf8, view while_any_of)
{
auto temp = utf8;
- utf::trim_back(utf8, delims);
+ utf::trim_back(utf8, while_any_of);
return temp.substr(utf8.size());
}
@@ -2063,29 +2101,28 @@ namespace netxs::utf
return std::pair{ qiew{}, utf8 };
}
}
+ // utf: Cut the front of utf8 up to the first occurrence of any of delims, and return the cut part.
template
auto take_front(view& utf8, view delims)
{
- auto head = utf8.begin();
- auto tail = utf8.end();
- auto stop = find_char(head, tail, delims);
- if (stop == tail)
+ auto temp = qiew{ utf8 }; // Keep the original extent: utf8 is shortened in place below.
+ utf::pop_front_until(utf8, delims);
+ if (utf8.empty()) // Nothing is left: no delimiter was found (or utf8 was empty to begin with).
{
if constexpr (Lazy)
{
- utf8 = {};
- return qiew{ utf8 };
+ return qiew{}; // Lazy: report nothing, although utf8 has been consumed entirely.
}
else
{
- auto crop = qiew{ utf8 };
- utf8 = {};
- return crop;
+ return temp; // Non-lazy: the whole original input is the result.
}
}
- auto str = qiew{ head, stop };
- utf8.remove_prefix(str.size());
- return str;
+ else
+ {
+ auto crop = temp.substr(0, temp.size() - utf8.size()); // The part that pop_front_until removed from the front of utf8.
+ return crop;
+ }
}
template
auto take_front(view& utf8, std::tuple const& delims)
@@ -2168,6 +2205,7 @@ namespace netxs::utf
}
return args;
}
+ // utf: Remove utf8 tail until any of delims (including delim).
auto eat_tail(view& utf8, view delims)
{
auto head = utf8.begin();
diff --git a/src/netxs/desktopio/xml.hpp b/src/netxs/desktopio/xml.hpp
index 32b95b29c7..9f213c4834 100644
--- a/src/netxs/desktopio/xml.hpp
+++ b/src/netxs/desktopio/xml.hpp
@@ -225,19 +225,20 @@ namespace netxs::xml
struct document
{
- enum class type
+ enum type
{
na, // Start of file
eof, // End of file
eol, // End of line
- top_token, // Open tag name
- end_token, // Close tag name
+ top_token, // Opening tag name
+ end_token, // Closing tag name
token, // Tag name
raw_text, // ex: raw text
quotes, // '"' ex: " or '
quoted_text, // '"' ex: " text "
begin_tag, // '<' ex:
+ comment, // '' ex: ... -->
close_inline, // '>' ex: ... >
@@ -251,6 +252,8 @@ namespace netxs::xml
lua_op_less_eq,// '<=' ex: Lua's less than or equal operator
compact, // '/[^>]' ex: Compact syntax:
spaces, // ' ' ex: \s\t\r\n...
+ insA, // '' ex: Attribute form insertion point.
+ insB, // '' ex: Block form insertion point.
unknown, //
tag_value, // Quoted value. ex: object="value"
tag_numvalue, // Value begins with digit. ex: object=123ms
@@ -261,115 +264,99 @@ namespace netxs::xml
error, // Inline error message.
};
- struct literal;
- using fptr = netxs::sptr;
- using heap = std::vector;
+ // whitespaces ws = [ \t\r\n\v\f]* // " \t\r\n\v\f"
+ // asterisk = [*] // ASCII 0x2A '*'
+ // equal = [=] // ASCII 0x3D '='
+ // digits = [0-9] // ASCII decimal digits
+ // markup = [[:whitespaces:]!"#$%&'()*+,/;<=>?@\[\\\]^`{|}~] // the characters forbidden for name
+ // name = [^-.[:digits:][:markup:]][^[:markup:]]* // alphanumeric literal that does not begin with a digit, '-' (minus) or '.' (period)
+ // numeric_value = [[:digits:]][^[:markup:]]* // alphanumeric literal beginning with a digit
+ // double_quoted = "(\\.|[^\\"])*" // "quoted'text"
+ // single_quoted = '(\\.|[^\\'])*' // 'quoted"text'
+ // quoted_text = double_quoted | single_quoted // "quoted'text" or 'quoted"text'
+ // reference = /?([:name:])(/[:name:])* // path to the element, relative or absolute
+ // single_value = reference | quoted_text | numeric_value // one of
+ // union = (whitespaces)* [|] (whitespaces)* // element union operator
+ // value = single_value([:union:] single_value)* // combined values
+ // comment = // commentary block
+ // comments = ((whitespaces)* comment)*$ // comments at the end of line excluding the LF character
+ // name_value_pair = (whitespaces)* name(asterisk)?((equal) value)? // name=value pair
+ // raw_text = (?<=[>])(.+?)*(?= <[:name:] | [:name:] | "sv;
+ static constexpr auto raw_end = ""sv;
+ { // Move the non-empty value segments of item into the destination segment, marking every switch between raw and non-raw text.
+ auto [dst_vbeg, dst_vend] = *dst_segment_iter++;
+ auto prev_is_raw = 0; // 0: uninitialized.
+ while (src_segment_iter != item.value_segments.end())
+ {
+ auto& [src_vbeg, src_vend] = *src_segment_iter++;
+ auto next_src_vbeg = std::next(src_vbeg);
+ if (next_src_vbeg != src_vend) // Skip the segments that hold nothing between their boundary frags.
+ {
+ auto next_is_raw = (si32)(next_src_vbeg->kind == type::raw_text) + 1; // 0: uninitialized, 1: not a raw, 2: a raw text.
+ if (prev_is_raw && prev_is_raw != next_is_raw) // Insert either raw_begin or raw_end.
+ {
+ frag_list.insert(dst_vend, literal(type::comment, next_is_raw == 2 ? raw_begin : raw_end)); // Mark the place where the kind of the text changes.
+ //todo insert formatting spaces
+ //...
+ }
+ prev_is_raw = next_is_raw; // Track the kind of every moved segment (when set only inside the branch above, it stayed 0 and no marker was ever inserted).
+ frag_list.splice(dst_vend, item.frag_list, next_src_vbeg, src_vend); // Sync the first segment, the stripe of frags that is located right after the equal sign.
+ }
+ }
+ }
+ // Remove unused empty segments. Leave only two placeholders: near the equal sign and for the outer value.
+ while (dst_segment_iter != value_segments.end())
+ {
+ auto& [dst_vbeg, dst_vend] = *dst_segment_iter++;
+ frag_list.erase(dst_vbeg, std::next(dst_vend));
+ }
+ value_segments.resize(2);
}
}
- //todo sync suit
- //
- // inline:
- //vbeg
- //type::tag_reference
- //type::quoted_text
- //type::tag_numvalue
- //vend
- //
- // outside:
- //insB
- //type::raw_reference
- //type::raw_quoted
- //type::raw_text
- //type::unknown
}
}
- auto snapshot()
+ auto snapshot() const // Return the concatenated text of the fragments from..upto (inclusive) with the indentation normalized.
{
auto crop = text{};
+ auto size = arch{};
auto head = from;
- while (head)
+ auto tail = std::next(upto);
+ while (head != tail) // First pass: measure the total length so that crop is allocated once.
{
- crop += head->utf8;
- if (head == upto) break;
- head = head->next;
+ auto& frag = *head++;
+ size += frag.utf8.size();
}
- if (crop.starts_with('\n')
- || crop.starts_with('\r'))
+ crop.reserve(size);
+ head = from;
+ while (head != tail) // Second pass: concatenate the fragments.
+ {
+ auto& frag = *head++;
+ crop += frag.utf8;
+ }
+ if (crop.starts_with('\n') || crop.starts_with('\r')) // Normalize indents.
{
auto temp = view{ crop };
- auto dent = text{ utf::pop_front_chars(temp, whitespaces) };
- crop = temp;
- utf::replace_all(crop, dent, "\n");
+ auto dent = text{ utf::pop_front_chars(temp, whitespaces) }; // Own copy: a view into crop would not survive rebuilding the result.
+ if (dent.size() > sizeof('\n')) // A lone line break carries no indent to strip.
+ {
+ auto norm = text{}; // Build into a separate buffer: temp still reads from crop.
+ norm.reserve(temp.size());
+ utf::replace_all(temp, dent, "\n", norm);
+ crop.swap(norm);
+ }
}
return crop;
}
};
- static constexpr auto find_start = "<"sv;
- static constexpr auto rawtext_delims = std::tuple{ " "sv, "/>"sv, ">"sv, "<"sv, "\n"sv, "\r"sv, "\t"sv };
- static constexpr auto reference_delims = std::tuple_cat(rawtext_delims, std::tuple{ "|"sv, "\'"sv, "\""sv, "="sv });
- static constexpr auto token_delims = " \t\n\r=*/><"sv;
- static constexpr auto view_comment_begin = ""sv;
- static constexpr auto view_close_tag = ""sv;
- static constexpr auto view_begin_tag = "<"sv;
- static constexpr auto view_empty_tag = "/>"sv;
- static constexpr auto view_slash = "/"sv;
- static constexpr auto view_compact = "/"sv;
- static constexpr auto view_close_inline = ">"sv;
- static constexpr auto view_quoted_text = "\""sv;
- static constexpr auto view_quoted_text_2 = "\'"sv;
- static constexpr auto view_equal = "="sv;
- static constexpr auto view_new_list = "*"sv;
- static constexpr auto view_lua_op_shl = "<<"sv;
- static constexpr auto view_lua_op_less = "< "sv;
- static constexpr auto view_lua_op_less_eq = "<="sv;
- static constexpr auto view_tag_joiner = "|"sv;
-
- suit page;
- sptr root;
-
- document() = default;
- document(document&&) = default;
- document(view data, view file = {})
- : page{ file },
- root{ ptr::shared()}
+ struct parser
{
- read(data);
- }
- operator bool () const { return root ? !root->hive.empty() : faux; }
+ static constexpr auto view_find_start = "<"sv;
+ static constexpr auto view_token_first = " \t\r\n\v\f!\"#$%&'()*+<=>?@[\\]^`{|}~;,/-.0123456789"sv; // Element name cannot contain any of [[:whitespaces:]!"#$%&'()*+,/;<=>?@[\]^`{|}~], and cannot begin with "-", ".", or a numeric digit.
+ static constexpr auto view_token_delims = " \t\r\n\v\f!\"#$%&'()*+<=>?@[\\]^`{|}~;,/"sv;
+ static constexpr auto view_reference_delims = " \t\r\n\v\f!\"#$%&'()*+<=>?@[\\]^`{|}~;,"sv;
+ static constexpr auto view_digit_delims = " \t\r\n\v\f!\"#$%&'()*+<=>?@[\\]^`{|}~/"sv; // Allow ';' and ',' between digits: (123;456).
+ static constexpr auto view_comment_begin = ""sv;
+ static constexpr auto view_close_tag = ""sv;
+ static constexpr auto view_begin_tag = "<"sv;
+ static constexpr auto view_empty_tag = "/>"sv;
+ static constexpr auto view_slash = "/"sv;
+ static constexpr auto view_compact = "/"sv;
+ static constexpr auto view_close_inline = ">"sv;
+ static constexpr auto view_quoted_text = "\""sv;
+ static constexpr auto view_quoted_text_2 = "\'"sv;
+ static constexpr auto view_equal = "="sv;
+ static constexpr auto view_new_list = "*"sv;
+ static constexpr auto view_lua_op_shl = "<<"sv;
+ static constexpr auto view_lua_op_less = "< "sv;
+ static constexpr auto view_lua_op_less_eq = "<="sv;
+ static constexpr auto view_tag_joiner = "|"sv;
- void load(view data, view file = {})
- {
- page.init(file);
- root = ptr::shared();
- read(data);
- }
- template
- auto take_ptr_list(view path)
- {
- auto item_ptr_list = vect{};
- if (root)
+ sptr& root_ptr;
+ suit& page;
+ view& data;
+ view temp;
+ type what;
+ type last;
+ vect compacted;
+
+ auto append(type kind, view utf8 = {}, bool ignore_if_empty = faux)
{
- utf::trim(path, '/');
- if (path.empty())
- {
- item_ptr_list.push_back(root);
- }
- else
+ if (!ignore_if_empty || utf8.size())
{
- root->get_list3(path, item_ptr_list);
+ page.frag_list.push_back({ kind, utf8 });
}
+ return std::prev(page.frag_list.end());
}
- return item_ptr_list;
- }
- auto join(view path, vect const& list)
- {
- utf::trim(path, '/');
- auto [parent_path, branch_path] = utf::split_back(path, '/');
- auto dest_hosts = take_ptr_list(parent_path);
- auto parent_ptr = dest_hosts.size() ? dest_hosts.front() : root;
- if (parent_ptr->mode == elem::form::pact)
- {
- log("%%Destination path is not suitable for merging '%parent_path%'", prompt::xml, parent_path);
- return;
- }
- auto& hive = parent_ptr->hive;
- auto iter = hive.find(branch_path);
- if (iter == hive.end())
- {
- iter = hive.emplace(branch_path , vect{}).first;
- }
- auto& dest_list = iter->second;
- if (dest_list.size() && dest_list.front()->base == faux) // Start a new list if the existing list was not declared as a list using an asterisk.
+ void fail_msg(text msg)
{
- dest_list.clear();
+ page.fail = true;
+ append(type::error, msg);
+ log("%%%msg% at %page.file%:%lines%", prompt::xml, msg, page.file, page.lines());
}
- for (auto& item_ptr : list) if (item_ptr && item_ptr->name->utf8 == branch_path)
+ void fail()
{
- //todo unify
- if (item_ptr->base)
+ auto str = [](type what)
{
- dest_list.clear();
- }
- auto mode = item_ptr->mode;
- auto from = item_ptr->from;
- auto upto = item_ptr->upto;
- auto next = upto->next;
- if (auto gate = mode == elem::form::attr ? parent_ptr->insA : parent_ptr->insB)
- if (auto prev = gate->prev.lock())
- if (auto past = from->prev.lock())
- {
- from->prev = prev;
- upto->next = gate;
- gate->prev = upto;
- prev->next = from;
- past->next = next; // Release an element from the previous list.
- if (next) next->prev = past;
- item_ptr->boss = parent_ptr;
- dest_list.push_back(item_ptr);
- if (mode != elem::form::attr) // Prepend '\n <' to item when inserting it to gate==insB.
+ switch (what)
{
- if (from->utf8.empty()) // Checking indent. Take indent from parent + pads if it is absent.
- {
- if (parent_ptr->from->utf8.empty()) // Most likely this is the root namespace.
- {
- from->utf8 = "\n";
- }
- else // Ordinary nested item.
- {
- from->utf8 = parent_ptr->from->utf8 + " ";
- }
- }
- //todo revise
- if (from->next && from->next->kind == type::begin_tag) // Checking begin_tag.
- {
- auto shadow = view{ from->next->utf8 };
- if (utf::pop_front_chars(shadow, whitespaces).empty()) // Set it to '<' if it is absent.
- {
- from->next->utf8 = "<";
- }
- }
+ case type::na: return view{ "{START}" } ;
+ case type::eof: return view{ "{EOF}" } ;
+ case type::eol: return view{ "{EOL}" } ;
+ case type::token: return view{ "{token}" } ;
+ case type::raw_text: return view{ "{raw text}" } ;
+ case type::compact: return view{ "{compact}" } ;
+ case type::tag_reference: return view{ "{reference}" };
+ case type::raw_reference: return view{ "{reference}" };
+ case type::tag_value: return view{ "{value}" } ;
+ case type::comment: return view{ "{comment}" } ;
+ case type::quoted_text: return view_quoted_text ;
+ case type::raw_quoted: return view_quoted_text ;
+ case type::begin_tag: return view_begin_tag ;
+ case type::close_tag: return view_close_tag ;
+ case type::comment_begin: return view_comment_begin ;
+ case type::comment_close: return view_comment_close ;
+ case type::close_inline: return view_close_inline ;
+ case type::empty_tag: return view_empty_tag ;
+ case type::equal: return view_equal ;
+ case type::new_list: return view_new_list ;
+ case type::lua_op_shl: return view_lua_op_shl ;
+ case type::lua_op_less: return view_lua_op_less ;
+ case type::lua_op_less_eq: return view_lua_op_less_eq ;
+ default: return view{ "{unknown}" } ;
}
- continue;
- }
- log("%%Unexpected format for item '%parent_path%/%item->name->utf8%'", prompt::xml, parent_path, item_ptr->name->utf8);
+ };
+ fail_msg(ansi::add("Unexpected '", str(what), "' after '", str(last), "'"));
}
- }
- // xml: Attach the item list to the specified path.
- void attach(view mount_point, vect const& sub_list)
- {
- auto dest_list = take_ptr_list(mount_point);
- if (dest_list.size())
+ auto peek()
{
- auto& parent_ptr = dest_list.front();
- if (parent_ptr->mode == elem::form::pact)
+ last = what;
+ if (temp.empty()) what = type::eof;
+ else if (temp.starts_with(view_comment_begin)) what = type::comment_begin;
+ else if (last == type::na && temp.starts_with(view_begin_tag))
{
- log("%%Destination path is not suitable for merging '%parent_path%'", prompt::xml, mount_point);
- return;
+ if (temp.starts_with(view_close_tag)) what = type::close_tag;
+ else what = type::begin_tag;
}
- auto& parent_hive = parent_ptr->hive;
- auto connect = [&](auto& subitem_name)
+ else if (temp.starts_with(view_close_tag )) what = type::close_tag;
+ else if (temp.starts_with(view_begin_tag )) what = type::begin_tag;
+ else if (temp.starts_with(view_empty_tag )) what = type::empty_tag;
+ else if (temp.starts_with(view_close_inline )) what = type::close_inline;
+ else if (temp.starts_with(view_slash ))
{
- auto iter = parent_hive.find(subitem_name);
- if (iter == parent_hive.end())
- {
- iter = parent_hive.emplace(subitem_name, vect{}).first;
- }
- return iter;
- };
- auto iter = connect(sub_list.front()->name->utf8);
- for (auto& item_ptr : sub_list)
+ if (last == type::token) what = type::compact;
+ else what = type::raw_text;
+ }
+ else if (temp.starts_with(view_quoted_text )
+ || temp.starts_with(view_quoted_text_2)) what = type::quoted_text;
+ else if (temp.starts_with(view_equal )) what = type::equal;
+ else if (temp.starts_with(view_tag_joiner )
+ && (last == type::quoted_text
+ || last == type::tag_value
+ || last == type::tag_reference)) what = type::tag_joiner;
+ else if (temp.starts_with(view_new_list )
+ && last == type::token) what = type::new_list;
+ else if (whitespaces.find(temp.front()) != view::npos) what = type::spaces;
+ else if (view_token_first.find(temp.front()) == view::npos
+ && (last == type::close_tag
+ || last == type::begin_tag
+ || last == type::token
+ || last == type::new_list
+ || last == type::tag_value
+ || last == type::tag_reference
+ || last == type::compact
+ || last == type::quoted_text)) what = type::token;
+ else what = type::raw_text;
+ }
+ auto skip()
+ {
+ switch (what)
{
- auto& current_item_name = iter->first;
- auto& subitem_name = sub_list.front()->name->utf8;
- if (current_item_name != subitem_name) // The case when the list is heterogeneous.
- {
- iter = connect(subitem_name);
- }
- //todo unify
- auto& dest_list2 = iter->second;
- if (item_ptr->base)
- {
- dest_list2.clear();
- }
- auto mode = item_ptr->mode;
- auto from = item_ptr->from;
- auto upto = item_ptr->upto;
- auto next = upto->next;
- if (auto gate = mode == elem::form::attr ? parent_ptr->insA : parent_ptr->insB)
- if (auto prev = gate->prev.lock())
- if (auto past = from->prev.lock())
- {
- from->prev = prev;
- upto->next = gate;
- gate->prev = upto;
- prev->next = from;
- past->next = next; // Release an element from the previous list.
- if (next) next->prev = past;
- item_ptr->boss = parent_ptr;
- dest_list2.push_back(item_ptr);
- continue;
- }
- log("%%Unexpected format for item '%mount_point%%item%'", prompt::xml, mount_point, item_ptr->name->utf8);
+ case type::comment_begin: temp.remove_prefix(view_comment_begin.size()); break;
+ case type::comment_close: temp.remove_prefix(view_comment_close.size()); break;
+ case type::close_tag: temp.remove_prefix(view_close_tag .size()); break;
+ case type::begin_tag: temp.remove_prefix(view_begin_tag .size()); break;
+ case type::empty_tag: temp.remove_prefix(view_empty_tag .size()); break;
+ case type::close_inline: temp.remove_prefix(view_close_inline .size()); break;
+ case type::quoted_text: temp.remove_prefix(view_quoted_text .size()); break;
+ case type::equal: temp.remove_prefix(view_equal .size()); break;
+ case type::new_list: temp.remove_prefix(view_new_list .size()); break;
+ case type::tag_joiner: temp.remove_prefix(view_tag_joiner .size()); break;
+ case type::compact: temp.remove_prefix(view_compact .size()); break;
+ case type::token:
+ case type::top_token:
+ case type::end_token: utf::take_front(temp, view_reference_delims); break;
+ case type::raw_text: utf::take_front(temp, view_find_start); break;
+ case type::tag_numvalue:
+ case type::tag_reference: utf::take_front(temp, view_reference_delims); break;
+ case type::quotes:
+ case type::tag_value: utf::take_quote(temp, temp.front()); break;
+ case type::spaces: utf::trim_front(temp, whitespaces); break;
+ case type::na: utf::take_front(temp, view_find_start); break;
+ case type::unknown:
+ default:
+ temp.remove_prefix(std::min(1, (si32)temp.size()));
+ break;
}
}
- else
+ void append_prepending_spaces() // Append the `data - temp` span as a type::spaces fragment (presumably the whitespace that peek_forward() trimmed from temp but not from data).
{
- log("%%Destination path not found '%mount_point%'", prompt::xml, mount_point);
+ append(type::spaces, data - temp); // Prepending spaces.
}
- }
- void overlay(sptr item_ptr, text path = {})
- {
- auto& item = *item_ptr;
- auto& name = item.name->utf8;
- path += "/" + name;
- auto dest_list = take_ptr_list(path);
- auto is_dest_list = (dest_list.size() && dest_list.front()->base) || dest_list.size() > 1;
- if (is_dest_list || dest_list.empty())
+ void peek_forward() // Advance past the consumed input and look at the next token.
{
- join(path, { item_ptr });
+ data = temp; // Commit everything consumed through temp so far.
+ utf::trim_front(temp, whitespaces); // Skip whitespace in temp only; data still starts at the whitespace so it can be emitted later.
+ peek(); // Presumably updates what/last for the token at the front of temp - defined outside this hunk.
}
- else
+ auto take_pair(sptr& item_ptr, type kind) // Read an item name (stored as a fragment of the given kind), an optional new_list marker and an optional '=' value chain into item_ptr.
{
- auto& dest_ptr = dest_list.front();
- dest_ptr->sync_value(item);
- for (auto& [sub_name, sub_list] : item.hive) // Proceed subelements.
+ append_prepending_spaces();
+ item_ptr->name = append(kind, utf::take_front(temp, view_token_delims));
+ peek_forward();
+ if (what == type::new_list) // The name is followed by the new_list marker: flag the item as a list base.
{
- auto count = sub_list.size();
- if (count == 1 && sub_list.front()->base == faux)
- {
- overlay(sub_list.front(), path);
- }
- else if (count) // It is a list.
- {
- join(path + "/" + sub_name, sub_list);
- }
- else
+ append_prepending_spaces();
+ item_ptr->base = true;
+ append(type::new_list, utf::pop_front(temp, view_new_list.size()));
+ peek_forward();
+ }
+ if (what == type::equal) // A value follows: its fragments are wrapped in value_begin/value_end markers.
+ {
+ append_prepending_spaces();
+ auto vbeg_ptr = append(type::value_begin);
+ append(type::equal, utf::pop_front(temp, view_equal.size()));
+ peek_forward();
+ auto not_empty = true;
+ do // One value fragment per iteration; repeats while fragments are chained with tag_joiner.
{
- log("%%Unexpected tag without data: %tag%", prompt::xml, sub_name);
+ if (what == type::quoted_text) // #quoted_text
+ {
+ append_prepending_spaces();
+ what = type::tag_value;
+ auto delim = temp.front();
+ auto delim_view = view(&delim, 1);
+ append(type::quotes, delim_view);
+ auto frag_ptr = append(type::quoted_text, utf::take_quote(temp, delim));
+ append(type::quotes, delim_view);
+ item_ptr->body.push_back(frag_ptr);
+ peek_forward();
+ }
+ else if (what == type::raw_text) // Expected reference or number.
+ {
+ append_prepending_spaces();
+ what = type::tag_value;
+ auto is_digit = netxs::onlydigits.find(temp.front()) != text::npos;
+ if (is_digit) // #number
+ {
+ auto frag_ptr = append(type::tag_numvalue, utf::take_front(temp, view_digit_delims));
+ item_ptr->body.push_back(frag_ptr);
+ }
+ else // #reference
+ {
+ auto temp2 = temp;
+ utf::take_front(temp2, view_token_delims);
+ while (temp2.size() > 1 && temp2[0] == '/' && view_token_first.find(temp2[1]) == view::npos) // Take all reference segments.
+ {
+ utf::pop_front(temp2, 2); // Pop '/' and the first valid letter of the name.
+ utf::take_front(temp2, view_token_delims); // Pop 'token'.
+ }
+ auto frag_ptr = append(type::tag_reference, temp - temp2);
+ item_ptr->body.push_back(frag_ptr);
+ temp = temp2;
+ }
+ peek_forward();
+ }
+ else
+ {
+ fail();
+ break;
+ }
+ not_empty = what == type::tag_joiner;
+ if (not_empty) // Eat tag_joiner.
+ {
+ append_prepending_spaces();
+ append(type::tag_joiner, utf::pop_front(temp, view_tag_joiner.size()));
+ peek_forward();
+ }
}
+ while (not_empty);
+ auto vend_ptr = append(type::value_end);
+ item_ptr->value_segments.push_back({ vbeg_ptr, vend_ptr }); // Remember the fragment range that holds this value.
}
- }
- }
-
- private:
- vect compacted;
- void fail(text msg)
- {
- page.fail = true;
- page.append(type::error, msg);
- log("%%%msg% at %page.file%:%lines%", prompt::xml, msg, page.file, page.lines());
- }
- void fail(type last, type what)
- {
- auto str = [&](type what)
- {
- switch (what)
+ else if (what != type::compact) // Add placeholder for absent value.
{
- case type::na: return view{ "{START}" } ;
- case type::eof: return view{ "{EOF}" } ;
- case type::eol: return view{ "{EOL}" } ;
- case type::token: return view{ "{token}" } ;
- case type::raw_text: return view{ "{raw text}" } ;
- case type::compact: return view{ "{compact}" } ;
- case type::tag_reference: return view{ "{reference}" };
- case type::raw_reference: return view{ "{reference}" };
- case type::tag_value: return view{ "{value}" } ;
- case type::quoted_text: return view_quoted_text ;
- case type::raw_quoted: return view_quoted_text ;
- case type::begin_tag: return view_begin_tag ;
- case type::close_tag: return view_close_tag ;
- case type::comment_begin: return view_comment_begin ;
- case type::comment_close: return view_comment_close ;
- case type::close_inline: return view_close_inline ;
- case type::empty_tag: return view_empty_tag ;
- case type::equal: return view_equal ;
- case type::new_list: return view_new_list ;
- case type::lua_op_shl: return view_lua_op_shl ;
- case type::lua_op_less: return view_lua_op_less ;
- case type::lua_op_less_eq: return view_lua_op_less_eq ;
- default: return view{ "{unknown}" } ;
+ auto vbeg_ptr = append(type::value_begin);
+ auto vend_ptr = append(type::value_end);
+ item_ptr->value_segments.push_back({ vbeg_ptr, vend_ptr });
}
- };
- fail(ansi::add("Unexpected '", str(what), "' after '", str(last), "'"));
- }
- auto peek(view& data, type& what, type& last)
- {
- last = what;
- if (data.empty()) what = type::eof;
- else if (data.starts_with(view_comment_begin)) what = type::comment_begin;
- else if (last == type::na && data.starts_with(view_begin_tag))
- {
- if (data.starts_with(view_close_tag)) what = type::close_tag;
- else what = type::begin_tag;
- }
- else if (data.starts_with(view_close_tag )) what = type::close_tag;
- else if (data.starts_with(view_begin_tag )) what = type::begin_tag;
- else if (data.starts_with(view_empty_tag )) what = type::empty_tag;
- else if (data.starts_with(view_close_inline )) what = type::close_inline;
- else if (data.starts_with(view_slash ))
- {
- if (last == type::token) what = type::compact;
- else what = type::raw_text;
- }
- else if (data.starts_with(view_quoted_text )
- || data.starts_with(view_quoted_text_2)) what = type::quoted_text;
- else if (data.starts_with(view_equal )) what = type::equal;
- else if (data.starts_with(view_tag_joiner )
- && (last == type::quoted_text
- || last == type::tag_value
- || last == type::tag_reference)) what = type::tag_joiner;
- else if (data.starts_with(view_new_list )
- && last == type::token) what = type::new_list;
- else if (whitespaces.find(data.front()) != view::npos) what = type::spaces;
- else if (last == type::close_tag
- || last == type::begin_tag
- || last == type::token
- || last == type::new_list
- || last == type::raw_text
- || last == type::tag_value
- || last == type::tag_reference
- || last == type::compact
- || last == type::quoted_text) what = type::token;
- else what = type::raw_text;
- }
- auto skip(view& data, type kind)
- {
- auto temp = data;
- switch (kind)
- {
- case type::comment_begin: data.remove_prefix(view_comment_begin.size()); break;
- case type::comment_close: data.remove_prefix(view_comment_close.size()); break;
- case type::close_tag: data.remove_prefix(view_close_tag .size()); break;
- case type::begin_tag: data.remove_prefix(view_begin_tag .size()); break;
- case type::empty_tag: data.remove_prefix(view_empty_tag .size()); break;
- case type::close_inline: data.remove_prefix(view_close_inline .size()); break;
- case type::quoted_text: data.remove_prefix(view_quoted_text .size()); break;
- case type::equal: data.remove_prefix(view_equal .size()); break;
- case type::new_list: data.remove_prefix(view_new_list .size()); break;
- case type::tag_joiner: data.remove_prefix(view_tag_joiner .size()); break;
- case type::compact: data.remove_prefix(view_compact .size()); break;
- case type::token:
- case type::top_token:
- case type::end_token: utf::eat_tail(data, token_delims); break;
- case type::raw_text: utf::take_front(data, rawtext_delims); break;
- case type::tag_numvalue:
- case type::tag_reference: utf::take_front(data, reference_delims); break;
- case type::quotes:
- case type::tag_value: utf::take_quote(data, data.front()); break;
- case type::spaces: utf::trim_front(data, whitespaces); break;
- case type::na: utf::take_front(data, find_start); break;
- case type::unknown:
- default:
- data.remove_prefix(std::min(1, (si32)data.size()));
- break;
}
- return temp.substr(0, temp.size() - data.size());
- }
- auto take_pair(sptr& item_ptr, view& data, type& what, type& last, type kind)
- {
- item_ptr->name = page.append( kind, utf::take_front(data, token_delims));
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(data, whitespaces));
- peek(data, what, last);
- if (what == type::new_list)
+ auto take_comment() // Consume one comment at the front of temp. Returns true when the comment is closed, faux when the input ends inside it.
{
- item_ptr->base = true;
- page.append( type::new_list, utf::pop_front(data, view_new_list.size()));
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(data, whitespaces));
- peek(data, what, last);
+ append_prepending_spaces();
+ append(type::comment_begin, utf::pop_front(temp, view_comment_begin.size()));
+ auto size = temp.find(view_comment_close);
+ if (size == view::npos) // Unterminated comment: keep the rest as unknown data and force eof.
+ {
+ append(type::unknown, temp);
+ data = {};
+ temp = {};
+ last = what;
+ what = type::eof;
+ return faux;
+ }
+ append(type::comment, utf::pop_front(temp, size)); // Comment text up to, but not including, the closing marker.
+ append(type::comment_close, utf::pop_front(temp, view_comment_close.size()));
+ peek_forward();
+ return true;
}
- if (what == type::equal)
+ auto pull_comments() // Consume consecutive comments that begin before the next line break.
{
- page.append( type::equal, utf::pop_front(data, view_equal.size()));
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(data, whitespaces));
- item_ptr->vbeg = page.append( type::value_begin);
- peek(data, what, last);
- auto not_empty = true;
- do
+ while (true) // Pull comments that start on the same line, i.e. with no '\n' in the whitespace skipped before them.
{
- if (what == type::quoted_text)
- {
- what = type::tag_value;
- // #quoted_text
- auto delim = data.front();
- auto delim_view = view(&delim, 1);
- page.append(type::quotes, delim_view);
- auto frag_ptr = page.append(type::quoted_text, utf::take_quote(data, delim));
- page.append(type::quotes, delim_view);
- item_ptr->body.push_back(frag_ptr);
- }
- else if (what == type::raw_text) // Expected reference or number.
+ auto idle = data - temp;
+ if (idle.find('\n') == text::npos && what == type::comment_begin
+ && take_comment())
{
- auto is_digit = netxs::onlydigits.find(data.front()) != text::npos;
- what = is_digit ? type::tag_numvalue
- : type::tag_reference;
- // #reference or number
- auto frag_ptr = page.append(what, utf::take_front(data, reference_delims));
- item_ptr->body.push_back(frag_ptr);
+ continue;
}
else
{
- fail(last, what);
break;
}
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(data, whitespaces));
- peek(data, what, last);
- not_empty = what == type::tag_joiner;
- if (not_empty) // Eat tag_joiner.
- {
- page.append(type::tag_joiner, utf::pop_front(data, view_tag_joiner.size()));
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(data, whitespaces));
- peek(data, what, last);
- }
}
- while (not_empty);
- item_ptr->vend = page.append(type::value_end);
- }
- else if (what != type::compact) // Add placeholder for absent value.
- {
- page.append(type::equal);
- item_ptr->vbeg = page.append(type::value_begin);
- item_ptr->vend = page.append(type::value_end);
}
- }
- auto open(sptr& item_ptr)
- {
- if (!page.data || page.back->kind != type::spaces)
+ void push(sptr& item_ptr, sptr& nested_ptr) // Register nested_ptr as a child of item_ptr.
{
- page.append(type::spaces);
+ auto& nested_name = nested_ptr->name->utf8;
+ item_ptr->hive[nested_name].push_back(nested_ptr); // Children are grouped in the hive by their name.
+ nested_ptr->parent_wptr = item_ptr; // Back-link from the child to its parent.
}
- item_ptr->from = page.back;
- }
- auto seal(sptr& item_ptr)
- {
- item_ptr->upto = page.back;
- }
- auto note(view& data, type& what, type& last)
- {
- auto size = data.find(view_comment_close);
- if (size == view::npos)
+ void read_subsections_and_close(sptr& item_ptr, si32& deep)
{
- page.append(type::unknown, data);
- data = {};
- last = what;
- what = type::eof;
- return faux;
- }
- size += view_comment_close.size();
- page.append(type::comment_begin, utf::pop_front(data, size));
- return true;
- }
- void push(sptr& item_ptr, sptr& nested_ptr)
- {
- auto& nested_name = nested_ptr->name->utf8;
- item_ptr->hive[nested_name].push_back(nested_ptr);
- nested_ptr->boss = item_ptr;
- }
- void read_subsections(sptr& item_ptr, view& data, type& what, type& last, si32& deep)
- {
- do
- {
- auto temp = data;
- utf::trim_front(temp, whitespaces);
- //auto p = std::vector{ std::tuple{ 0, what, last, temp }};
- peek(temp, what, last);
- while (what != type::close_tag && what != type::eof)
+ do
{
- //p.push_back(std::tuple{ 1, what, last, temp });
- if (what == type::quoted_text)
+ auto inside_value = faux;
+ auto vbeg_ptr = page.frag_list.end();
+ while (what != type::close_tag && what != type::eof)
{
- page.append_if_nonempty(type::spaces, data - temp);
- data = temp;
- // #quoted_text
- auto delim = data.front();
- auto delim_view = view(&delim, 1);
- page.append(type::quotes, delim_view);
- auto frag_ptr = page.append(type::raw_quoted, utf::take_quote(data, delim));
- page.append(type::quotes, delim_view);
- item_ptr->body.push_back(frag_ptr);
-
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(data, whitespaces));
- temp = data;
- }
- else if (what == type::tag_joiner)
- {
- page.append_if_nonempty(type::spaces, data - temp);
- page.append(type::tag_joiner, utf::pop_front(temp, view_tag_joiner.size()));
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(temp, whitespaces));
- data = temp;
- peek(temp, what, last);
- if (what == type::quoted_text)
+ if (what == type::quoted_text) // #quoted_text
{
+ if (!inside_value)
+ {
+ inside_value = true;
+ vbeg_ptr = append(type::value_begin);
+ }
+ append_prepending_spaces();
+ auto delim = temp.front();
+ auto delim_view = view(&delim, 1);
+ append(type::quotes, delim_view);
+ auto frag_ptr = append(type::raw_quoted, utf::take_quote(temp, delim));
+ append(type::quotes, delim_view);
+ item_ptr->body.push_back(frag_ptr);
+ peek_forward();
+ if (what != type::tag_joiner)
+ {
+ auto vend_ptr = append(type::value_end);
+ item_ptr->value_segments.push_back({ vbeg_ptr, vend_ptr });
+ inside_value = faux;
+ }
continue;
}
- auto is_reference = what == type::raw_text && netxs::onlydigits.find(temp.front()) == text::npos; // Only literal raw text is allowed as a reference name.
- if (is_reference)
+ else if (what == type::tag_joiner && inside_value)
{
- what = type::tag_reference;
- // #reference
- auto frag_ptr = page.append(type::raw_reference, utf::take_front(data, reference_delims));
- item_ptr->body.push_back(frag_ptr);
- page.append_if_nonempty(type::spaces, utf::pop_front_chars(data, whitespaces));
- temp = data;
- }
- else
- {
- fail(last, what);
- break;
+ append_prepending_spaces();
+ append(type::tag_joiner, utf::pop_front(temp, view_tag_joiner.size()));
+ peek_forward();
+ if (what != type::quoted_text)
+ {
+ auto is_reference = what == type::raw_text && netxs::onlydigits.find(temp.front()) == text::npos; // Only literal raw text is allowed as a reference name.
+ if (!is_reference)
+ {
+ fail();
+ break;
+ }
+ // #reference
+ what = type::tag_reference;
+ append_prepending_spaces();
+ auto frag_ptr = append(type::raw_reference, utf::take_front(temp, view_reference_delims));
+ item_ptr->body.push_back(frag_ptr);
+ peek_forward();
+ if (what != type::tag_joiner)
+ {
+ auto vend_ptr = append(type::value_end);
+ item_ptr->value_segments.push_back({ vbeg_ptr, vend_ptr });
+ inside_value = faux;
+ }
+ }
}
- }
- else if (what == type::raw_text)
- {
- auto iter = utf::find_char_except_skips(temp, '<', view_lua_op_shl, view_lua_op_less, view_lua_op_less_eq);
- if (iter != temp.end())
+ else if ((what == type::raw_text || what == type::tag_joiner) && !inside_value)
{
- auto spaces_len = data.size() - temp.size();
- auto rest_text_len = iter - temp.begin();
- auto size = spaces_len + rest_text_len;
- // #raw_text
- auto frag_ptr = page.append(type::raw_text, utf::pop_front(data, size));
- item_ptr->body.push_back(frag_ptr);
- temp = data;
+ while (what == type::raw_text && temp.size()) // Iterate until ([^<] | <(?![:name:]) | (?![:name:]) | <(?!!--))*
+ {
+ utf::pop_front_until(temp, '<');
+ if (temp.size() > 3)
+ {
+ if (temp[1] == '/')
+ {
+ if (view_token_first.find(temp[2]) == view::npos) // Closing tag 'body.push_back(frag_ptr2);
+ item_ptr->value_segments.push_back({ vbeg_ptr2, vend_ptr2 });
+ data.remove_prefix(raw_block.size()); // data = temp - trailing_spaces;
+ }
+ else // Unexpected end of data.
+ {
+ auto frag_ptr = append(type::unknown, data);
+ item_ptr->body.push_back(frag_ptr);
+ temp = {};
+ data = {};
+ last = what;
+ what = type::eof;
+ break;
+ }
}
- else // Unexpected end of data.
+ else if (what == type::begin_tag && deep < 30)
{
- auto frag_ptr = page.append(type::unknown, data);
- item_ptr->body.push_back(frag_ptr);
- data = {};
- last = what;
- what = type::eof;
- break;
+ auto nested_ptr = ptr::shared(page.frag_list);
+ read_node(nested_ptr, deep + 1);
+ push(item_ptr, nested_ptr);
}
- }
- else if (what == type::begin_tag && deep < 30)
- {
- page.append_if_nonempty(type::spaces, data - temp);
- data = temp;
- auto nested_ptr = ptr::shared();
- what = read_node(nested_ptr, data, deep + 1);
- push(item_ptr, nested_ptr);
- temp = data;
- utf::trim_front(temp, whitespaces);
- }
- else if (what == type::comment_begin) // Proceed ' ...
- {
- auto temp = data;
- auto idle = utf::pop_front_chars(temp, whitespaces);
- auto w = what;
- auto l = last;
- peek(temp, w, l);
- if (idle.find('\n') == text::npos && w == type::comment_begin)
- {
- data = temp;
- what = w;
- last = l;
- page.append(type::spaces, idle);
- if (note(data, what, last)) continue;
- }
- break;
- }
+ append_prepending_spaces();
+ item_ptr->mode = elem::form::flat;
+ item_ptr->insA = append(type::insA);
+ last = type::spaces;
+ append(type::empty_tag, utf::pop_front(temp, view_empty_tag.size()));
+ peek_forward();
+ pull_comments();
}
else if (compacted.empty() && what == type::close_inline) // Proceed '>' nested subs.
{
- item_ptr->insA = last == type::spaces ? page.back
- : page.append(type::spaces);
- page.append(type::close_inline, utf::pop_front(data, view_close_inline.size()));
- read_subsections(item_ptr, data, what, last, deep);
+ append_prepending_spaces();
+ item_ptr->insA = append(type::insA);
+ append(type::close_inline, utf::pop_front(temp, view_close_inline.size()));
+ peek_forward();
+ read_subsections_and_close(item_ptr, deep);
}
else
{
@@ -1377,53 +1210,251 @@ namespace netxs::xml
{
fire = true;
}
+
+ if (item_ptr->name == page.frag_list.end()) // The tag has no name: degrade the broken open tag to unknown data and report it.
+ {
+ auto head = page.frag_list.rbegin();//back;
+ while (head != page.frag_list.rend()) // Reverse find a broken open tag and mark all after it as an unknown data. Checked before dereferencing: *rend() is undefined.
+ {
+ auto& frag = *head;
+ auto kind = frag.kind; // Copy, not reference: the kind is overwritten on the next line and must still be comparable afterwards.
+ frag.kind = type::unknown;
+ if (kind == type::begin_tag) break; // The broken open tag itself is the last fragment to be marked.
+ ++head;
+ }
+ item_ptr->name = append(type::tag_value);
+ fail_msg("Empty tag name");
+ }
+ if (fire)
+ {
+ fail();
+ }
+ if (what == type::eof)
+ {
+ append(type::eof);
+ }
+ item_ptr->seal();
+ while (!compacted.empty()) // Close compact nodes.
+ {
+ item_ptr = compacted.back();
+ item_ptr->seal();
+ compacted.pop_back();
+ }
}
- else
- {
- fire = true;
+
+ parser(sptr& root_ptr, suit& page, view& data) // Parse data into a freshly created root element (stored in root_ptr), appending all fragments to page.
+ : root_ptr{ root_ptr },
+ page{ page },
+ data{ data },
+ temp{ data },
+ what{ type::na },
+ last{ type::na }
+ {
+ root_ptr = ptr::shared(page.frag_list);
+ append(type::spaces);
+ root_ptr->open();
+ root_ptr->mode = elem::form::node;
+ root_ptr->name = append(type::na);
+ root_ptr->insB = append(type::insB); // Gate fragment used later by combine_list as the insertion point for non-attribute children.
+ if (data.size()) // Non-empty input: look at the first token and read the top-level subsections.
+ {
+ utf::trim_front(temp, whitespaces);
+ peek();
+ auto deep = 0;
+ read_subsections_and_close(root_ptr, deep);
+ }
+ append_prepending_spaces();
+ root_ptr->seal();
+ if (page.fail) // Log the whole document when page.fail is set.
+ {
+ log("%%Inconsistent xml data from %file%:\n%config%\n", prompt::xml, page.file.empty() ? "memory"sv : page.file, page.show());
+ }
+ }
- if (!item_ptr->name)
+ };
+
+ suit page; // Fragment storage that parser appends to.
+ sptr root_ptr; // Root element, assigned by parser.
+
+ document(document&&) = default;
+ document(view utf8 = {}, view file = {}) // Parse utf8 immediately; file is handed to page (page.file is used as the source name in log messages).
+ : page{ file }
+ {
+ parser{ root_ptr, page, utf8 };
+ }
+ operator bool () const { return root_ptr ? !root_ptr->hive.empty() : faux; } // True when a root exists and its hive is not empty.
+
+ void load(view utf8, view file = {}) // Re-initialize page for file and parse utf8; parser assigns a new root to root_ptr.
+ {
+ page.init(file);
+ parser{ root_ptr, page, utf8 };
+ }
+ template
+ auto take_direct_ptr_list(sptr node_ptr, qiew path_str, vect& crop) // Walk the '/'-separated path_str down from node_ptr and append the items of the last branch to crop.
+ {
+ utf::trim(path_str, '/');
+ utf::split2(path_str, '/', [&](qiew branch, bool is_end) // Presumably called once per path segment; returning faux appears to stop the walk - confirm against utf::split2.
+ {
+ if (auto iter = node_ptr->hive.find(branch); iter != node_ptr->hive.end())
+ {
+ auto& item_ptr_list = iter->second;
+ if (is_end)
+ {
+ crop.reserve(item_ptr_list.size());
+ for (auto& item_ptr : item_ptr_list)
+ {
+ if constexpr (WithTemplate) crop.push_back(item_ptr); // WithTemplate: take every item.
+ else if (!item_ptr->base) crop.push_back(item_ptr); // Otherwise skip the items flagged as base.
+ }
+ }
+ else if (item_ptr_list.size() && item_ptr_list.front()) // Intermediate segment: descend into the first item of the branch.
+ {
+ node_ptr = item_ptr_list.front();
+ return true;
+ }
+ }
+ return faux;
+ });
+ }
+ template
+ auto take_ptr_list(view path) // Return the items found at path, resolved from the document root; empty when there is no root.
+ {
+ auto item_ptr_list = vect{};
+ if (root_ptr)
{
- auto head = page.back;
- while (true) // Reverse find a broken open tag and mark all after it as an unknown data.
+ utf::trim(path, '/');
+ if (path.empty()) // An empty path designates the root itself.
+ {
+ item_ptr_list.push_back(root_ptr);
+ }
+ else
{
- auto kind = head->kind;
- head->kind = type::unknown;
- if (head == page.data || kind == type::begin_tag) break;
- head = head->prev.lock();
+ take_direct_ptr_list(root_ptr, path, item_ptr_list);
}
- item_ptr->name = page.append(type::tag_value);
- fail("Empty tag name");
}
- if (fire)
+ return item_ptr_list;
+ }
+ auto combine_list(vect const& list, view path) // Move the items of list whose name equals the last segment of path into this document under the parent addressed by the rest of path.
+ {
+ utf::trim(path, '/');
+ auto [parent_path, branch_path] = utf::split_back(path, '/');
+ auto dest_hosts = take_ptr_list(parent_path);
+ auto parent_ptr = dest_hosts.size() ? dest_hosts.front() : root_ptr; // Fall back to the root when the parent path yields nothing.
+ if (parent_ptr->mode == elem::form::pact) // Parents in elem::form::pact mode are rejected.
+ {
+ log("%%Destination path is not suitable for merging '%parent_path%'", prompt::xml, parent_path);
+ return;
+ }
+ auto& hive = parent_ptr->hive;
+ auto iter = hive.find(branch_path);
+ if (iter == hive.end()) // Create the branch when the parent has no such entry yet.
{
- fail(last, what);
+ iter = hive.emplace(branch_path , vect{}).first;
}
- if (what == type::eof)
+ auto& dest_list = iter->second;
+ if (dest_list.size() && dest_list.front()->base == faux) // Start a new list if the existing list was not declared as a list using an asterisk.
{
- page.append(type::eof);
+ for (auto& dest_item_ptr : dest_list) // Drop the text fragments of every existing item.
+ {
+ auto from = dest_item_ptr->from;
+ auto upto = dest_item_ptr->upto;
+ page.frag_list.erase(from, std::next(upto));
+ }
+ dest_list.clear();
}
- seal(item_ptr);
- while (!compacted.empty()) // Close compact nodes.
+ for (auto& item_ptr : list) if (item_ptr && item_ptr->name->utf8 == branch_path) // Only items named like the branch are merged.
{
- item_ptr = compacted.back();
- seal(item_ptr);
- compacted.pop_back();
+ //todo unify
+ if (item_ptr->base) // An incoming item flagged as base discards the destination list collected so far.
+ {
+ for (auto& dest_item_ptr : dest_list)
+ {
+ auto from = dest_item_ptr->from;
+ auto upto = dest_item_ptr->upto;
+ page.frag_list.erase(from, std::next(upto));
+ }
+ dest_list.clear();
+ }
+ auto mode = item_ptr->mode;
+ auto from = item_ptr->from;
+ auto upto = item_ptr->upto;
+ auto& item_frag_list = item_ptr->frag_list;
+ auto inlined = mode == elem::form::attr;
+ auto gate = inlined ? parent_ptr->insA : parent_ptr->insB; // Attribute-form items go in front of the parent's insA gate, all others in front of insB.
+ if (gate != page.frag_list.end())
+ if (from != item_frag_list.end())
+ if (upto != item_frag_list.end())
+ {
+ page.frag_list.splice(gate, item_frag_list, from, std::next(upto)); // Move utf8 fragments.
+ item_ptr->parent_wptr = parent_ptr;
+ dest_list.push_back(item_ptr);
+ if (inlined)
+ {
+ if (from->utf8.empty())
+ {
+ from->utf8.push_back(' '); // Add space between oldname=value and newname=value.
+ }
+ }
+ else //if (!inlined) // Prepend '\n <' to item when inserting it to gate==insB.
+ {
+ if (from->utf8.empty()) // Checking indent. Take indent from parent + pads if it is absent.
+ {
+ if (parent_ptr->from->utf8.empty()) // Most likely this is the root namespace.
+ {
+ from->utf8 = "\n";
+ }
+ else // Ordinary nested item.
+ {
+ from->utf8 = parent_ptr->from->utf8 + " ";
+ }
+ }
+ //todo revise
+ auto next = std::next(from);
+ if (next != page.frag_list.end() && next->kind == type::begin_tag) // Checking begin_tag.
+ {
+ auto shadow = view{ next->utf8 };
+ if (utf::pop_front_chars(shadow, whitespaces).empty()) // Set it to '<' if it is absent.
+ {
+ next->utf8 = "<";
+ }
+ }
+ }
+ continue;
+ }
+ log("%%Unexpected format for item '%parent_path%/%item->name->utf8%'", prompt::xml, parent_path, item_ptr->name->utf8);
}
- return what;
}
- void read(view& data)
+ void combine_item(sptr item_ptr, text path = {}) // Merge item_ptr (and, recursively, its subelements) into this document below path.
{
- auto what = type::na;
- auto last = type::na;
- auto deep = 0;
- open(root);
- root->mode = elem::form::node;
- root->name = page.append(type::na);
- root->insB = page.append(type::spaces);
- read_subsections(root, data, what, last, deep);
- seal(root);
- if (page.fail) log("%%Inconsistent xml data from %file%:\n%config%\n", prompt::xml, page.file.empty() ? "memory"sv : page.file, page.show());
+ auto& item = *item_ptr;
+ auto& name = item.name->utf8;
+ path += "/" + name;
+ auto dest_list = take_ptr_list(path);
+ auto is_dest_list = (dest_list.size() && dest_list.front()->base) || dest_list.size() > 1;
+ if (is_dest_list || dest_list.empty()) // The destination is a list or does not exist yet: hand the item to combine_list.
+ {
+ combine_list({ item_ptr }, path);
+ }
+ else // Exactly one plain destination item: sync its value and merge the children one by one.
+ {
+ auto& dest_ptr = dest_list.front();
+ dest_ptr->sync_value(item);
+ for (auto& [sub_name, sub_list] : item.hive) // Proceed subelements.
+ {
+ auto count = sub_list.size();
+ if (count == 1 && sub_list.front()->base == faux) // A single item not flagged as base is merged recursively.
+ {
+ combine_item(sub_list.front(), path);
+ }
+ else if (count) // It is a list.
+ {
+ combine_list(sub_list, path + "/" + sub_name);
+ }
+ else
+ {
+ log("%%Unexpected tag without data: %tag%", prompt::xml, sub_name);
+ }
+ }
+ }
+ }
};
@@ -1433,22 +1464,24 @@ namespace netxs::xml
using sptr = xml::document::sptr;
using list = std::list;
- netxs::sptr document; // settings: XML document.
+ xml::document document; // settings: XML document.
vect tmpbuff; // settings: Temp buffer.
list context; // settings: Current working context stack (reference context).
settings() = default;
- settings(settings const&) = default;
settings(view utf8_xml) // Build the settings by parsing utf8_xml.
- : document{ ptr::shared(utf8_xml, "") }
+ : document{ utf8_xml }
+ { }
+ settings(settings const& config) // Copy by re-parsing the text returned by config.document.page.utf8(); tmpbuff and context are not copied.
+ : document{ config.document.page.utf8() }
+ { }
- settings(xml::document& other)
- : document{ ptr::shared(std::move(other)) }
+ settings(xml::document&& document) // Take ownership of an already parsed document.
+ : document{ std::move(document) }
{ }
sptr get_context() // Return the last element of the context stack, or the document root when the stack is empty.
{
- auto context_path = context.size() ? context.back() : document->root;
+ auto context_path = context.size() ? context.back() : document.root_ptr;
return context_path;
}
// settings: Push document context by name.
@@ -1490,12 +1523,12 @@ namespace netxs::xml
}
friend auto& operator << (std::ostream& s, settings const& p) // Stream the result of document.page.show().
{
- return s << p.document->page.show();
+ return s << p.document.page.show();
}
// settings: Lookup document context for item_ptr by its reference name path.
void _find_namepath(view reference_namepath, sptr& item_ptr)
{
- auto item_ptr_list = document->take_ptr_list(reference_namepath);
+ auto item_ptr_list = document.take_ptr_list(reference_namepath);
if (item_ptr_list.size())
{
item_ptr = item_ptr_list.back();
@@ -1519,7 +1552,7 @@ namespace netxs::xml
item_ptr = item_ptr_list.front();
break;
}
- context_ptr = context_ptr->get_parent_ptr();
+ context_ptr = context_ptr->parent_wptr.lock();
}
if (!context_ptr)
{
@@ -1543,7 +1576,7 @@ namespace netxs::xml
auto& reference_name = value_placeholder->utf8;
if (!value_placeholder->busy)
{
- value_placeholder->busy = true;
+ value_placeholder->busy = 1;
if (auto base_item_ptr = settings::_find_name(reference_name))
{
settings::_take_value(base_item_ptr, value);
@@ -1552,7 +1585,7 @@ namespace netxs::xml
{
log("%%%red%Reference name '%ref%' not found%nil%", prompt::xml, ansi::fgc(redlt), reference_name, ansi::nil());
}
- value_placeholder->busy = faux;
+ value_placeholder->busy = 0;
}
else
{
@@ -1584,17 +1617,17 @@ namespace netxs::xml
auto& reference_name = value_placeholder->utf8;
if (!value_placeholder->busy) // Silently ignore reference loops.
{
- value_placeholder->busy = true;
+ value_placeholder->busy = 1;
if (auto base_ptr = settings::_find_name(reference_name)) // Lookup outside.
{
settings::_take_ptr_list_of(base_ptr, attribute, item_ptr_list);
}
- value_placeholder->busy = faux;
+ value_placeholder->busy = 0;
}
}
}
// Take native attribute list.
- subsection_ptr->get_list3(attribute, item_ptr_list);
+ document.take_direct_ptr_list(subsection_ptr, attribute, item_ptr_list);
}
auto take_ptr_list_of(sptr subsection_ptr, view attribute)
{
@@ -1678,8 +1711,7 @@ namespace netxs::xml
{
auto ctx = settings::push_context(context_ptr);
auto item_ptr_list = vect{};
- context_ptr->get_list3(frompath, item_ptr_list);
- //if (auto item_ptr = settings::_find_name(frompath))
+ document.take_direct_ptr_list(context_ptr, frompath, item_ptr_list);
if (auto item_ptr = item_ptr_list.size() ? item_ptr_list.back() : sptr{})
{
auto crop = settings::take_value(item_ptr);
@@ -1781,20 +1813,20 @@ namespace netxs::xml
}
auto utf8() // Return the document text as produced by page.utf8().
{
- return document->page.utf8();
+ return document.page.utf8();
}
template
auto fuse(view utf8_xml, view filepath = {}) // Parse utf8_xml into a temporary document and merge its root into the current settings; empty input is a no-op.
{
if (utf8_xml.empty()) return;
- if (filepath.size()) document->page.file = filepath;
+ if (filepath.size()) document.page.file = filepath;
context.clear(); // The context stack is reset before the merge.
auto tmp_config = xml::document{ utf8_xml, filepath };
if constexpr (Print)
{
log("%%Settings from %file%:\n%config%", prompt::xml, filepath.empty() ? "memory"sv : filepath, tmp_config.page.show());
}
- document->overlay(tmp_config.root);
+ document.combine_item(tmp_config.root_ptr);
}
};
namespace options
diff --git a/src/vtm.hpp b/src/vtm.hpp
index 06c9f876dc..3efcba929b 100644
--- a/src/vtm.hpp
+++ b/src/vtm.hpp
@@ -1253,7 +1253,7 @@ namespace netxs::app::vtm
});
log("%%Run %%", prompt::host, ansi::hi(utf::debase437(utf8_xml)));
auto appconf = settings{ utf8_xml };
- auto item_ptr = appconf.document->root;
+ auto item_ptr = appconf.document.root_ptr;
auto menuid = config.settings::take_value_from(item_ptr, attr::id, ""s);
auto taskbar_context = config.settings::push_context(path::taskbar);
if (menu_list.contains(menuid))
diff --git a/src/vtm.xml b/src/vtm.xml
index 590100fd80..c670a3c02d 100644
--- a/src/vtm.xml
+++ b/src/vtm.xml
@@ -5,7 +5,7 @@ R"==(
-
+