Skip to content

Commit

Permalink
Merge pull request #25 from dwd/predicate-buffer-tr
Browse files Browse the repository at this point in the history
Use buffer_ptr inside predicate skips
  • Loading branch information
dwd authored Nov 7, 2024
2 parents 28b12fe + d91229c commit 728c809
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 26 deletions.
36 changes: 16 additions & 20 deletions rapidxml.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1679,18 +1679,17 @@ namespace rapidxml

template<int Flags>
view_type decode_data_value_low(view_type const & v) {
auto * init = v.data();
auto * first = init;
buffer_ptr first{v};
if (Flags & parse_normalize_whitespace) {
skip<text_pure_with_ws_pred,0>(first);
} else {
skip<text_pure_no_ws_pred,0>(first);
}
if (*first == '<') return v;
auto buf = this->allocate_span(v);
if (!*first) return v;
auto buf = this->allocate_string(v);
auto * start = buf.data();
auto * tmp = start;
auto * end = (Flags & parse_normalize_whitespace) ?
buffer_ptr tmp{buf};
auto end = (Flags & parse_normalize_whitespace) ?
skip_and_expand_character_refs<text_pred,text_pure_with_ws_pred,Flags>(tmp) :
skip_and_expand_character_refs<text_pred,text_pure_no_ws_pred,Flags>(tmp);
// Trim trailing whitespace if flag is set; leading was already trimmed by whitespace skip after >
Expand All @@ -1715,14 +1714,13 @@ namespace rapidxml

// Decode an attribute value whose delimiting quote character is Q, returning a
// view of the decoded text.
// NOTE(review): this listing is a rendered diff and appears to show removed and
// added lines interleaved (`first`, `buf`, `start` and `end` are each declared
// twice), so it is not compilable exactly as shown.
template<Ch Q>
view_type decode_attr_value_low(view_type const & v) {
Ch const * init = v.data();
Ch const * first = init;
buffer_ptr first{v};
// Advance `first` using the "pure" attribute-value predicate
// (presumably over characters that need no translation -- confirm against the predicate).
skip<attribute_value_pure_pred<Q>,0>(first);
// Stopped on the closing quote: return the input view unchanged.
if (*first == Q) return v;
auto buf = this->allocate_span(v);
Ch * start = buf.data();
Ch * tmp = start;
Ch * end = skip_and_expand_character_refs<attribute_value_pred<Q>,attribute_value_pure_pred<Q>,0>(tmp);
// Same early return as above, additionally taken when the skip stopped on a NUL character.
if (!*first || *first == Q) return v;
// Obtain a buffer from the document for v (presumably a writable copy -- confirm),
// then expand character references over it.
auto buf = this->allocate_string(v);
const Ch * start = buf.data();
buffer_ptr tmp{buf};
// The return value is the new end of the rewritten text.
const Ch * end = skip_and_expand_character_refs<attribute_value_pred<Q>,attribute_value_pure_pred<Q>,0>(tmp);
return {start, end};
}

Expand Down Expand Up @@ -1922,24 +1920,24 @@ namespace rapidxml
// Skip characters until predicate evaluates to true while doing the following:
// - replacing XML character entity references with proper characters (&apos; &amp; &quot; &lt; &gt; &#...;)
// - condensing whitespace sequences to single space character
template<class StopPred, class StopPredPure, int Flags>
static Ch *skip_and_expand_character_refs(Ch *&text)
template<class StopPred, class StopPredPure, int Flags, typename Chp>
static const Ch *skip_and_expand_character_refs(Chp text)
{
// If entity translation, whitespace condensing and whitespace trimming are all disabled, use plain skip
if (Flags & parse_no_entity_translation &&
!(Flags & parse_normalize_whitespace) &&
!(Flags & parse_trim_whitespace))
{
skip<StopPred, Flags>(text);
return text;
return &*text;
}

// Use simple skip until first modification is detected
skip<StopPredPure, Flags>(text);

// Use translation skip
Ch *src = text;
Ch *dest = src;
Chp src = text;
Ch * dest = const_cast<Ch *>(&*src);
while (StopPred::test(*src))
{
// If entity translation is enabled
Expand Down Expand Up @@ -2063,9 +2061,7 @@ namespace rapidxml
}

// Return new end
text = src;
return dest;

}

///////////////////////////////////////////////////////////////////////
Expand Down
12 changes: 6 additions & 6 deletions test/low-level-parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,31 @@ TEST(PredicateBuffer, Skip) {
// Calls skip_and_expand_character_refs with only parse_no_entity_translation set
// on input that contains an entity followed by '<'.
// NOTE(review): this listing is a rendered diff and appears to show removed and
// added lines interleaved (the `start = ...` / `auto end = ...` pair and the two
// EXPECT_EQ lines), so it is not compilable exactly as shown.
TEST(Predicates, SkipAndExpand) {
std::string test_data{"&hello;<"};
char * start = const_cast<char *>(test_data.c_str());
start = rapidxml::xml_document<>::skip_and_expand_character_refs<
auto end = rapidxml::xml_document<>::skip_and_expand_character_refs<
rapidxml::xml_document<>::text_pred,
rapidxml::xml_document<>::text_pure_with_ws_pred,
rapidxml::parse_no_entity_translation>(start);
// The returned position is expected to sit on the '<' that follows the entity.
EXPECT_EQ(*start, '<');
EXPECT_EQ(*end, '<');
}

// Same call as SkipAndExpand, but the input ends right after the entity with no
// '<' to stop on.
// NOTE(review): this listing is a rendered diff and appears to show removed and
// added lines interleaved, so it is not compilable exactly as shown.
TEST(Predicates, SkipAndExpandShort) {
std::string test_data{"&hello;"};
char * start = const_cast<char *>(test_data.c_str());
start = rapidxml::xml_document<>::skip_and_expand_character_refs<
auto end = rapidxml::xml_document<>::skip_and_expand_character_refs<
rapidxml::xml_document<>::text_pred,
rapidxml::xml_document<>::text_pure_with_ws_pred,
rapidxml::parse_no_entity_translation>(start);
// The returned position is expected to sit on the string's NUL terminator.
EXPECT_EQ(*start, '\0');
EXPECT_EQ(*end, '\0');
}

// Same call as SkipAndExpand, but the input is an entity reference that is never
// closed with ';' and has no '<' after it.
// NOTE(review): this listing is a rendered diff and appears to show removed and
// added lines interleaved, so it is not compilable exactly as shown.
TEST(Predicates, SkipAndExpandShorter) {
std::string test_data{"&hell"};
char * start = const_cast<char *>(test_data.c_str());
start = rapidxml::xml_document<>::skip_and_expand_character_refs<
auto end = rapidxml::xml_document<>::skip_and_expand_character_refs<
rapidxml::xml_document<>::text_pred,
rapidxml::xml_document<>::text_pure_with_ws_pred,
rapidxml::parse_no_entity_translation>(start);
// The returned position is expected to sit on the string's NUL terminator.
EXPECT_EQ(*start, '\0');
EXPECT_EQ(*end, '\0');
}

TEST(ParseFns, ParseBom) {
Expand Down
39 changes: 39 additions & 0 deletions test/parse-simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,42 @@ TEST(ParseOptions, OpenOnlyFastest) {
subdoc.validate();
}
}

// The named entity &apos; inside element text must come back as a single apostrophe.
TEST(Parser_Emoji, Single) {
    std::string xml_text{"<h>&apos;</h>"};
    rapidxml::xml_document<> document;
    document.parse<rapidxml::parse_default>(xml_text);

    auto root = document.first_node();
    EXPECT_EQ("'", root->value());
}

// The decimal character reference &#1234; (U+04D2) must come back as the
// two-byte UTF-8 sequence D3 92.
TEST(Parser_Emoji, SingleUni) {
    std::string xml_text{"<h>&#1234;</h>"};
    rapidxml::xml_document<> document;
    document.parse<rapidxml::parse_default>(xml_text);

    auto root = document.first_node();
    EXPECT_EQ("\xD3\x92", root->value());
}

// The decimal character reference &#128512; (U+1F600) must come back as the
// four-byte UTF-8 sequence F0 9F 98 80, and the value must be exactly four
// code units long.
TEST(Parser_Emoji, SingleEmoji) {
    std::string foo{"<h>&#128512;</h>"};
    rapidxml::xml_document<> doc;
    doc.parse<rapidxml::parse_default>(foo);
    EXPECT_EQ("\xF0\x9F\x98\x80", doc.first_node()->value());
    // Unsigned literal: size() is unsigned, and a plain `4` makes gtest compare
    // int against an unsigned type, which trips -Wsign-compare.
    EXPECT_EQ(4u, doc.first_node()->value().size());
}

// Parses a long text ending in an emoji character reference, overwrites the
// node value with a longer allocated string, then re-parses a short
// emoji-only document into the same xml_document. The second parse must yield
// exactly the four-byte emoji.
TEST(Parser_Emoji, SingleEmojiReuse) {
    std::string bar("<h>Sir I bear a rhyme excelling in mystic verse and magic spelling &#128512;</h>");
    rapidxml::xml_document<> doc;
    rapidxml::xml_document<> parent_doc;
    parent_doc.parse<rapidxml::parse_default|rapidxml::parse_open_only>("<open>");

    // First parse: text followed by the emoji reference.
    doc.parse<rapidxml::parse_default>(bar, &parent_doc);
    EXPECT_EQ("Sir I bear a rhyme excelling in mystic verse and magic spelling \xF0\x9F\x98\x80", doc.first_node()->value());

    // Replace the value with a string allocated from the node's owning document.
    auto doc_a = doc.first_node()->document();
    doc.first_node()->value(doc_a->allocate_string("Sausages are the loneliest fruit, and are but one of the strange things I have witnessed in my long and interesting life."));
    EXPECT_EQ("Sausages are the loneliest fruit, and are but one of the strange things I have witnessed in my long and interesting life.", doc.first_node()->value());

    // Second parse into the same document object, from new source text.
    bar = "<h>&#128512;</h>";
    doc.parse<rapidxml::parse_default>(bar, &parent_doc);
    EXPECT_EQ("\xF0\x9F\x98\x80", doc.first_node()->value());
    // Unsigned literal: size() is unsigned, and a plain `4` makes gtest compare
    // int against an unsigned type, which trips -Wsign-compare.
    EXPECT_EQ(4u, doc.first_node()->value().size());
}

0 comments on commit 728c809

Please sign in to comment.