Skip to content

Commit

Permalink
fix string whitespace behaviour
Browse files Browse the repository at this point in the history
  • Loading branch information
tjol committed Jun 13, 2024
1 parent fe6ed6e commit 33a7994
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 22 deletions.
82 changes: 66 additions & 16 deletions src/str.c
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,9 @@ kdl_owned_string kdl_escape_v2(kdl_str const* s, kdl_escape_mode mode)
kdl_owned_string kdl_unescape_v2(kdl_str const* s)
{
kdl_owned_string result;
kdl_owned_string dedented = _kdl_dedent_multiline_string(s);
kdl_owned_string no_ws_escapes = _kdl_remove_escaped_whitespace(s);
kdl_str pre_dedent = kdl_borrow_str(&no_ws_escapes);
kdl_owned_string dedented = _kdl_dedent_multiline_string(&pre_dedent);
kdl_str escaped = kdl_borrow_str(&dedented);

size_t orig_len = escaped.len;
Expand Down Expand Up @@ -411,21 +413,8 @@ kdl_owned_string kdl_unescape_v2(kdl_str const* s)
break;
}
default:
// See if this is a whitespace escape
if (_kdl_is_whitespace(KDL_CHARACTER_SET_V2, c) || _kdl_is_newline(c)) {
kdl_str tail = escaped; // make a copy - we will advance too far
while ((status = _kdl_pop_codepoint(&tail, &c)) == KDL_UTF8_OK
&& (_kdl_is_whitespace(KDL_CHARACTER_SET_V2, c) || _kdl_is_newline(c))) {
// skip this char
escaped = tail;
}
// if there is a UTF-8 error, this will be discovered on the
// next iteration of the outer loop
break;
} else {
// Not whitespace - backslash is illegal here
goto unesc_error;
}
// invalid escape
goto unesc_error;
}
} else {
// Nothing special, copy the character
Expand All @@ -435,6 +424,7 @@ kdl_owned_string kdl_unescape_v2(kdl_str const* s)

if (status == KDL_UTF8_EOF) {
// ok
kdl_free_string(&no_ws_escapes);
kdl_free_string(&dedented);
result = _kdl_buf_to_string(&buf);
return result;
Expand All @@ -443,6 +433,7 @@ kdl_owned_string kdl_unescape_v2(kdl_str const* s)
}

unesc_error:
kdl_free_string(&no_ws_escapes);
kdl_free_string(&dedented);
_kdl_free_write_buffer(&buf);
result = (kdl_owned_string){NULL, 0};
Expand Down Expand Up @@ -558,3 +549,62 @@ kdl_owned_string _kdl_dedent_multiline_string(kdl_str const* s)
result = (kdl_owned_string){NULL, 0};
return result;
}

// Remove "whitespace escapes" from a KDL v2 string body.
//
// A whitespace escape is a backslash followed by one or more whitespace
// and/or newline characters; the backslash and the whole whitespace run are
// dropped from the output. Every other backslash is copied through unchanged
// so that kdl_unescape_v2() can process the remaining escapes afterwards.
//
// An escaped backslash ("\\") is copied as a unit: its second backslash must
// not be treated as the start of a whitespace escape, otherwise input such as
// `\\ x` would collapse to `\x`.
//
// Parameters:
//   s - the string to process (borrowed, not modified)
//
// Returns a newly allocated string that the caller must release with
// kdl_free_string(). Returns {NULL, 0} if the write buffer cannot be
// allocated, if s->data is NULL, if the input contains a character that is
// illegal in KDL v2, or if the input is not valid UTF-8.
kdl_owned_string _kdl_remove_escaped_whitespace(kdl_str const* s)
{
    kdl_owned_string result;
    kdl_str escaped = *s;

    // The output is never longer than the input, so the input length is a
    // sufficient initial capacity.
    size_t orig_len = escaped.len;
    _kdl_write_buffer buf = _kdl_new_write_buffer(orig_len);
    if (buf.buf == NULL) goto unesc_error;
    if (escaped.data == NULL) goto unesc_error;

    uint32_t c = 0;
    kdl_utf8_status status;

    while ((status = _kdl_pop_codepoint(&escaped, &c)) == KDL_UTF8_OK) {
        if (_kdl_is_illegal_char(KDL_CHARACTER_SET_V2, c)) {
            goto unesc_error;
        } else if (c == '\\') {
            kdl_str tail = escaped; // make a copy - we will advance too far
            bool removed_whitespace = false;

            while ((status = _kdl_pop_codepoint(&tail, &c)) == KDL_UTF8_OK
                && (_kdl_is_whitespace(KDL_CHARACTER_SET_V2, c) || _kdl_is_newline(c))) {
                // skip this char
                escaped = tail;
                removed_whitespace = true;
            }

            switch (status) { // why did the loop end?
            case KDL_UTF8_OK:
            case KDL_UTF8_EOF:
                break;
            default:
                // UTF-8 decoding error while looking ahead
                goto unesc_error;
            }

            if (!removed_whitespace) {
                // no whitespace -> keep backslash for kdl_unescape_v2()
                _kdl_buf_push_char(&buf, '\\');

                if (status == KDL_UTF8_OK && c == '\\') {
                    // Escaped backslash: copy the second backslash as well and
                    // step past it, so the outer loop does not mistake it for
                    // the start of a whitespace escape.
                    _kdl_buf_push_char(&buf, '\\');
                    escaped = tail;
                }
            }
            // In all other cases `escaped` still points at the first
            // character that was not consumed; the outer loop handles it.
        } else {
            // Nothing special, copy the character
            _kdl_buf_push_codepoint(&buf, c);
        }
    }

    if (status == KDL_UTF8_EOF) {
        // ok - reached the end of the input without errors
        result = _kdl_buf_to_string(&buf);
        return result;
    } else {
        goto unesc_error;
    }

unesc_error:
    _kdl_free_write_buffer(&buf);
    result = (kdl_owned_string){NULL, 0};
    return result;
}
1 change: 1 addition & 0 deletions src/str.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ KDL_NODISCARD kdl_owned_string kdl_escape_v2(kdl_str const* s, kdl_escape_mode m
KDL_NODISCARD kdl_owned_string kdl_unescape_v2(kdl_str const* s);

KDL_NODISCARD kdl_owned_string _kdl_dedent_multiline_string(kdl_str const* s);
KDL_NODISCARD kdl_owned_string _kdl_remove_escaped_whitespace(kdl_str const* s);

#endif // KDL_INTERNAL_STR_H_
6 changes: 0 additions & 6 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,18 +136,12 @@ set(FUZZY_KDL_TESTS_LIST_V2
no_decimal_exponent.kdl # float representation not consistent with other test cases
)
string(REPLACE ";" ":" FUZZY_KDL_TESTS_V2 "${FUZZY_KDL_TESTS_LIST_V2}")
set(SKIP_KDL_TESTS_LIST_V2
escaped_whitespace.kdl # invalid multi-line string
string_escaped_literal_whitespace.kdl # invalid multi-line string
)
string(REPLACE ";" ":" SKIP_KDL_TESTS_V2 "${SKIP_KDL_TESTS_LIST_V2}")

add_executable(example_doc_test_v2 example_doc_test.c)
target_link_libraries(example_doc_test_v2 kdl test_util ckdl-cat)
target_compile_definitions(example_doc_test_v2 PRIVATE
"KDL_TEST_CASES_ROOT=\"${KDL_TEST_CASES_ROOT}\""
"FUZZY_KDL_TESTS=\"${FUZZY_KDL_TESTS_V2}\""
"SKIP_KDL_TESTS=\"${SKIP_KDL_TESTS_V2}\""
"KDL_VERSION=KDL_VERSION_2")
add_test(NAME example_doc_test_v2 COMMAND "$<TARGET_FILE:example_doc_test_v2>" "${KDL_TEST_CASES_ROOT}")
#################################################
Expand Down

0 comments on commit 33a7994

Please sign in to comment.