Skip to content

Commit 468caef

Browse files
authored
Merge pull request #9005 from diffblue/string_utils_string_view
use `std::string_view` in string_utils
2 parents 30ec11e + c2db038 commit 468caef

9 files changed

Lines changed: 443 additions & 306 deletions

File tree

src/util/string_utils.cpp

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,27 @@ Author: Daniel Poetzl
1818
/// in the middle of the string is left unchanged
1919
/// \param s: the string to strip
2020
/// \return The stripped string
std::string strip_string(std::string_view s)
{
  // std::isspace has undefined behaviour when given a negative value other
  // than EOF, which is what a plain `char` holding a byte >= 0x80 becomes on
  // platforms where char is signed. Route every character through
  // unsigned char first.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };

  // First character that is not whitespace; if there is none the input is
  // empty or consists of whitespace only.
  const auto first = std::find_if_not(s.begin(), s.end(), is_space);
  if(first == s.end())
    return "";

  // Search backwards for the last non-whitespace character. This search
  // always succeeds because `first` exists; base() converts the reverse
  // iterator into the forward iterator one past that character.
  const auto last = std::find_if_not(s.rbegin(), s.rend(), is_space).base();

  // copy happens here; this could return a view in the future
  return std::string(first, last);
}
3839

3940
void split_string(
40-
const std::string &s,
41+
std::string_view s,
4142
char delim,
4243
std::vector<std::string> &result,
4344
bool strip,
@@ -54,42 +55,48 @@ void split_string(
5455
return;
5556
}
5657

57-
std::string::size_type n=s.length();
58+
std::size_t n = s.length();
5859
INVARIANT(n > 0, "Empty string case should already be handled");
5960

60-
std::string::size_type start=0;
61-
std::string::size_type i;
61+
std::size_t start = 0;
62+
std::size_t i;
6263

6364
for(i=0; i<n; i++)
6465
{
6566
if(s[i]==delim)
6667
{
67-
std::string new_s=s.substr(start, i-start);
68+
// result owns std::strings rather than string_views: callers
69+
// routinely pass a temporary as `s` (e.g. some_function()
70+
// returning std::string), so the input may not outlive the
71+
// result.
72+
std::string new_s = std::string{s.substr(start, i - start)};
6873

6974
if(strip)
7075
new_s=strip_string(new_s);
7176

7277
if(!remove_empty || !new_s.empty())
73-
result.push_back(new_s);
78+
result.push_back(std::move(new_s));
7479

7580
start=i+1;
7681
}
7782
}
7883

79-
std::string new_s=s.substr(start, n-start);
84+
// result owns std::strings rather than string_views: see the
85+
// comment above the first push_back in the loop.
86+
std::string new_s = std::string{s.substr(start, n - start)};
8087

8188
if(strip)
8289
new_s=strip_string(new_s);
8390

8491
if(!remove_empty || !new_s.empty())
85-
result.push_back(new_s);
92+
result.push_back(std::move(new_s));
8693

8794
if(!remove_empty && result.empty())
8895
result.push_back("");
8996
}
9097

9198
void split_string(
92-
const std::string &s,
99+
std::string_view s,
93100
char delim,
94101
std::string &left,
95102
std::string &right,
@@ -102,31 +109,26 @@ void split_string(
102109

103110
if(result.size() != 2)
104111
{
105-
throw deserialization_exceptiont{"expected string '" + s +
106-
"' to contain two substrings "
107-
"delimited by " +
108-
delim + " but has " +
109-
std::to_string(result.size())};
112+
throw deserialization_exceptiont{
113+
"expected string '" + std::string{s} +
114+
"' to contain two substrings "
115+
"delimited by " +
116+
delim + " but has " + std::to_string(result.size())};
110117
}
111118

112119
left=result[0];
113120
right=result[1];
114121
}
115122

116-
std::vector<std::string> split_string(
117-
const std::string &s,
118-
char delim,
119-
bool strip,
120-
bool remove_empty)
123+
std::vector<std::string>
124+
split_string(std::string_view s, char delim, bool strip, bool remove_empty)
121125
{
122126
std::vector<std::string> result;
123127
split_string(s, delim, result, strip, remove_empty);
124128
return result;
125129
}
126130

127-
std::string trim_from_last_delimiter(
128-
const std::string &s,
129-
const char delim)
131+
std::string trim_from_last_delimiter(std::string_view s, const char delim)
130132
{
131133
std::string result;
132134
const size_t index=s.find_last_of(delim);
@@ -135,7 +137,7 @@ std::string trim_from_last_delimiter(
135137
return result;
136138
}
137139

138-
std::string escape(const std::string &s)
140+
std::string escape(std::string_view s)
139141
{
140142
std::string result;
141143

@@ -150,7 +152,7 @@ std::string escape(const std::string &s)
150152
return result;
151153
}
152154

153-
std::string escape_non_alnum(const std::string &to_escape)
155+
std::string escape_non_alnum(std::string_view to_escape)
154156
{
155157
std::ostringstream escaped;
156158
for(auto &ch : to_escape)
@@ -172,11 +174,12 @@ std::string escape_non_alnum(const std::string &to_escape)
172174
}
173175
return escaped.str();
174176
}
std::string capitalize(std::string_view s)
{
  // Owning copy of the input; an empty view simply yields an empty string.
  std::string capitalized{s};

  if(!capitalized.empty())
  {
    // toupper has undefined behaviour for negative arguments other than EOF,
    // so the (possibly signed) char goes through unsigned char first.
    capitalized.front() = static_cast<char>(
      std::toupper(static_cast<unsigned char>(capitalized.front())));
  }

  return capitalized;
}

src/util/string_utils.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@ Author: Daniel Poetzl
1111
#define CPROVER_UTIL_STRING_UTILS_H
1212

1313
#include <string>
14+
#include <string_view>
1415
#include <vector>
1516

16-
std::string strip_string(const std::string &s);
17+
std::string strip_string(std::string_view s);
1718

18-
std::string capitalize(const std::string &str);
19+
std::string capitalize(std::string_view s);
1920

2021
void split_string(
21-
const std::string &s,
22+
std::string_view s,
2223
char delim,
2324
std::string &left,
2425
std::string &right,
@@ -34,14 +35,12 @@ void split_string(
3435
/// This is applied after strip so whitespace only elements will be removed if
3536
/// both are set to true.
3637
std::vector<std::string> split_string(
37-
const std::string &s,
38+
std::string_view s,
3839
char delim,
3940
bool strip = false,
4041
bool remove_empty = false);
4142

42-
std::string trim_from_last_delimiter(
43-
const std::string &s,
44-
const char delim);
43+
std::string trim_from_last_delimiter(std::string_view s, const char delim);
4544

4645
/// Prints items to a stream, separated by a constant delimiter
4746
/// \tparam It: An iterator type
@@ -97,13 +96,13 @@ join_strings(Stream &&os, const It b, const It e, const Delimiter &delimiter)
9796

9897
/// Generic escaping of strings; this is not meant to follow the escaping
9998
/// rules of any particular programming language.
100-
std::string escape(const std::string &);
99+
std::string escape(std::string_view s);
101100

102101
/// Replace non-alphanumeric characters with `_xx` escapes, where xx are hex
103102
/// digits. Underscores are replaced by `__`.
104103
/// \param to_escape: string to escape
105104
/// \return string with non-alphanumeric characters escaped
106-
std::string escape_non_alnum(const std::string &to_escape);
105+
std::string escape_non_alnum(std::string_view to_escape);
107106

108107
/// Wrap line at spaces to not extend past the right margin, and include given
109108
/// padding with spaces to the left

unit/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,12 @@ SRC += analyses/ai/ai.cpp \
207207
util/string2int.cpp \
208208
util/structured_data.cpp \
209209
util/string_utils/capitalize.cpp \
210+
util/string_utils/escape.cpp \
210211
util/string_utils/escape_non_alnum.cpp \
211212
util/string_utils/join_string.cpp \
212213
util/string_utils/split_string.cpp \
213214
util/string_utils/strip_string.cpp \
215+
util/string_utils/trim_from_last_delimiter.cpp \
214216
util/string_utils/wrap_line.cpp \
215217
util/symbol_table.cpp \
216218
util/symbol.cpp \

unit/util/string_utils/capitalize.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@ Author: Thomas Kiley
66
77
\*******************************************************************/
88

9-
#include <testing-utils/use_catch.h>
109
#include <util/string_utils.h>
1110

11+
#include <testing-utils/use_catch.h>
12+
13+
#include <string_view>
14+
1215
TEST_CASE("capitalize", "[core][util][string_utils]")
1316
{
1417
REQUIRE(capitalize("") == "");
@@ -18,3 +21,14 @@ TEST_CASE("capitalize", "[core][util][string_utils]")
1821
REQUIRE(capitalize("abc def") == "Abc def");
1922
REQUIRE(capitalize("1") == "1");
2023
}
24+
25+
TEST_CASE(
26+
"capitalize honours string_view length over a non-NUL-terminated buffer",
27+
"[core][util][string_utils]")
28+
{
29+
// Without a trailing NUL: a regression where the implementation
30+
// walked the buffer until '\0' would read past the end.
31+
const char buf[] = {'a', 'b', 'c', 'X', 'Y'};
32+
std::string_view sv{buf, 3};
33+
REQUIRE(capitalize(sv) == "Abc");
34+
}

unit/util/string_utils/escape.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*******************************************************************\
2+
3+
Module: Unit tests of escape
4+
5+
Author: Diffblue Ltd.
6+
7+
\*******************************************************************/
8+
9+
/// \file
10+
/// escape Unit Tests
11+
12+
#include <util/string_utils.h>
13+
14+
#include <testing-utils/use_catch.h>
15+
16+
#include <string_view>
17+
18+
TEST_CASE("escape", "[core][utils][string_utils][escape]")
19+
{
20+
REQUIRE(escape("") == "");
21+
REQUIRE(escape("abc") == "abc");
22+
REQUIRE(escape("a\"b") == "a\\\"b");
23+
REQUIRE(escape("a\\b") == "a\\\\b");
24+
// characters other than `"` and `\` are passed through unchanged
25+
REQUIRE(escape("a'b") == "a'b");
26+
}
27+
28+
TEST_CASE(
29+
"escape honours string_view length over a non-NUL-terminated buffer",
30+
"[core][utils][string_utils][escape]")
31+
{
32+
// Without a trailing NUL: a regression where the implementation
33+
// walked the buffer until '\0' would read past the end.
34+
const char buf[] = {'a', '"', 'b', 'X', 'Y'};
35+
std::string_view sv{buf, 3};
36+
REQUIRE(escape(sv) == "a\\\"b");
37+
}

0 commit comments

Comments
 (0)