Skip to content

Commit 3feb75f

Browse files
authored
Implement new string functions (#1655)
`STRFIND`, `STRRFIND`, `STRCHAR`, `STRSLICE`, `CHARCMP`, `CHARSIZE`, and `REVCHAR`
1 parent ad4d9da commit 3feb75f

27 files changed

+584
-134
lines changed

include/asm/charmap.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ void charmap_Pop();
2121
void charmap_CheckStack();
2222
void charmap_Add(std::string const &mapping, std::vector<int32_t> &&value);
2323
bool charmap_HasChar(std::string const &mapping);
24+
size_t charmap_CharSize(std::string const &mapping);
2425
std::vector<int32_t> charmap_Convert(std::string const &input);
2526
size_t charmap_ConvertNext(std::string_view &input, std::vector<int32_t> *output);
27+
std::string charmap_Reverse(std::vector<int32_t> const &value, bool &unique);
2628

2729
#endif // RGBDS_ASM_CHARMAP_HPP

man/rgbasm.5

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -564,22 +564,17 @@ is equivalent to the regular string
564564
(Note that this prevents raw strings from including the double quote character.)
565565
Raw strings also may be contained in triple quotes for them to be multi-line, so they can include literal newline or quote characters (although still not three quotes in a row).
566566
.Pp
567-
The following functions operate on string expressions.
568-
Most of them return a string, however some of these functions actually return an integer and can be used as part of an integer expression!
569-
.Bl -column "STRSUB(str, pos, len)"
567+
The following functions operate on string expressions, and return strings themselves.
568+
.Bl -column "STRSLICE(str, start, stop)"
570569
.It Sy Name Ta Sy Operation
571-
.It Fn STRLEN str Ta Returns the number of characters in Ar str .
572570
.It Fn STRCAT strs... Ta Concatenates Ar strs .
573-
.It Fn STRCMP str1 str2 Ta Returns -1 if Ar str1 No is alphabetically lower than Ar str2 No , zero if they match, 1 if Ar str1 No is greater than Ar str2 .
574-
.It Fn STRIN str1 str2 Ta Returns the first position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
575-
.It Fn STRRIN str1 str2 Ta Returns the last position of Ar str2 No in Ar str1 No or zero if it's not present Pq first character is position 1 .
576-
.It Fn STRSUB str pos len Ta Returns a substring from Ar str No starting at Ar pos No (first character is position 1, last is position -1) and Ar len No characters long. If Ar len No is not specified the substring continues to the end of Ar str .
577571
.It Fn STRUPR str Ta Returns Ar str No with all ASCII letters
578572
.Pq Ql a-z
579573
in uppercase.
580574
.It Fn STRLWR str Ta Returns Ar str No with all ASCII letters
581575
.Pq Ql A-Z
582576
in lowercase.
577+
.It Fn STRSLICE str start stop Ta Returns a substring of Ar str No starting at Ar start No and ending at Ar stop No (exclusive). If Ar stop No is not specified, the substring continues to the end of Ar str Ns .
583578
.It Fn STRRPL str old new Ta Returns Ar str No with each non-overlapping occurrence of the substring Ar old No replaced with Ar new .
584579
.It Fn STRFMT fmt args... Ta Returns the string Ar fmt No with each
585580
.Ql %spec
@@ -589,9 +584,35 @@ pattern replaced by interpolating the format
589584
with its corresponding argument in
590585
.Ar args
591586
.Pq So %% Sc is replaced by the So % Sc character .
592-
.It Fn INCHARMAP str Ta Returns 1 if Ar str No has an entry in the current charmap, and 0 otherwise .
587+
.It Fn STRCHAR str idx Ta Returns the substring of Ar str No for the charmap entry at Ar idx No with the current charmap . Pq Ar idx No counts charmap entries, not characters.
588+
.It Fn REVCHAR vals... Ta Returns the string that is mapped to Ar vals No with the current charmap. If there is no unique charmap entry for Ar vals Ns , an error occurs.
589+
.El
590+
.Pp
591+
The following functions operate on string expressions, but return integers.
592+
.Bl -column "STRRFIND(str, sub)"
593+
.It Sy Name Ta Sy Operation
594+
.It Fn STRLEN str Ta Returns the number of characters in Ar str .
595+
.It Fn STRCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to the ASCII ordering of their characters. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match.
596+
.It Fn STRFIND str sub Ta Returns the first index of Ar sub No in Ar str Ns , or -1 if it's not present.
597+
.It Fn STRRFIND str sub Ta Returns the last index of Ar sub No in Ar str Ns , or -1 if it's not present.
598+
.It Fn INCHARMAP str Ta Returns 1 if Ar str No has an entry in the current charmap, or 0 otherwise .
593599
.It Fn CHARLEN str Ta Returns the number of charmap entries in Ar str No with the current charmap .
594-
.It Fn CHARSUB str pos Ta Returns the substring for the charmap entry at Ar pos No in Ar str No (first character is position 1, last is position -1) with the current charmap .
600+
.It Fn CHARCMP str1 str2 Ta Compares Ar str1 No and Ar str2 No according to their charmap entry values with the current charmap. Returns -1 if Ar str1 No is lower than Ar str2 Ns , 1 if Ar str1 No is greater than Ar str2 Ns , or 0 if they match.
601+
.It Fn CHARSIZE char Ta Returns how many values are in the charmap entry for Ar char No with the current charmap.
602+
.El
603+
.Pp
604+
Note that the first character of a string is at index 0, and the last is at index -1.
605+
.Pp
606+
The following legacy functions are similar to other functions that operate on string expressions, but for historical reasons, they count characters starting from
607+
.Em position 1 ,
608+
not from index 0!
609+
(Position -1 still counts from the last character.)
610+
.Bl -column "STRSUB(str, pos, len)"
611+
.It Sy Name Ta Sy Operation
612+
.It Fn STRSUB str pos len Ta Returns a substring of Ar str No starting at Ar pos No and Ar len No characters long. If Ar len No is not specified, the substring continues to the end of Ar str No .
613+
.It Fn STRIN str sub Ta Returns the first position of Ar sub No in Ar str Ns , or 0 if it's not present.
614+
.It Fn STRRIN str sub Ta Returns the last position of Ar sub No in Ar str Ns , or 0 if it's not present.
615+
.It Fn CHARSUB str pos Ta Returns the substring of Ar str No for the charmap entry at Ar pos No with the current charmap . Pq Ar pos No counts charmap entries, not characters.
595616
.El
596617
.Ss Character maps
597618
When writing text strings that are meant to be displayed on the Game Boy, the character encoding in the ROM may need to be different than the source file encoding.

src/asm/charmap.cpp

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,29 @@ struct CharmapNode {
3131
struct Charmap {
3232
std::string name;
3333
std::vector<CharmapNode> nodes; // first node is reserved for the root node
34+
35+
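// Traverse the trie depth-first, calling `callback(nodeIdx, mapping)` for each character mapping; stops and returns false as soon as the callback returns false, otherwise returns true. (The visit order is not definition order by itself: to derive the mappings in definition order, collect them sorted by node index as `charmap_ForEach` does.)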
36+
template<typename F>
37+
bool forEachChar(F callback) const {
38+
// clang-format off: nested initializers
39+
for (std::stack<std::pair<size_t, std::string>> prefixes({{0, ""}}); !prefixes.empty();) {
40+
// clang-format on
41+
auto [nodeIdx, mapping] = std::move(prefixes.top());
42+
prefixes.pop();
43+
CharmapNode const &node = nodes[nodeIdx];
44+
if (node.isTerminal()) {
45+
if (!callback(nodeIdx, mapping)) {
46+
return false;
47+
}
48+
}
49+
for (unsigned c = 0; c < std::size(node.next); c++) {
50+
if (size_t nextIdx = node.next[c]; nextIdx) {
51+
prefixes.push({nextIdx, mapping + static_cast<char>(c)});
52+
}
53+
}
54+
}
55+
return true;
56+
}
3457
};
3558

3659
static std::deque<Charmap> charmapList;
@@ -44,24 +67,12 @@ bool charmap_ForEach(
4467
void (*charFunc)(std::string const &, std::vector<int32_t>)
4568
) {
4669
for (Charmap const &charmap : charmapList) {
47-
// Traverse the trie depth-first to derive the character mappings in definition order
4870
std::map<size_t, std::string> mappings;
49-
// clang-format off: nested initializers
50-
for (std::stack<std::pair<size_t, std::string>> prefixes({{0, ""}});
51-
!prefixes.empty();) {
52-
// clang-format on
53-
auto [nodeIdx, mapping] = std::move(prefixes.top());
54-
prefixes.pop();
55-
CharmapNode const &node = charmap.nodes[nodeIdx];
56-
if (node.isTerminal()) {
57-
mappings[nodeIdx] = mapping;
58-
}
59-
for (unsigned c = 0; c < 256; c++) {
60-
if (size_t nextIdx = node.next[c]; nextIdx) {
61-
prefixes.push({nextIdx, mapping + static_cast<char>(c)});
62-
}
63-
}
64-
}
71+
charmap.forEachChar([&mappings](size_t nodeIdx, std::string const &mapping) {
72+
mappings[nodeIdx] = mapping;
73+
return true;
74+
});
75+
6576
mapFunc(charmap.name);
6677
for (auto [nodeIdx, mapping] : mappings) {
6778
charFunc(mapping, charmap.nodes[nodeIdx].value);
@@ -178,6 +189,22 @@ bool charmap_HasChar(std::string const &mapping) {
178189
return charmap.nodes[nodeIdx].isTerminal();
179190
}
180191

192+
size_t charmap_CharSize(std::string const &mapping) {
193+
Charmap const &charmap = *currentCharmap;
194+
size_t nodeIdx = 0;
195+
196+
for (char c : mapping) {
197+
nodeIdx = charmap.nodes[nodeIdx].next[static_cast<uint8_t>(c)];
198+
199+
if (!nodeIdx) {
200+
return 0;
201+
}
202+
}
203+
204+
CharmapNode const &node = charmap.nodes[nodeIdx];
205+
return node.isTerminal() ? node.value.size() : 0;
206+
}
207+
181208
std::vector<int32_t> charmap_Convert(std::string const &input) {
182209
std::vector<int32_t> output;
183210
for (std::string_view inputView = input; charmap_ConvertNext(inputView, &output);) {}
@@ -263,3 +290,20 @@ size_t charmap_ConvertNext(std::string_view &input, std::vector<int32_t> *output
263290
input = input.substr(inputIdx);
264291
return matchLen;
265292
}
293+
294+
std::string charmap_Reverse(std::vector<int32_t> const &value, bool &unique) {
295+
Charmap const &charmap = *currentCharmap;
296+
std::string revMapping;
297+
unique = charmap.forEachChar([&](size_t nodeIdx, std::string const &mapping) {
298+
if (charmap.nodes[nodeIdx].value == value) {
299+
if (revMapping.empty()) {
300+
revMapping = mapping;
301+
} else {
302+
revMapping.clear();
303+
return false;
304+
}
305+
}
306+
return true;
307+
});
308+
return revMapping;
309+
}

src/asm/lexer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,19 +240,26 @@ static std::unordered_map<std::string, int, CaseInsensitive, CaseInsensitive> ke
240240
{"TZCOUNT", T_(OP_TZCOUNT) },
241241

242242
{"STRCAT", T_(OP_STRCAT) },
243+
{"STRCHAR", T_(OP_STRCHAR) },
243244
{"STRCMP", T_(OP_STRCMP) },
245+
{"STRFIND", T_(OP_STRFIND) },
244246
{"STRFMT", T_(OP_STRFMT) },
245247
{"STRIN", T_(OP_STRIN) },
246248
{"STRLEN", T_(OP_STRLEN) },
247249
{"STRLWR", T_(OP_STRLWR) },
250+
{"STRRFIND", T_(OP_STRRFIND) },
248251
{"STRRIN", T_(OP_STRRIN) },
249252
{"STRRPL", T_(OP_STRRPL) },
253+
{"STRSLICE", T_(OP_STRSLICE) },
250254
{"STRSUB", T_(OP_STRSUB) },
251255
{"STRUPR", T_(OP_STRUPR) },
252256

257+
{"CHARCMP", T_(OP_CHARCMP) },
253258
{"CHARLEN", T_(OP_CHARLEN) },
259+
{"CHARSIZE", T_(OP_CHARSIZE) },
254260
{"CHARSUB", T_(OP_CHARSUB) },
255261
{"INCHARMAP", T_(OP_INCHARMAP) },
262+
{"REVCHAR", T_(OP_REVCHAR) },
256263

257264
{"INCLUDE", T_(POP_INCLUDE) },
258265
{"PRINT", T_(POP_PRINT) },

0 commit comments

Comments
 (0)