Skip to content

Commit b2e865e

Browse files
authored
Disable EQUS expansion for raw symbols (by parsing them as strings) (#1648)
1 parent 3feb75f commit b2e865e

8 files changed

+206
-17
lines changed

man/rgbasm.5

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,7 +1125,9 @@ Additionally, label names can contain up to a single dot
11251125
.Ql \&. ,
11261126
which may not be the first character.
11271127
.Pp
1128-
A symbol cannot have the same name as a reserved keyword, unless it is prefixed by a hash
1128+
A symbol cannot have the same name as a reserved keyword, unless its name is a
1129+
.Dq raw identifier
1130+
prefixed by a hash
11291131
.Sq # .
11301132
For example,
11311133
.Ql #load
@@ -1300,7 +1302,7 @@ it at the same time.
13001302
below).
13011303
.Ss Numeric constants
13021304
.Ic EQU
1303-
is used to define immutable numeric symbols.
1305+
is used to define numeric constant symbols.
13041306
Unlike
13051307
.Sq =
13061308
above, constants defined this way cannot be redefined.
@@ -1408,6 +1410,8 @@ This expansion is disabled in a few contexts:
14081410
and
14091411
.Ql MACRO name
14101412
will not expand string constants in their names.
1413+
Expansion is also disabled if the string constant's name is a raw identifier prefixed by a hash
1414+
.Sq # .
14111415
.Bd -literal -offset indent
14121416
DEF COUNTREG EQUS "[hl+]"
14131417
ld a, COUNTREG
@@ -1873,7 +1877,7 @@ being the second, and so on. Since there are only nine digits, you can only use
18731877
To use the rest, you put the argument number in angle brackets, like
18741878
.Ic \e<10> .
18751879
.Pp
1876-
This bracketed syntax supports decimal numbers and numeric constant symbols.
1880+
This bracketed syntax supports decimal numbers and numeric symbols.
18771881
For example,
18781882
.Ql \e<_NARG>
18791883
will get the last argument.

src/asm/lexer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1915,8 +1915,8 @@ static Token yylex_NORMAL() {
19151915
// `token` is either a `SYMBOL` or a `LOCAL`, and both have a `std::string` value.
19161916
assume(token.value.holds<std::string>());
19171917

1918-
// Local symbols cannot be string expansions
1919-
if (token.type == T_(SYMBOL) && lexerState->expandStrings) {
1918+
// Raw symbols and local symbols cannot be string expansions
1919+
if (!raw && token.type == T_(SYMBOL) && lexerState->expandStrings) {
19201920
// Attempt string expansion
19211921
Symbol const *sym = sym_FindExactSymbol(token.value.get<std::string>());
19221922

src/asm/parser.y

Lines changed: 99 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,17 @@
8080
static void failAssert(AssertionType type);
8181
static void failAssertMsg(AssertionType type, std::string const &message);
8282

83+
// Dispatches on the kind of symbol named by `symName` and returns the chosen callback's result.
//
// - If `sym_FindScopedSymbol` finds a symbol whose type is `SYM_EQUS`, `strCallback` is invoked
//   with that symbol's string value.
// - Otherwise (no symbol was found, or its type is not `SYM_EQUS`), an `Expression` is built via
//   `makeSymbol(symName)` and passed to `numCallback`.
//
// The return type is deduced (`auto`) from two `return` statements, so both callbacks must
// return the same type (or both return `void`).
template <typename N, typename S>
84+
static auto handleSymbolByType(std::string const &symName, N numCallback, S strCallback) {
85+
// `sym` is scoped to this if/else; the null check guards the `sym->type` access.
if (Symbol *sym = sym_FindScopedSymbol(symName); sym && sym->type == SYM_EQUS) {
86+
return strCallback(*sym->getEqus());
87+
} else {
88+
// Not a string symbol: defer to `Expression::makeSymbol`, so every non-EQUS case
// (including a missing symbol) is handled by the numeric path.
// NOTE(review): presumably `makeSymbol` reports non-numeric symbols itself; confirm.
Expression expr;
89+
expr.makeSymbol(symName);
90+
return numCallback(expr);
91+
}
92+
}
93+
8394
// The CPU encodes instructions in a logical way, so most instructions actually follow patterns.
8495
// These enums thus help with bit twiddling to compute opcodes.
8596
enum { REG_B, REG_C, REG_D, REG_E, REG_H, REG_L, REG_HL_IND, REG_A };
@@ -343,6 +354,7 @@
343354

344355
// Strings
345356
%type <std::string> string
357+
%type <std::string> string_literal
346358
%type <std::string> strcat_args
347359
// Strings used for identifiers
348360
%type <std::string> def_id
@@ -1210,10 +1222,17 @@ print_expr:
12101222
relocexpr_no_str {
12111223
printf("$%" PRIX32, $1.getConstVal());
12121224
}
1213-
| string {
1225+
| string_literal {
12141226
// Allow printing NUL characters
12151227
fwrite($1.data(), 1, $1.length(), stdout);
12161228
}
1229+
| scoped_sym {
1230+
handleSymbolByType(
1231+
$1,
1232+
[](Expression const &expr) { printf("$%" PRIX32, expr.getConstVal()); },
1233+
[](std::string const &str) { fwrite(str.data(), 1, str.length(), stdout); }
1234+
);
1235+
}
12171236
;
12181237

12191238
reloc_3bit:
@@ -1233,10 +1252,23 @@ constlist_8bit_entry:
12331252
$1.checkNBit(8);
12341253
sect_RelByte($1, 0);
12351254
}
1236-
| string {
1255+
| string_literal {
12371256
std::vector<int32_t> output = charmap_Convert($1);
12381257
sect_ByteString(output);
12391258
}
1259+
| scoped_sym {
1260+
handleSymbolByType(
1261+
$1,
1262+
[](Expression const &expr) {
1263+
expr.checkNBit(8);
1264+
sect_RelByte(expr, 0);
1265+
},
1266+
[](std::string const &str) {
1267+
std::vector<int32_t> output = charmap_Convert(str);
1268+
sect_ByteString(output);
1269+
}
1270+
);
1271+
}
12401272
;
12411273

12421274
constlist_16bit:
@@ -1249,10 +1281,23 @@ constlist_16bit_entry:
12491281
$1.checkNBit(16);
12501282
sect_RelWord($1, 0);
12511283
}
1252-
| string {
1284+
| string_literal {
12531285
std::vector<int32_t> output = charmap_Convert($1);
12541286
sect_WordString(output);
12551287
}
1288+
| scoped_sym {
1289+
handleSymbolByType(
1290+
$1,
1291+
[](Expression const &expr) {
1292+
expr.checkNBit(16);
1293+
sect_RelWord(expr, 0);
1294+
},
1295+
[](std::string const &str) {
1296+
std::vector<int32_t> output = charmap_Convert(str);
1297+
sect_WordString(output);
1298+
}
1299+
);
1300+
}
12561301
;
12571302

12581303
constlist_32bit:
@@ -1264,10 +1309,20 @@ constlist_32bit_entry:
12641309
relocexpr_no_str {
12651310
sect_RelLong($1, 0);
12661311
}
1267-
| string {
1312+
| string_literal {
12681313
std::vector<int32_t> output = charmap_Convert($1);
12691314
sect_LongString(output);
12701315
}
1316+
| scoped_sym {
1317+
handleSymbolByType(
1318+
$1,
1319+
[](Expression const &expr) { sect_RelLong(expr, 0); },
1320+
[](std::string const &str) {
1321+
std::vector<int32_t> output = charmap_Convert(str);
1322+
sect_LongString(output);
1323+
}
1324+
);
1325+
}
12711326
;
12721327

12731328
reloc_8bit:
@@ -1299,17 +1354,26 @@ relocexpr:
12991354
relocexpr_no_str {
13001355
$$ = std::move($1);
13011356
}
1302-
| string {
1357+
| string_literal {
13031358
std::vector<int32_t> output = charmap_Convert($1);
13041359
$$.makeNumber(strToNum(output));
13051360
}
1361+
| scoped_sym {
1362+
$$ = handleSymbolByType(
1363+
$1,
1364+
[](Expression const &expr) { return expr; },
1365+
[](std::string const &str) {
1366+
std::vector<int32_t> output = charmap_Convert(str);
1367+
Expression expr;
1368+
expr.makeNumber(strToNum(output));
1369+
return expr;
1370+
}
1371+
);
1372+
}
13061373
;
13071374

13081375
relocexpr_no_str:
1309-
scoped_sym {
1310-
$$.makeSymbol($1);
1311-
}
1312-
| NUMBER {
1376+
NUMBER {
13131377
$$.makeNumber($1);
13141378
}
13151379
| OP_LOGICNOT relocexpr %prec NEG {
@@ -1403,7 +1467,7 @@ relocexpr_no_str:
14031467
// '@' is also a SYMBOL; it is handled here
14041468
$$.makeBankSymbol($3);
14051469
}
1406-
| OP_BANK LPAREN string RPAREN {
1470+
| OP_BANK LPAREN string_literal RPAREN {
14071471
$$.makeBankSection($3);
14081472
}
14091473
| OP_SIZEOF LPAREN string RPAREN {
@@ -1540,7 +1604,7 @@ precision_arg:
15401604
}
15411605
;
15421606

1543-
string:
1607+
string_literal:
15441608
STRING {
15451609
$$ = std::move($1);
15461610
}
@@ -1625,6 +1689,19 @@ string:
16251689
}
16261690
;
16271691

1692+
// A `string` is either a `string_literal`, or the name of a symbol (`scoped_sym`) that
// resolves to a string (`SYM_EQUS`) symbol, in which case its value is the symbol's contents.
string:
1693+
string_literal {
1694+
$$ = std::move($1);
1695+
}
1696+
| scoped_sym {
1697+
// Only `SYM_EQUS` symbols are accepted here; a missing symbol or any other type is an error.
if (Symbol *sym = sym_FindScopedSymbol($1); sym && sym->type == SYM_EQUS) {
1698+
$$ = *sym->getEqus();
1699+
} else {
1700+
// `$$` is not assigned on this path; only the error is reported.
// NOTE(review): presumably `$$` is then a default-constructed (empty) string; confirm.
::error("'%s' is not a string symbol\n", $1.c_str());
1701+
}
1702+
}
1703+
;
1704+
16281705
strcat_args:
16291706
string {
16301707
$$ = std::move($1);
@@ -1649,10 +1726,20 @@ strfmt_va_args:
16491726
$$ = std::move($1);
16501727
$$.args.push_back(static_cast<uint32_t>($3.getConstVal()));
16511728
}
1652-
| strfmt_va_args COMMA string {
1729+
| strfmt_va_args COMMA string_literal {
16531730
$$ = std::move($1);
16541731
$$.args.push_back(std::move($3));
16551732
}
1733+
| strfmt_va_args COMMA scoped_sym {
1734+
$$ = std::move($1);
1735+
handleSymbolByType(
1736+
$3,
1737+
[&](Expression const &expr) {
1738+
$$.args.push_back(static_cast<uint32_t>(expr.getConstVal()));
1739+
},
1740+
[&](std::string const &str) { $$.args.push_back(str); }
1741+
);
1742+
}
16561743
;
16571744

16581745
section:

test/asm/raw-string-symbol-errors.asm

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
def n equ 42
2+
def s equs "hello"
3+
macro m
4+
endm
5+
6+
assert (#n) == 42
7+
assert (#s) == $656c6c6f
8+
assert (#m) == 0
9+
assert (#u) == 0
10+
11+
assert strlen(#n) == 0
12+
assert strlen(#s) == 5
13+
assert strlen(#m) == 0
14+
assert strlen(#u) == 0
15+
16+
def d_n = (#n)
17+
def d_s = (#s)
18+
def d_m = (#m)
19+
def d_u = (#u)
20+
21+
def s_n equs #n
22+
def s_s equs #s
23+
def s_m equs #m
24+
def s_u equs #u
25+
26+
purge #s
27+
purge #s
28+
assert (#s) == 0
29+
assert strlen(#s) == 0

test/asm/raw-string-symbol-errors.err

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
warning: raw-string-symbol-errors.asm(7): [-Wobsolete]
2+
Treating multi-unit strings as numbers is deprecated
3+
error: raw-string-symbol-errors.asm(8):
4+
'm' is not a numeric symbol
5+
error: raw-string-symbol-errors.asm(11):
6+
'n' is not a string symbol
7+
error: raw-string-symbol-errors.asm(13):
8+
'm' is not a string symbol
9+
error: raw-string-symbol-errors.asm(14):
10+
'u' is not a string symbol
11+
warning: raw-string-symbol-errors.asm(17): [-Wobsolete]
12+
Treating multi-unit strings as numbers is deprecated
13+
error: raw-string-symbol-errors.asm(18):
14+
'm' is not a numeric symbol
15+
error: raw-string-symbol-errors.asm(19):
16+
Expected constant expression: 'u' is not constant at assembly time
17+
error: raw-string-symbol-errors.asm(21):
18+
'n' is not a string symbol
19+
error: raw-string-symbol-errors.asm(23):
20+
'm' is not a string symbol
21+
error: raw-string-symbol-errors.asm(24):
22+
'u' is not a string symbol
23+
error: raw-string-symbol-errors.asm(27):
24+
's' was already purged
25+
error: raw-string-symbol-errors.asm(29):
26+
's' is not a string symbol
27+
error: Assembly aborted (11 errors)!

test/asm/raw-string-symbols.asm

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
opt Wno-unmapped-char
2+
3+
def hello equs "world"
4+
def name equs "hello"
5+
println "{name}"
6+
println #name
7+
assert !strcmp(strsub(#name, 1, 4), "hell")
8+
assert strlen(#hello) == charlen(#hello)
9+
assert strlen("{hello}") == 5
10+
11+
def multi equs """the quick
12+
brown fox"""
13+
println #multi
14+
15+
def char equs "A"
16+
def n = #char
17+
println n
18+
def n = (#char)
19+
println n
20+
def n = 1 + #char
21+
println n
22+
assert #char == $41
23+
24+
def fmt equs "%s %s %d"
25+
println strfmt(#fmt, #name, #hello, (#char))
26+
27+
purge #name
28+
assert !def(name) && !def(#name) && def(hello)
29+
30+
section "test", rom0
31+
#label:
32+
db #hello
33+
dw #hello
34+
dw BANK(#label), #label

test/asm/raw-string-symbols.out

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
hello
2+
hello
3+
the quick
4+
brown fox
5+
$41
6+
$41
7+
$42
8+
hello world 65

test/asm/raw-string-symbols.out.bin

19 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)