From fdf6c55929746f146ed7db314015a36a13dfbc18 Mon Sep 17 00:00:00 2001 From: Rangi42 Date: Wed, 8 Jan 2025 17:18:40 -0500 Subject: [PATCH] Implement [[ fragment literals ]] This feature is referred to as "code/data literals" in ASMotor, and simply as "literals" in some older assemblers like MIDAS for the PDP-10. RGBASM already had the "section fragments" feature for keeping disparate contents together when linked, so these worked naturally as "fragment literals". --- include/asm/lexer.hpp | 1 + include/asm/section.hpp | 2 + man/rgbasm.5 | 64 ++++++++++++++++++++ src/asm/lexer.cpp | 28 +++++++-- src/asm/parser.y | 26 +++++++- src/asm/section.cpp | 44 ++++++++++++++ src/asm/symbol.cpp | 4 +- test/asm/code-after-endm-endr-endc.err | 8 +-- test/asm/fragment-literal-in-load.asm | 14 +++++ test/asm/fragment-literal-in-load.err | 2 + test/asm/fragment-literal-in-ram.asm | 9 +++ test/asm/fragment-literal-in-ram.err | 2 + test/asm/fragment-literal-in-ram.out | 1 + test/asm/fragment-literal-in-union.asm | 5 ++ test/asm/fragment-literal-in-union.err | 2 + test/asm/syntax-error-after-syntax-error.err | 2 +- 16 files changed, 201 insertions(+), 13 deletions(-) create mode 100644 test/asm/fragment-literal-in-load.asm create mode 100644 test/asm/fragment-literal-in-load.err create mode 100644 test/asm/fragment-literal-in-ram.asm create mode 100644 test/asm/fragment-literal-in-ram.err create mode 100644 test/asm/fragment-literal-in-ram.out create mode 100644 test/asm/fragment-literal-in-union.asm create mode 100644 test/asm/fragment-literal-in-union.err diff --git a/include/asm/lexer.hpp b/include/asm/lexer.hpp index 9ca6cfa5e3..7b0c3a5f07 100644 --- a/include/asm/lexer.hpp +++ b/include/asm/lexer.hpp @@ -85,6 +85,7 @@ struct LexerState { uint32_t lineNo; uint32_t colNo; int lastToken; + int nextToken; std::deque ifStack; diff --git a/include/asm/section.hpp b/include/asm/section.hpp index a89a4b0cf7..48fd5f1755 100644 --- a/include/asm/section.hpp +++ b/include/asm/section.hpp @@ -104,4 +104,6 @@ void sect_PushSection(); void sect_PopSection(); void sect_CheckStack(); +std::string sect_PushSectionFragmentLiteral(); + #endif // RGBDS_ASM_SECTION_HPP diff --git a/man/rgbasm.5 b/man/rgbasm.5 index b865e628b1..6142d28682 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -1075,6 +1075,70 @@ first, followed by the one from and the one from .Ql bar.o last. +.Ss Fragment literals +Fragment literals are useful for short blocks of code or data that are only referenced once. +They are section fragments created by surrounding instructions or directives with +.Ql [[ +double brackets +.Ql ]] , +without a separate +.Ic SECTION FRAGMENT +declaration. +.Pp +The content of a fragment literal becomes a +.Ic SECTION FRAGMENT , +sharing the same name and bank as its parent ROM section, but without any other constraints. +The parent section also becomes a +.Ic FRAGMENT +if it was not one already, so that it can be merged with its fragment literals. +RGBLINK merges the fragments in no particular order. +.Pp +A fragment literal can take the place of any 16-bit integer constant +.Ql n16 +from the +.Xr gbz80 7 +documentation, as well as a +.Ic DW +item. +The fragment literal then evaluates to its starting address. +For example, you can +.Ic CALL +or +.Ic JP +to a fragment literal. +.Pp +This code using named labels: +.Bd -literal -offset indent +FortyTwo: + call Sub1 + jp Sub2 +Sub1: + ld a, [Twenty] + ret +Sub2: + inc a + add a + ret +Twenty: db 20 +dw FortyTwo +.Ed +.Pp +is equivalent to this code using fragment literals: +.Bd -literal -offset indent +dw [[ + call [[ + ld a, [ [[db 20]] ] + ret + ]] + jp [[ + inc a + add a + ret + ]] +]] +.Ed +.Pp +The difference is that the example using fragment literals does not declare a particular order for its pieces. .Sh SYMBOLS RGBDS supports several types of symbols: .Bl -hang diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index aa8e12878d..0f327c9a54 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -346,6 +346,7 @@ void LexerState::clear(uint32_t lineNo_) { mode = LEXER_NORMAL; atLineStart = true; // yylex() will init colNo due to this lastToken = T_(YYEOF); + nextToken = 0; ifStack.clear(); @@ -1146,6 +1147,7 @@ static uint32_t readGfxConstant() { static bool startsIdentifier(int c) { // Anonymous labels internally start with '!' + // Fragment literal labels internally start with '$' return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' || c == '_'; } @@ -1573,6 +1575,11 @@ static void appendStringLiteral(std::string &str, bool raw) { static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL static Token yylex_NORMAL() { + if (int nextToken = lexerState->nextToken; nextToken) { + lexerState->nextToken = 0; + return Token(nextToken); + } + for (;;) { int c = nextChar(); @@ -1596,10 +1603,6 @@ static Token yylex_NORMAL() { return Token(T_(ID), symName); } - case '[': - return Token(T_(LBRACK)); - case ']': - return Token(T_(RBRACK)); case '(': return Token(T_(LPAREN)); case ')': @@ -1609,6 +1612,23 @@ static Token yylex_NORMAL() { // Handle ambiguous 1- or 2-char tokens + case '[': // Either [ or [[ + if (peek() == '[') { + shiftChar(); + return Token(T_(LBRACKS)); + } + return Token(T_(LBRACK)); + + case ']': // Either ] or ]] + if (peek() == ']') { + shiftChar(); + // `[[ Fragment literals ]]` inject an EOL token to end their contents + // even without a newline. Retroactively lex the `]]` after it. + lexerState->nextToken = T_(RBRACKS); + return Token(T_(EOL)); + } + return Token(T_(RBRACK)); + case '+': // Either += or ADD if (peek() == '=') { shiftChar(); diff --git a/src/asm/parser.y b/src/asm/parser.y index 437c6afa5e..1729e0b382 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -105,11 +105,13 @@ %token YYEOF 0 "end of file" %token NEWLINE "end of line" %token EOB "end of buffer" +%token EOL "end of fragment literal" // General punctuation %token COMMA "," %token COLON ":" DOUBLE_COLON "::" %token LBRACK "[" RBRACK "]" +%token LBRACKS "[[" RBRACKS "]]" %token LPAREN "(" RPAREN ")" // Arithmetic operators @@ -362,6 +364,8 @@ %type redef_equs %type scoped_id %type scoped_anon_id +%type fragment_literal +%type fragment_literal_name // SM83 instruction parameters %type reg_r @@ -435,7 +439,7 @@ line: | line_directive // Directives that manage newlines themselves ; -endofline: NEWLINE | EOB; +endofline: NEWLINE | EOB | EOL; // For "logistical" reasons, these directives must manage newlines themselves. // This is because we need to switch the lexer's mode *after* the newline has been read, @@ -1310,6 +1314,9 @@ reloc_16bit: $$ = std::move($1); $$.checkNBit(16); } + | fragment_literal { + $$.makeSymbol($1); + } ; reloc_16bit_no_str: @@ -1317,6 +1324,23 @@ reloc_16bit_no_str: $$ = std::move($1); $$.checkNBit(16); } + | fragment_literal { + $$.makeSymbol($1); + } +; + +fragment_literal: + LBRACKS fragment_literal_name asm_file RBRACKS { + sect_PopSection(); + $$ = std::move($2); + } +; + +fragment_literal_name: + %empty { + $$ = sect_PushSectionFragmentLiteral(); + sym_AddLabel($$); + } ; relocexpr: diff --git a/src/asm/section.cpp b/src/asm/section.cpp index 25553888af..f82e6328fd 100644 --- a/src/asm/section.cpp +++ b/src/asm/section.cpp @@ -22,6 +22,8 @@ #include "asm/symbol.hpp" #include "asm/warning.hpp" +using namespace std::literals; + uint8_t fillByte; struct UnionStackEntry { @@ -993,3 +995,45 @@ void sect_EndSection() { currentSection = nullptr; sym_ResetCurrentLabelScopes(); } + +std::string sect_PushSectionFragmentLiteral() { + static uint64_t nextFragmentLiteralID = 0; + + // Like `requireCodeSection` but fatal + if (!currentSection) + fatalerror("Cannot output fragment literals outside of a SECTION\n"); + if (!sect_HasData(currentSection->type)) + fatalerror( + "Section '%s' cannot contain fragment literals (not ROM0 or ROMX)\n", + currentSection->name.c_str() + ); + + if (currentLoadSection) + fatalerror("`LOAD` blocks cannot contain fragment literals\n"); + if (currentSection->modifier == SECTION_UNION) + fatalerror("`SECTION UNION` cannot contain fragment literals\n"); + + // A section containing a fragment literal has to become a fragment too + currentSection->modifier = SECTION_FRAGMENT; + + Section *parent = currentSection; + sect_PushSection(); // Resets `currentSection` + + Section *sect = createSection( + parent->name, + parent->type, + UINT32_MAX, + parent->bank == 0 ? UINT32_MAX : parent->bank, + 0, + 0, + SECTION_FRAGMENT + ); + + changeSection(); + curOffset = sect->size; + loadOffset = 0; + currentSection = sect; + + // Return a symbol ID to use for the address of this section fragment + return "$"s + std::to_string(nextFragmentLiteralID++); +} diff --git a/src/asm/symbol.cpp b/src/asm/symbol.cpp index 1ae86aba5b..d350a25490 100644 --- a/src/asm/symbol.cpp +++ b/src/asm/symbol.cpp @@ -539,9 +539,7 @@ std::string sym_MakeAnonLabelName(uint32_t ofs, bool neg) { id = anonLabelID + ofs; } - std::string anon("!"); - anon += std::to_string(id); - return anon; + return "!"s + std::to_string(id); } void sym_Export(std::string const &symName) { diff --git a/test/asm/code-after-endm-endr-endc.err b/test/asm/code-after-endm-endr-endc.err index 0220c1a5a3..a0ee347c15 100644 --- a/test/asm/code-after-endm-endr-endc.err +++ b/test/asm/code-after-endm-endr-endc.err @@ -1,15 +1,15 @@ error: code-after-endm-endr-endc.asm(6): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(7): Macro "mac" not defined error: code-after-endm-endr-endc.asm(12): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(17): syntax error, unexpected PRINTLN, expecting end of line error: code-after-endm-endr-endc.asm(19): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(23): syntax error, unexpected PRINTLN, expecting end of line error: code-after-endm-endr-endc.asm(25): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: Assembly aborted (7 errors)! diff --git a/test/asm/fragment-literal-in-load.asm b/test/asm/fragment-literal-in-load.asm new file mode 100644 index 0000000000..87768688c6 --- /dev/null +++ b/test/asm/fragment-literal-in-load.asm @@ -0,0 +1,14 @@ +SECTION "OAMDMACode", ROM0 +OAMDMACode: +LOAD "hOAMDMA", HRAM +hOAMDMA:: + ldh [$ff46], a + ld a, 40 + jp [[ +: dec a + jr nz, :- + ret + ]] +.end +ENDL +OAMDMACodeEnd: diff --git a/test/asm/fragment-literal-in-load.err b/test/asm/fragment-literal-in-load.err new file mode 100644 index 0000000000..c103d4b21b --- /dev/null +++ b/test/asm/fragment-literal-in-load.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-load.asm(7): + `LOAD` blocks cannot contain fragment literals diff --git a/test/asm/fragment-literal-in-ram.asm b/test/asm/fragment-literal-in-ram.asm new file mode 100644 index 0000000000..4b36a16a96 --- /dev/null +++ b/test/asm/fragment-literal-in-ram.asm @@ -0,0 +1,9 @@ +SECTION "RAM", WRAM0 + +wFoo:: db +wBar:: ds 3 + println "ok" +wQux:: dw [[ + ds 4 + println "inline" +]] diff --git a/test/asm/fragment-literal-in-ram.err b/test/asm/fragment-literal-in-ram.err new file mode 100644 index 0000000000..e4ddc2f10b --- /dev/null +++ b/test/asm/fragment-literal-in-ram.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-ram.asm(6): + Section 'RAM' cannot contain fragment literals (not ROM0 or ROMX) diff --git a/test/asm/fragment-literal-in-ram.out b/test/asm/fragment-literal-in-ram.out new file mode 100644 index 0000000000..9766475a41 --- /dev/null +++ b/test/asm/fragment-literal-in-ram.out @@ -0,0 +1 @@ +ok diff --git a/test/asm/fragment-literal-in-union.asm b/test/asm/fragment-literal-in-union.asm new file mode 100644 index 0000000000..9f2e75cf8f --- /dev/null +++ b/test/asm/fragment-literal-in-union.asm @@ -0,0 +1,5 @@ +SECTION UNION "U", ROM0 + db $11 + dw [[ db $22 ]] +SECTION UNION "U", ROM0 + db $33 diff --git a/test/asm/fragment-literal-in-union.err b/test/asm/fragment-literal-in-union.err new file mode 100644 index 0000000000..c368c377a2 --- /dev/null +++ b/test/asm/fragment-literal-in-union.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-union.asm(3): + `SECTION UNION` cannot contain fragment literals diff --git a/test/asm/syntax-error-after-syntax-error.err b/test/asm/syntax-error-after-syntax-error.err index a41f723a70..2112ad0961 100644 --- a/test/asm/syntax-error-after-syntax-error.err +++ b/test/asm/syntax-error-after-syntax-error.err @@ -7,5 +7,5 @@ error: syntax-error-after-syntax-error.asm(6): error: syntax-error-after-syntax-error.asm(9): syntax error, unexpected : error: syntax-error-after-syntax-error.asm(10): - syntax error, unexpected stop, expecting end of line or end of buffer or :: + syntax error, unexpected stop, expecting end of line or end of buffer or end of fragment literal or :: error: Assembly aborted (5 errors)!