From 6a327a1bc3df13cd751f134554f9a731efb08847 Mon Sep 17 00:00:00 2001 From: Rangi42 Date: Wed, 8 Jan 2025 17:18:40 -0500 Subject: [PATCH] Implement [[ fragment literals ]] This feature is referred to as "code/data literals" in ASMotor, and simply as "literals" in some older assemblers like MIDAS for the PDP-10. RGBASM already had the "section fragments" feature for keeping disparate contents together when linked, so these worked naturally as "fragment literals". --- include/asm/lexer.hpp | 1 + include/asm/section.hpp | 2 + man/rgbasm.5 | 66 ++++++++++++++++++- src/asm/lexer.cpp | 28 ++++++-- src/asm/parser.y | 26 +++++++- src/asm/section.cpp | 61 +++++++++++++++++ src/asm/symbol.cpp | 4 +- test/asm/code-after-endm-endr-endc.err | 8 +-- test/asm/fragment-literal-in-load.asm | 14 ++++ test/asm/fragment-literal-in-load.err | 2 + test/asm/fragment-literal-in-ram.asm | 9 +++ test/asm/fragment-literal-in-ram.err | 2 + test/asm/fragment-literal-in-ram.out | 1 + test/asm/fragment-literal-in-union.asm | 5 ++ test/asm/fragment-literal-in-union.err | 2 + test/asm/fragment-literals.asm | 62 +++++++++++++++++ test/asm/fragment-literals.out.bin | Bin 0 -> 89 bytes test/asm/syntax-error-after-syntax-error.err | 2 +- 18 files changed, 281 insertions(+), 14 deletions(-) create mode 100644 test/asm/fragment-literal-in-load.asm create mode 100644 test/asm/fragment-literal-in-load.err create mode 100644 test/asm/fragment-literal-in-ram.asm create mode 100644 test/asm/fragment-literal-in-ram.err create mode 100644 test/asm/fragment-literal-in-ram.out create mode 100644 test/asm/fragment-literal-in-union.asm create mode 100644 test/asm/fragment-literal-in-union.err create mode 100644 test/asm/fragment-literals.asm create mode 100644 test/asm/fragment-literals.out.bin diff --git a/include/asm/lexer.hpp b/include/asm/lexer.hpp index 9ca6cfa5e..7b0c3a5f0 100644 --- a/include/asm/lexer.hpp +++ b/include/asm/lexer.hpp @@ -85,6 +85,7 @@ struct LexerState { uint32_t lineNo; uint32_t colNo; 
int lastToken; + int nextToken; std::deque ifStack; diff --git a/include/asm/section.hpp b/include/asm/section.hpp index a89a4b0cf..48fd5f175 100644 --- a/include/asm/section.hpp +++ b/include/asm/section.hpp @@ -104,4 +104,6 @@ void sect_PushSection(); void sect_PopSection(); void sect_CheckStack(); +std::string sect_PushSectionFragmentLiteral(); + #endif // RGBDS_ASM_SECTION_HPP diff --git a/man/rgbasm.5 b/man/rgbasm.5 index b865e628b..9372cd830 100644 --- a/man/rgbasm.5 +++ b/man/rgbasm.5 @@ -1052,7 +1052,7 @@ and .Ic WRAMX types are still considered different. .It -Different constraints (alignment, bank, etc.) can be specified for each unionized section declaration, but they must all be compatible. +Different constraints (alignment, bank, etc.) can be specified for each section fragment declaration, but they must all be compatible. For example, alignment must be compatible with any fixed address, all specified banks must be the same, etc. .It A section fragment may not be unionized; after all, that wouldn't make much sense. @@ -1075,6 +1075,70 @@ first, followed by the one from and the one from .Ql bar.o last. +.Ss Fragment literals +Fragment literals are useful for short blocks of code or data that are only referenced once. +They are section fragments created by surrounding instructions or directives with +.Ql [[ +double brackets +.Ql ]] , +without a separate +.Ic SECTION FRAGMENT +declaration. +.Pp +The content of a fragment literal becomes a +.Ic SECTION FRAGMENT , +sharing the same name and bank as its parent ROM section, but without any other constraints. +The parent section also becomes a +.Ic FRAGMENT +if it was not one already, so that it can be merged with its fragment literals. +RGBLINK merges the fragments in no particular order. +.Pp +A fragment literal can take the place of any 16-bit integer constant +.Ql n16 +from the +.Xr gbz80 7 +documentation, as well as a +.Ic DW +item. +The fragment literal then evaluates to its starting address. 
+For example, you can +.Ic CALL +or +.Ic JP +to a fragment literal. +.Pp +This code using named labels: +.Bd -literal -offset indent +FortyTwo: + call Sub1 + jp Sub2 +Sub1: + ld a, [Twenty] + ret +Sub2: + inc a + add a + ret +Twenty: db 20 +dw FortyTwo +.Ed +.Pp +is equivalent to this code using fragment literals: +.Bd -literal -offset indent +dw [[ + call [[ + ld a, [ [[db 20]] ] + ret + ]] + jp [[ + inc a + add a + ret + ]] +]] +.Ed +.Pp +The difference is that the example using fragment literals does not declare a particular order for its pieces. .Sh SYMBOLS RGBDS supports several types of symbols: .Bl -hang diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 50b612882..808e4f411 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -346,6 +346,7 @@ void LexerState::clear(uint32_t lineNo_) { mode = LEXER_NORMAL; atLineStart = true; // yylex() will init colNo due to this lastToken = T_(YYEOF); + nextToken = 0; ifStack.clear(); @@ -1146,6 +1147,7 @@ static uint32_t readGfxConstant() { static bool startsIdentifier(int c) { // Anonymous labels internally start with '!' + // Fragment literal labels internally start with '$' return (c <= 'Z' && c >= 'A') || (c <= 'z' && c >= 'a') || c == '.' 
|| c == '_'; } @@ -1573,6 +1575,11 @@ static void appendStringLiteral(std::string &str, bool raw) { static Token yylex_SKIP_TO_ENDC(); // forward declaration for yylex_NORMAL static Token yylex_NORMAL() { + if (int nextToken = lexerState->nextToken; nextToken) { + lexerState->nextToken = 0; + return Token(nextToken); + } + for (;;) { int c = nextChar(); @@ -1596,10 +1603,6 @@ static Token yylex_NORMAL() { return Token(T_(ID), symName); } - case '[': - return Token(T_(LBRACK)); - case ']': - return Token(T_(RBRACK)); case '(': return Token(T_(LPAREN)); case ')': @@ -1609,6 +1612,23 @@ static Token yylex_NORMAL() { // Handle ambiguous 1- or 2-char tokens + case '[': // Either [ or [[ + if (peek() == '[') { + shiftChar(); + return Token(T_(LBRACKS)); + } + return Token(T_(LBRACK)); + + case ']': // Either ] or ]] + if (peek() == ']') { + shiftChar(); + // `[[ Fragment literals ]]` inject an EOL token to end their contents + // even without a newline. Retroactively lex the `]]` after it. + lexerState->nextToken = T_(RBRACKS); + return Token(T_(EOL)); + } + return Token(T_(RBRACK)); + case '+': // Either += or ADD if (peek() == '=') { shiftChar(); diff --git a/src/asm/parser.y b/src/asm/parser.y index 2c8683c6a..62a33bfb0 100644 --- a/src/asm/parser.y +++ b/src/asm/parser.y @@ -105,11 +105,13 @@ %token YYEOF 0 "end of file" %token NEWLINE "end of line" %token EOB "end of buffer" +%token EOL "end of fragment literal" // General punctuation %token COMMA "," %token COLON ":" DOUBLE_COLON "::" %token LBRACK "[" RBRACK "]" +%token LBRACKS "[[" RBRACKS "]]" %token LPAREN "(" RPAREN ")" // Arithmetic operators @@ -362,6 +364,8 @@ %type redef_equs %type scoped_id %type scoped_anon_id +%type fragment_literal +%type fragment_literal_name // SM83 instruction parameters %type reg_r @@ -435,7 +439,7 @@ line: | line_directive // Directives that manage newlines themselves ; -endofline: NEWLINE | EOB; +endofline: NEWLINE | EOB | EOL; // For "logistical" reasons, these directives must 
manage newlines themselves. // This is because we need to switch the lexer's mode *after* the newline has been read, @@ -1310,6 +1314,9 @@ reloc_16bit: $$ = std::move($1); $$.checkNBit(16); } + | fragment_literal { + $$.makeSymbol($1); + } ; reloc_16bit_no_str: @@ -1317,6 +1324,23 @@ reloc_16bit_no_str: $$ = std::move($1); $$.checkNBit(16); } + | fragment_literal { + $$.makeSymbol($1); + } +; + +fragment_literal: + LBRACKS fragment_literal_name asm_file RBRACKS { + sect_PopSection(); + $$ = std::move($2); + } +; + +fragment_literal_name: + %empty { + $$ = sect_PushSectionFragmentLiteral(); + sym_AddLabel($$); + } ; relocexpr: diff --git a/src/asm/section.cpp b/src/asm/section.cpp index 25553888a..5b5bc2eae 100644 --- a/src/asm/section.cpp +++ b/src/asm/section.cpp @@ -22,6 +22,8 @@ #include "asm/symbol.hpp" #include "asm/warning.hpp" +using namespace std::literals; + uint8_t fillByte; struct UnionStackEntry { @@ -298,6 +300,32 @@ static Section *createSection( return &sect; } +// Create a new section fragment literal, added to the list but not to the map. +static Section *createSectionFragmentLiteral(Section const &parent) { + // Add the new section to the list, but do not update the map + Section &sect = sectionList.emplace_back(); + assume(sectionMap.find(parent.name) != sectionMap.end()); + + sect.name = parent.name; + sect.type = parent.type; + sect.modifier = SECTION_FRAGMENT; + sect.src = fstk_GetFileStack(); + sect.fileLine = lexer_GetLineNo(); + sect.size = 0; + sect.org = UINT32_MAX; + sect.bank = parent.bank == 0 ? UINT32_MAX : parent.bank; + sect.align = 0; + sect.alignOfs = 0; + + out_RegisterNode(sect.src); + + // Section fragment literals must be ROM sections. + assume(sect_HasData(sect.type)); + sect.data.resize(sectionTypeInfo[sect.type].size); + + return &sect; +} + // Find a section by name and type. If it doesn't exist, create it. 
static Section *getSection( std::string const &name, @@ -993,3 +1021,36 @@ void sect_EndSection() { currentSection = nullptr; sym_ResetCurrentLabelScopes(); } + +std::string sect_PushSectionFragmentLiteral() { + static uint64_t nextFragmentLiteralID = 0; + + // Like `requireCodeSection` but fatal + if (!currentSection) + fatalerror("Cannot output fragment literals outside of a SECTION\n"); + if (!sect_HasData(currentSection->type)) + fatalerror( + "Section '%s' cannot contain fragment literals (not ROM0 or ROMX)\n", + currentSection->name.c_str() + ); + + if (currentLoadSection) + fatalerror("`LOAD` blocks cannot contain fragment literals\n"); + if (currentSection->modifier == SECTION_UNION) + fatalerror("`SECTION UNION` cannot contain fragment literals\n"); + + // A section containing a fragment literal has to become a fragment too + currentSection->modifier = SECTION_FRAGMENT; + + Section *parent = currentSection; + sect_PushSection(); // Resets `currentSection` + + Section *sect = createSectionFragmentLiteral(*parent); + + changeSection(); + curOffset = sect->size; + currentSection = sect; + + // Return a symbol ID to use for the address of this section fragment + return "$"s + std::to_string(nextFragmentLiteralID++); +} diff --git a/src/asm/symbol.cpp b/src/asm/symbol.cpp index 1ae86aba5..d350a2549 100644 --- a/src/asm/symbol.cpp +++ b/src/asm/symbol.cpp @@ -539,9 +539,7 @@ std::string sym_MakeAnonLabelName(uint32_t ofs, bool neg) { id = anonLabelID + ofs; } - std::string anon("!"); - anon += std::to_string(id); - return anon; + return "!"s + std::to_string(id); } void sym_Export(std::string const &symName) { diff --git a/test/asm/code-after-endm-endr-endc.err b/test/asm/code-after-endm-endr-endc.err index 0220c1a5a..a0ee347c1 100644 --- a/test/asm/code-after-endm-endr-endc.err +++ b/test/asm/code-after-endm-endr-endc.err @@ -1,15 +1,15 @@ error: code-after-endm-endr-endc.asm(6): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + 
syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(7): Macro "mac" not defined error: code-after-endm-endr-endc.asm(12): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(17): syntax error, unexpected PRINTLN, expecting end of line error: code-after-endm-endr-endc.asm(19): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: code-after-endm-endr-endc.asm(23): syntax error, unexpected PRINTLN, expecting end of line error: code-after-endm-endr-endc.asm(25): - syntax error, unexpected PRINTLN, expecting end of line or end of buffer + syntax error, unexpected PRINTLN, expecting end of line or end of buffer or end of fragment literal error: Assembly aborted (7 errors)! 
diff --git a/test/asm/fragment-literal-in-load.asm b/test/asm/fragment-literal-in-load.asm new file mode 100644 index 000000000..87768688c --- /dev/null +++ b/test/asm/fragment-literal-in-load.asm @@ -0,0 +1,14 @@ +SECTION "OAMDMACode", ROM0 +OAMDMACode: +LOAD "hOAMDMA", HRAM +hOAMDMA:: + ldh [$ff46], a + ld a, 40 + jp [[ +: dec a + jr nz, :- + ret + ]] +.end +ENDL +OAMDMACodeEnd: diff --git a/test/asm/fragment-literal-in-load.err b/test/asm/fragment-literal-in-load.err new file mode 100644 index 000000000..c103d4b21 --- /dev/null +++ b/test/asm/fragment-literal-in-load.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-load.asm(7): + `LOAD` blocks cannot contain fragment literals diff --git a/test/asm/fragment-literal-in-ram.asm b/test/asm/fragment-literal-in-ram.asm new file mode 100644 index 000000000..4b36a16a9 --- /dev/null +++ b/test/asm/fragment-literal-in-ram.asm @@ -0,0 +1,9 @@ +SECTION "RAM", WRAM0 + +wFoo:: db +wBar:: ds 3 + println "ok" +wQux:: dw [[ + ds 4 + println "inline" +]] diff --git a/test/asm/fragment-literal-in-ram.err b/test/asm/fragment-literal-in-ram.err new file mode 100644 index 000000000..e4ddc2f10 --- /dev/null +++ b/test/asm/fragment-literal-in-ram.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-ram.asm(6): + Section 'RAM' cannot contain fragment literals (not ROM0 or ROMX) diff --git a/test/asm/fragment-literal-in-ram.out b/test/asm/fragment-literal-in-ram.out new file mode 100644 index 000000000..9766475a4 --- /dev/null +++ b/test/asm/fragment-literal-in-ram.out @@ -0,0 +1 @@ +ok diff --git a/test/asm/fragment-literal-in-union.asm b/test/asm/fragment-literal-in-union.asm new file mode 100644 index 000000000..9f2e75cf8 --- /dev/null +++ b/test/asm/fragment-literal-in-union.asm @@ -0,0 +1,5 @@ +SECTION UNION "U", ROM0 + db $11 + dw [[ db $22 ]] +SECTION UNION "U", ROM0 + db $33 diff --git a/test/asm/fragment-literal-in-union.err b/test/asm/fragment-literal-in-union.err new file mode 100644 index 000000000..c368c377a --- /dev/null +++ 
b/test/asm/fragment-literal-in-union.err @@ -0,0 +1,2 @@ +FATAL: fragment-literal-in-union.asm(3): + `SECTION UNION` cannot contain fragment literals diff --git a/test/asm/fragment-literals.asm b/test/asm/fragment-literals.asm new file mode 100644 index 000000000..c9ab16b37 --- /dev/null +++ b/test/asm/fragment-literals.asm @@ -0,0 +1,62 @@ +SECTION "1", ROM0[0] + +DEF VERSION EQU $11 +GetVersion:: + ld a, [ [[db VERSION]] ] + ret + +SECTION "2", ROM0, ALIGN[4] + +MACRO text + db \1, 0 +ENDM + +MACRO text_pointer + dw [[ + text \1 + ]] +ENDM + +GetText:: + ld hl, [[ + dw [[ db "Alpha", 0 ]] + dw [[ + text "Beta" + ]] + text_pointer "Gamma" + dw 0 + ]] + ld c, a + ld b, 0 + add hl, bc + add hl, bc + ld a, [hli] + ld h, [hl] + ld l, a + ret + +SECTION "C", ROM0 + +Foo:: + call [[ jp [[ jp [[ ret ]] ]] ]] + call [[ +Label:: + call GetVersion + DEF MYTEXT EQU 3 + ld a, MYTEXT + call GetText + ld b, h + ld c, l + ret + ]] + jp [[ +Bar: + inc hl +.loop + nop +: dec l + jr nz, :- + dec h + jr nz, .loop + ret + ]] diff --git a/test/asm/fragment-literals.out.bin b/test/asm/fragment-literals.out.bin new file mode 100644 index 0000000000000000000000000000000000000000..b39e217402583a7cc2dcf19ca2084467ae32cf32 GIT binary patch literal 89 zcmeyx!f;ZM0Ra?c82s57I61Y_@=vNTXffymr5tk#G7=e_QcDsU+!J$i6B*9hF`RW} kI2^!m*pcC|3&Y8?3=DS6X9XBsd`~Jf=qmhGRrq-l05akh!~g&Q literal 0 HcmV?d00001 diff --git a/test/asm/syntax-error-after-syntax-error.err b/test/asm/syntax-error-after-syntax-error.err index a41f723a7..2112ad096 100644 --- a/test/asm/syntax-error-after-syntax-error.err +++ b/test/asm/syntax-error-after-syntax-error.err @@ -7,5 +7,5 @@ error: syntax-error-after-syntax-error.asm(6): error: syntax-error-after-syntax-error.asm(9): syntax error, unexpected : error: syntax-error-after-syntax-error.asm(10): - syntax error, unexpected stop, expecting end of line or end of buffer or :: + syntax error, unexpected stop, expecting end of line or end of buffer or end of fragment literal or :: 
error: Assembly aborted (5 errors)!