From 246f6df075735a5b86ab56d3976a1b01e0f16993 Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Thu, 24 Apr 2025 17:11:25 +0100 Subject: [PATCH 1/2] tweaks --- backend/x86_gas.ml | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/backend/x86_gas.ml b/backend/x86_gas.ml index bf8ff37631e..9ce9f64991b 100644 --- a/backend/x86_gas.ml +++ b/backend/x86_gas.ml @@ -269,22 +269,25 @@ let print_line b = function match is_solaris system with | true -> buf_bytes_directive b ".byte" s | false -> - (* Very long lines can cause gas to be extremely slow so split up large - string literals. It turns out that gas reads files in 32kb chunks so - splitting the string into blocks of 25k characters should be close to - the sweet spot even with a lot of escapes. *) - let chunk_size = 25000 in - let rec chunk i = - if String.length s - i > chunk_size - then ( - bprintf b "\t.ascii\t\"%s\"\n" - (string_of_substring_literal i chunk_size s); - chunk (i + chunk_size)) - else i - in - let i = chunk 0 in - bprintf b "\t.ascii\t\"%s\"" - (string_of_substring_literal i (String.length s - i) s)) + if String.length s = 0 + then () + else + (* Very long lines can cause gas to be extremely slow so split up large + string literals. It turns out that gas reads files in 32kb chunks so + splitting the string into blocks of 25k characters should be close to + the sweet spot even with a lot of escapes. *) + let chunk_size = 25000 in + let rec chunk i = + if String.length s - i > chunk_size + then ( + bprintf b "\t.ascii\t\"%s\"\n" + (string_of_substring_literal i chunk_size s); + chunk (i + chunk_size)) + else i + in + let i = chunk 0 in + bprintf b "\t.ascii\t\"%s\"" + (string_of_substring_literal i (String.length s - i) s)) | Comment s -> bprintf b "\t\t\t\t/* %s */" s | Global s -> bprintf b "\t.globl\t%s" s | Protected s -> bprintf b "\t.protected\t%s" s @@ -338,7 +341,7 @@ let print_line b = function | Size (s, c) -> bprintf b "\t.size %s,%a" s cst c | Type (s, typ) -> bprintf b "\t.type %s,%s" s typ | Reloc { offset; name; expr } -> - bprintf b "\t.reloc %a,%s,%a" cst offset + bprintf b "\t.reloc\t%a, %s, %a" cst offset (reloc_type_to_string name) cst expr (* masm only *) From 45cda5dd1ac8ddc993b64b70f39b019d80b3f3fa Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Thu, 24 Apr 2025 17:37:12 +0100 Subject: [PATCH 2/2] copy over force digits change --- backend/asm_targets/asm_directives_new.ml | 88 ++++++++++++++++------- 1 file changed, 62 insertions(+), 26 deletions(-) diff --git a/backend/asm_targets/asm_directives_new.ml b/backend/asm_targets/asm_directives_new.ml index 1ac0c98d445..1d0c2ffd7de 100644 --- a/backend/asm_targets/asm_directives_new.ml +++ b/backend/asm_targets/asm_directives_new.ml @@ -86,14 +86,28 @@ module Directive = struct | Add of t * t | Sub of t * t - let rec print buf t = + (* CR sspies: The printing change here is needed for the Arm backend. It + currently uses this printer and hence, without the change, prints always + digits. It does not affect x86 in this version, since that one is printed + via the print_gas/print_masm in this code base. *) + let rec print ~force_digits buf t = match t with | (Named_thing _ | Signed_int _ | Unsigned_int _ | This) as c -> - print_subterm buf c - | Add (c1, c2) -> bprintf buf "%a + %a" print_subterm c1 print_subterm c2 - | Sub (c1, c2) -> bprintf buf "%a - %a" print_subterm c1 print_subterm c2 + print_subterm ~force_digits buf c + | Add (c1, c2) -> + bprintf buf "%a + %a" + (print_subterm ~force_digits) + c1 + (print_subterm ~force_digits) + c2 + | Sub (c1, c2) -> + bprintf buf "%a - %a" + (print_subterm ~force_digits) + c1 + (print_subterm ~force_digits) + c2 - and print_subterm buf t = + and print_subterm ~force_digits buf t = match t with | This -> ( match TS.assembler () with @@ -101,25 +115,34 @@ module Directive = struct | MASM -> Buffer.add_string buf "THIS BYTE") | Named_thing name -> Buffer.add_string buf name | Signed_int n -> ( - match TS.assembler () with - (* We use %Ld and not %Lx on Unix-like platforms to ensure that - ".sleb128" directives do not end up with hex arguments (since this - denotes a variable-length encoding it would not be clear where the - sign bit is). *) - | MacOS | GAS_like -> bprintf buf "%Ld" n - | MASM -> - if Int64.compare n 0x8000_0000L >= 0 - && Int64.compare n 0x7fff_ffffL <= 0 + match TS.assembler (), force_digits with + | _, true -> Buffer.add_string buf (Int64.to_string n) + | MASM, _ -> + if Int64.compare n 0x7FFF_FFFFL <= 0 + && Int64.compare n (-0x8000_0000L) >= 0 + then Buffer.add_string buf (Int64.to_string n) + else bprintf buf "0%LxH" n + | _, false -> + if Int64.compare n 0x7FFF_FFFFL <= 0 + && Int64.compare n (-0x8000_0000L) >= 0 then Buffer.add_string buf (Int64.to_string n) - else bprintf buf "0%LxH" n) + else bprintf buf "0x%Lx" n) | Unsigned_int n -> (* We can use the printer for [Signed_int] since we always print as an unsigned hex representation. *) - print_subterm buf (Signed_int (Uint64.to_int64 n)) + print_subterm ~force_digits buf (Signed_int (Uint64.to_int64 n)) | Add (c1, c2) -> - bprintf buf "(%a + %a)" print_subterm c1 print_subterm c2 + bprintf buf "(%a + %a)" + (print_subterm ~force_digits) + c1 + (print_subterm ~force_digits) + c2 | Sub (c1, c2) -> - bprintf buf "(%a - %a)" print_subterm c1 print_subterm c2 + bprintf buf "(%a - %a)" + (print_subterm ~force_digits) + c1 + (print_subterm ~force_digits) + c2 end module Constant_with_width = struct @@ -323,7 +346,8 @@ module Directive = struct | Sixty_four -> "8byte" in let comment = gas_comment_opt comment in - bprintf buf "\t.%s\t%a%s" directive Constant.print + bprintf buf "\t.%s\t%a%s" directive + (Constant.print ~force_digits:false) (Constant_with_width.constant constant) comment | Bytes { str; comment } -> @@ -377,10 +401,13 @@ module Directive = struct bprintf buf "\t.loc\t%d\t%d%a%a" file_num line print_col col print_discriminator discriminator | Private_extern s -> bprintf buf "\t.private_extern %s" s - | Size (s, c) -> bprintf buf "\t.size %s,%a" s Constant.print c + | Size (s, c) -> + bprintf buf "\t.size %s,%a" s (Constant.print ~force_digits:false) c | Sleb128 { constant; comment } -> let comment = gas_comment_opt comment in - bprintf buf "\t.sleb128\t%a%s" Constant.print constant comment + bprintf buf "\t.sleb128\t%a%s" + (Constant.print ~force_digits:false) + constant comment | Type (s, typ) -> let typ = symbol_type_to_string typ in (* CR sspies: Technically, ",STT_OBJECT" violates the assembler syntax @@ -390,10 +417,15 @@ module Directive = struct bprintf buf "\t.type %s,%s" s typ | Uleb128 { constant; comment } -> let comment = gas_comment_opt comment in - bprintf buf "\t.uleb128\t%a%s" Constant.print constant comment + bprintf buf "\t.uleb128\t%a%s" + (Constant.print ~force_digits:true) + constant comment | Direct_assignment (var, const) -> ( match TS.assembler () with - | MacOS -> bprintf buf "\t.set %s, %a" var Constant.print const + | MacOS -> + bprintf buf "\t.set %s, %a" var + (Constant.print ~force_digits:true) + const | _ -> Misc.fatal_error "Cannot emit [Direct_assignment] except on macOS-like assemblers") @@ -403,9 +435,12 @@ module Directive = struct (* masm only *) | External _ -> assert false | Reloc { offset; name; expr } -> - bprintf buf "\t.reloc\t%a, %s, %a" Constant.print offset + bprintf buf "\t.reloc\t%a, %s, %a" + (Constant.print ~force_digits:false) + offset (reloc_type_to_string name) - Constant.print expr + (Constant.print ~force_digits:false) + expr let print_masm buf t = let unsupported name = @@ -437,7 +472,8 @@ module Directive = struct | Sixty_four -> "QWORD" in let comment = masm_comment_opt comment in - bprintf buf "\t%s\t%a%s" directive Constant.print + bprintf buf "\t%s\t%a%s" directive + (Constant.print ~force_digits:false) (Constant_with_width.constant constant) comment | Global s -> bprintf buf "\tPUBLIC\t%s" s