From 4628e0aaf305b90956d5cfdda189ee0e760a6a55 Mon Sep 17 00:00:00 2001 From: smaillet <25911635+smaillet@users.noreply.github.com> Date: Tue, 6 May 2025 15:58:43 -0700 Subject: [PATCH] Updated to use UTF8 literals for attribute names - This means less overhead at runtime as the strings are already in the native UTF8 format. - Some other places where `LazyEncodedString` were already used were also updated to use a UTF8 literal. - A few doc updates. --- IgnoredWords.dic | 5 +++ .../CodeGenWithDebugInfo/CortexM3ABI.cs | 42 ++++++++++--------- src/Samples/CodeGenWithDebugInfo/Program.cs | 6 +-- src/Samples/CodeGenWithDebugInfo/X64ABI.cs | 30 ++++++------- .../Kaleidoscope/Chapter2/Kaleidoscope-ch2.md | 2 +- .../Kaleidoscope/Chapter3.5/CodeGenerator.cs | 1 - .../Chapter3.5/Kaleidoscope-ch3.5.md | 21 +++++----- .../Kaleidoscope/Chapter8/CodeGenerator.cs | 2 +- src/Samples/Kaleidoscope/IgnoredWords.dic | 14 +++++++ src/Samples/OrcV2VeryLazy/Program.cs | 10 ++--- .../LazyEncodedString.cs | 5 +++ 11 files changed, 83 insertions(+), 55 deletions(-) diff --git a/IgnoredWords.dic b/IgnoredWords.dic index 3f5eb797e..336e1044c 100644 --- a/IgnoredWords.dic +++ b/IgnoredWords.dic @@ -1,4 +1,5 @@ ABI +Accessor Addr alloca anonymize @@ -56,6 +57,7 @@ Identifier inline inlined Interop +jit Lexer LibLLVM Llilum @@ -99,10 +101,13 @@ Typedef uid unaryop undiscoverable +uniqued +uniqueing userdefinedop Users usings utils +variadic vcxproj versioned versioning diff --git a/src/Samples/CodeGenWithDebugInfo/CortexM3ABI.cs b/src/Samples/CodeGenWithDebugInfo/CortexM3ABI.cs index ea08dc038..27a5c4351 100644 --- a/src/Samples/CodeGenWithDebugInfo/CortexM3ABI.cs +++ b/src/Samples/CodeGenWithDebugInfo/CortexM3ABI.cs @@ -7,6 +7,7 @@ using System; using System.Collections.Immutable; +using Ubiquity.NET.InteropHelpers; using Ubiquity.NET.Llvm; using Ubiquity.NET.Llvm.DebugInfo; using Ubiquity.NET.Llvm.Types; @@ -72,8 +73,8 @@ public void AddAttributesForByValueStructure( Function function, 
DebugFunctionTy var layout = function.ParentModule.Layout; function.AddAttributes( FunctionAttributeIndex.Parameter0 + paramIndex - , function.Context.CreateAttribute( "byval", ptrType.ElementType) - , function.Context.CreateAttribute( "align", layout.AbiAlignmentOf( ptrType.ElementType! ) ) + , function.Context.CreateAttribute( "byval"u8, ptrType.ElementType) + , function.Context.CreateAttribute( "align"u8, layout.AbiAlignmentOf( ptrType.ElementType! ) ) ); } @@ -86,27 +87,28 @@ public void AddModuleFlags( Module module ) public ImmutableArray BuildTargetDependentFunctionAttributes( IContext ctx ) => [ - ctx.CreateAttribute( "correctly-rounded-divide-sqrt-fp-math", "false" ), - ctx.CreateAttribute( "disable-tail-calls", "false" ), - ctx.CreateAttribute( "less-precise-fpmad", "false" ), - ctx.CreateAttribute( "no-frame-pointer-elim", "true" ), - ctx.CreateAttribute( "no-frame-pointer-elim-non-leaf" ), - ctx.CreateAttribute( "no-infs-fp-math", "false" ), - ctx.CreateAttribute( "no-jump-tables", "false" ), - ctx.CreateAttribute( "no-nans-fp-math", "false" ), - ctx.CreateAttribute( "no-signed-zeros-fp-math", "false" ), - ctx.CreateAttribute( "no-trapping-math", "false" ), - ctx.CreateAttribute( "stack-protector-buffer-size", "8" ), - ctx.CreateAttribute( "target-cpu", Cpu ), - ctx.CreateAttribute( "target-features", Features ), - ctx.CreateAttribute( "unsafe-fp-math", "false" ), - ctx.CreateAttribute( "use-soft-float", "false" ) + ctx.CreateAttribute( "correctly-rounded-divide-sqrt-fp-math"u8, "false"u8 ), + ctx.CreateAttribute( "disable-tail-calls"u8, "false"u8 ), + ctx.CreateAttribute( "less-precise-fpmad"u8, "false"u8 ), + ctx.CreateAttribute( "no-frame-pointer-elim"u8, "true"u8 ), + ctx.CreateAttribute( "no-frame-pointer-elim-non-leaf"u8 ), + ctx.CreateAttribute( "no-infs-fp-math"u8, "false"u8 ), + ctx.CreateAttribute( "no-jump-tables"u8, "false"u8 ), + ctx.CreateAttribute( "no-nans-fp-math"u8, "false"u8 ), + ctx.CreateAttribute( "no-signed-zeros-fp-math"u8, 
"false"u8 ), + ctx.CreateAttribute( "no-trapping-math"u8, "false"u8 ), + ctx.CreateAttribute( "stack-protector-buffer-size"u8, "8"u8 ), + ctx.CreateAttribute( "target-cpu"u8, Cpu ), + ctx.CreateAttribute( "target-features"u8, Features ), + ctx.CreateAttribute( "unsafe-fp-math"u8, "false"u8 ), + ctx.CreateAttribute( "use-soft-float"u8, "false"u8 ) ]; private readonly ILibLlvm LlvmLib; - private const string Cpu = "cortex-m3"; - private const string Features = "+hwdiv,+strict-align,+thumb-mode"; - private const string TripleName = "thumbv7m-none--eabi"; + // Sadly, these can't be utf8 literals, but, they can be static readonly LazyEncodedString! + private static readonly LazyEncodedString Cpu = "cortex-m3"u8; + private static readonly LazyEncodedString Features = "+hwdiv,+strict-align,+thumb-mode"u8; + private static readonly LazyEncodedString TripleName = "thumbv7m-none--eabi"u8; } } diff --git a/src/Samples/CodeGenWithDebugInfo/Program.cs b/src/Samples/CodeGenWithDebugInfo/Program.cs index 4de7fad31..1ccfd375c 100644 --- a/src/Samples/CodeGenWithDebugInfo/Program.cs +++ b/src/Samples/CodeGenWithDebugInfo/Program.cs @@ -248,7 +248,7 @@ private static Function DeclareCopyFunc( ITargetABI abi , debugFlags: DebugInfoFlags.Prototyped , isOptimized: false ).Linkage( Linkage.Internal ) // static function - .AddAttributes( FunctionAttributeIndex.Function, "nounwind", "noinline", "optimizenone" ) + .AddAttributes( FunctionAttributeIndex.Function, "nounwind"u8, "noinline"u8, "optimizenone"u8 ) .AddAttributes( FunctionAttributeIndex.Function, abiAttributes ); Debug.Assert( !fooPtr.IsOpaque(), "Expected the debug info for a pointer was created with a valid ElementType"); @@ -301,7 +301,7 @@ private static void CreateCopyFunctionBody( ref readonly DIBuilder diBuilder .RegisterName( "pDst.addr" ) .SetAlignment( ptrAlign ); - bool hasParam0ByVal = copyFunc.FindAttribute(FunctionAttributeIndex.Parameter0, "byval") is not null; + bool hasParam0ByVal = 
copyFunc.FindAttribute(FunctionAttributeIndex.Parameter0, "byval"u8) is not null; if( hasParam0ByVal ) { diBuilder.InsertDeclare( copyFunc.Parameters[ 0 ] @@ -363,7 +363,7 @@ private static void CreateDoCopyFunctionBody( Module module // create instruction builder to build the body using var instBuilder = new InstructionBuilder( blk ); - bool hasParam0ByVal = doCopyFunc.FindAttribute(FunctionAttributeIndex.Parameter0, "byval") is not null; + bool hasParam0ByVal = doCopyFunc.FindAttribute(FunctionAttributeIndex.Parameter0, "byval"u8) is not null; if( !hasParam0ByVal ) { // create a temp local copy of the global structure diff --git a/src/Samples/CodeGenWithDebugInfo/X64ABI.cs b/src/Samples/CodeGenWithDebugInfo/X64ABI.cs index 425d8b8a3..8b1f91e85 100644 --- a/src/Samples/CodeGenWithDebugInfo/X64ABI.cs +++ b/src/Samples/CodeGenWithDebugInfo/X64ABI.cs @@ -7,6 +7,7 @@ using System; using System.Collections.Immutable; +using Ubiquity.NET.InteropHelpers; using Ubiquity.NET.Llvm; using Ubiquity.NET.Llvm.DebugInfo; using Ubiquity.NET.Llvm.Types; @@ -67,23 +68,24 @@ public void AddModuleFlags( Module module ) public ImmutableArray BuildTargetDependentFunctionAttributes( IContext ctx ) => [ - ctx.CreateAttribute( "disable-tail-calls", "false" ), - ctx.CreateAttribute( "less-precise-fpmad", "false" ), - ctx.CreateAttribute( "no-frame-pointer-elim", "false" ), - ctx.CreateAttribute( "no-infs-fp-math", "false" ), - ctx.CreateAttribute( "no-nans-fp-math", "false" ), - ctx.CreateAttribute( "stack-protector-buffer-size", "8" ), - ctx.CreateAttribute( "target-cpu", Cpu ), - ctx.CreateAttribute( "target-features", Features ), - ctx.CreateAttribute( "unsafe-fp-math", "false" ), - ctx.CreateAttribute( "use-soft-float", "false" ), - ctx.CreateAttribute( "uwtable", (ulong)UWTableKind.Async) + ctx.CreateAttribute( "disable-tail-calls"u8, "false"u8 ), + ctx.CreateAttribute( "less-precise-fpmad"u8, "false"u8 ), + ctx.CreateAttribute( "no-frame-pointer-elim"u8, "false"u8 ), + 
ctx.CreateAttribute( "no-infs-fp-math"u8, "false"u8 ), + ctx.CreateAttribute( "no-nans-fp-math"u8, "false"u8 ), + ctx.CreateAttribute( "stack-protector-buffer-size"u8, "8"u8 ), + ctx.CreateAttribute( "target-cpu"u8, Cpu ), + ctx.CreateAttribute( "target-features"u8, Features ), + ctx.CreateAttribute( "unsafe-fp-math"u8, "false"u8 ), + ctx.CreateAttribute( "use-soft-float"u8, "false"u8 ), + ctx.CreateAttribute( "uwtable"u8, (ulong)UWTableKind.Async) ]; private readonly ILibLlvm LlvmLib; - private const string Cpu = "x86-64"; - private const string Features = "+sse,+sse2"; - private const string TripleName = "x86_64-pc-windows-msvc18.0.0"; + // Sadly, these can't be utf8 literals, but, they can be static readonly LazyEncodedString! + private static readonly LazyEncodedString Cpu = "x86-64"u8; + private static readonly LazyEncodedString Features = "+sse,+sse2"u8; + private static readonly LazyEncodedString TripleName = "x86_64-pc-windows-msvc18.0.0"u8; } } diff --git a/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md b/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md index b54f4b787..ada11123a 100644 --- a/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md +++ b/src/Samples/Kaleidoscope/Chapter2/Kaleidoscope-ch2.md @@ -20,7 +20,7 @@ to implement) ### Lexer symbols The Kaleidoscope lexer consists of several tokens and is defined in the -[Kaleidoscope.g4](https://github.com/UbiquityDotNET/Llvm.NET/blob/master/Samples/Kaleidoscope/Kaleidoscope.Parser/Kaleidoscope.g4) +[Kaleidoscope.g4](https://github.com/UbiquityDotNET/Llvm.NET/blob/develop/src/Samples/Kaleidoscope/Kaleidoscope.Grammar/ANTLR/Kaleidoscope.g4) grammar file. 
``` antlr diff --git a/src/Samples/Kaleidoscope/Chapter3.5/CodeGenerator.cs b/src/Samples/Kaleidoscope/Chapter3.5/CodeGenerator.cs index dacf0c5c1..18cb7faae 100644 --- a/src/Samples/Kaleidoscope/Chapter3.5/CodeGenerator.cs +++ b/src/Samples/Kaleidoscope/Chapter3.5/CodeGenerator.cs @@ -211,7 +211,6 @@ private Function GetOrDeclareFunction(Prototype prototype) var llvmSignature = Context.GetFunctionType( returnType: Context.DoubleType, args: prototype.Parameters.Select( _ => Context.DoubleType ) ); var retVal = Module.CreateFunction( prototype.Name, llvmSignature ); - retVal.AddAttribute( FunctionAttributeIndex.Function, prototype.IsExtern ? "builtin" : "nobuiltin" ); int index = 0; foreach(var argId in prototype.Parameters) diff --git a/src/Samples/Kaleidoscope/Chapter3.5/Kaleidoscope-ch3.5.md b/src/Samples/Kaleidoscope/Chapter3.5/Kaleidoscope-ch3.5.md index 1204337ca..ff7571f4d 100644 --- a/src/Samples/Kaleidoscope/Chapter3.5/Kaleidoscope-ch3.5.md +++ b/src/Samples/Kaleidoscope/Chapter3.5/Kaleidoscope-ch3.5.md @@ -24,23 +24,24 @@ sample that actually uses the optimizations more is left as an exercise for the ### Initialization The code generation maintains state for the transformation as private members. To support optimization generally only requires a set of named passes and to call the method to run the passes on a function or -module. [Technically an overload provides the chance to set [PassBuilderOptions](xref:Ubiquity.NET.Llvm.PassBuilderOptions) but -this sample just uses the overload that applies defaults.] The new pass management system +module. [Technically an overload provides the chance to set [PassBuilderOptions](xref:Ubiquity.NET.Llvm.PassBuilderOptions) +but this sample just uses the overload that applies defaults.] The new pass management system uses the string names of passes instead of a distinct type and named methods for adding them etc... These Options are initialized in a private static member for the passes. 
[!code-csharp[Main](CodeGenerator.cs#PrivateMembers)] ### Special attributes for parsed functions +>[!WARNING] When performing optimizations with the new pass builder system the TargetLibraryInfo (Internal LLVM concept) is -used to determine what the "built-in" functions are. Unfortunately they leave little room for manipulating or -customizing this set (In C++, in LLVM-C there is NO support for this type at all!). Unfortunately that means that -if any function happens to have the same name as the TargetLibraryInfo for a given Triple then it will be optimized -AS a built-in function (even if not declared as one). This is an unfortunate state of affairs with the LLVM support -for C++ and highly problematic for `C` based bindings/projections like this library. Fortunately there is a scapegoat -for this. The function can include a `nobuiltin` attribute to prevent the optimizer from assuming calls to it are -one of the well known built-in functions. This is used for ALL methods that come from the AST, which is all functions -at this point in the language design. Thus `GetOrDeclareFunction` will add that attribute for any function creations. +used to determine what the "built-in" functions are. Unfortunately, they leave little room for manipulating or +customizing this set (In C++ there is some "wiggle room", in LLVM-C there is NO support for this type at all!). +Unfortunately, that means that if any function happens to have the same name as an entry in the TargetLibraryInfo for a given +Triple then it will be optimized AS a built-in function (even if not declared as one). This is an unfortunate state +of affairs with the LLVM support for C++ and highly problematic for `C` based bindings/projections like this library. +Fortunately, there is an escape hatch for this. The function can include a `nobuiltin` attribute at the call site to prevent +the optimizer from assuming calls to it are one of the well-known built-in functions. This isn't used for Kaleidoscope. 
+Omitting it, however, does leave room for problems with names that match some arbitrary set of "built-in" symbols. [!code-csharp[Main](CodeGenerator.cs#GetOrDeclareFunction)] diff --git a/src/Samples/Kaleidoscope/Chapter8/CodeGenerator.cs b/src/Samples/Kaleidoscope/Chapter8/CodeGenerator.cs index 582ea03c0..66683f4ae 100644 --- a/src/Samples/Kaleidoscope/Chapter8/CodeGenerator.cs +++ b/src/Samples/Kaleidoscope/Chapter8/CodeGenerator.cs @@ -228,7 +228,7 @@ public void Dispose( ) if( definition.IsAnonymous ) { - function.AddAttribute( FunctionAttributeIndex.Function, "alwaysinline") + function.AddAttribute( FunctionAttributeIndex.Function, "alwaysinline"u8) .Linkage( Linkage.Private ); AnonymousFunctions.Add( function ); diff --git a/src/Samples/Kaleidoscope/IgnoredWords.dic b/src/Samples/Kaleidoscope/IgnoredWords.dic index a8674cee5..a06cb27eb 100644 --- a/src/Samples/Kaleidoscope/IgnoredWords.dic +++ b/src/Samples/Kaleidoscope/IgnoredWords.dic @@ -3,8 +3,13 @@ afterloop alloca alltmp antlr +ascii +binaryop +binop +blockdiag booltmp builtin +builtinop calltmp cmptmp conditionalexpression @@ -15,19 +20,24 @@ divtmp fadd fibi fmul +foo +func hacky ifcond ifcont ifresult imag impl +inline inlined +lexer loopcond mandel mandelconverger multmp nextvar nobuiltin +paren parsetree powtmp printchar @@ -37,4 +47,8 @@ printstar putchard ret subtmp +uid +unaryop uniqued +userdefinedop +xref diff --git a/src/Samples/OrcV2VeryLazy/Program.cs b/src/Samples/OrcV2VeryLazy/Program.cs index 7a3c8c564..e5a1ef865 100644 --- a/src/Samples/OrcV2VeryLazy/Program.cs +++ b/src/Samples/OrcV2VeryLazy/Program.cs @@ -34,7 +34,7 @@ private static void Main(/*string[] args*/) // occurs that prevents completion of the transfer. When transfer completes the // MU is marked as disposed but a call to Dispose() is a safe NOP. 
Thus, this handles // all conditions consistently - using var fooMu = new CustomMaterializationUnit("FooMU", Materialize, fooSym); + using var fooMu = new CustomMaterializationUnit("FooMU"u8, Materialize, fooSym); jit.MainLib.Define(fooMu); using var internedBarBodyName = jit.MangleAndIntern(BarBodySymbolName); @@ -42,14 +42,14 @@ private static void Main(/*string[] args*/) [internedBarBodyName] = flags, }.ToImmutable(); - using var barMu = new CustomMaterializationUnit("BarMU", Materialize, barSym); + using var barMu = new CustomMaterializationUnit("BarMU"u8, Materialize, barSym); jit.MainLib.Define(barMu); using var ism = new LocalIndirectStubsManager(triple); using var callThruMgr = jit.Session.CreateLazyCallThroughManager(triple); - using var internedFoo = jit.MangleAndIntern("foo"); - using var internedBar = jit.MangleAndIntern("bar"); + using var internedFoo = jit.MangleAndIntern("foo"u8); + using var internedBar = jit.MangleAndIntern("bar"u8); var reexports = new KvpArrayBuilder { [internedFoo] = new(internedFooBodyName, flags), @@ -59,7 +59,7 @@ private static void Main(/*string[] args*/) using var lazyReExports = new LazyReExportsMaterializationUnit(callThruMgr, ism, jit.MainLib, reexports); jit.MainLib.Define(lazyReExports); - UInt64 address = jit.Lookup("entry"); + UInt64 address = jit.Lookup("entry"u8); unsafe { diff --git a/src/Ubiquity.NET.InteropHelpers/LazyEncodedString.cs b/src/Ubiquity.NET.InteropHelpers/LazyEncodedString.cs index fad3c5131..0a594d549 100644 --- a/src/Ubiquity.NET.InteropHelpers/LazyEncodedString.cs +++ b/src/Ubiquity.NET.InteropHelpers/LazyEncodedString.cs @@ -182,6 +182,11 @@ public static implicit operator ReadOnlySpan(LazyEncodedString self) [SuppressMessage( "Usage", "CA2225:Operator overloads have named alternates", Justification = "It's a convenience wrapper around an existing constructor" )] public static implicit operator LazyEncodedString(string managed) => new(managed); + /// Convenient implicit conversion of a span of
UTF8 encoded bytes into a Lazily encoded string + /// Span of UTF8 encoded bytes to wrap with lazy encoding support + [SuppressMessage( "Usage", "CA2225:Operator overloads have named alternates", Justification = "It's a convenience wrapper around an existing constructor" )] + public static implicit operator LazyEncodedString(ReadOnlySpan utf8Data) => new(utf8Data); + private readonly Encoding Encoding; private readonly Lazy ManagedString;