From 0662861866ca7b5439ea15a767972f656d3fb3b5 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 16 Sep 2024 21:53:10 -0700 Subject: [PATCH] kram - build - fix compile on Xcode 16 and macOS 15. These SDKs introduce simd::half and simd::half1/2/3/4/8/16 vector types. Those conflict with kram's own half and half4 types, which were declared in namespace simd, so move the kram types into namespace kram. --- kramv/KramRenderer.mm | 2 +- libkram/kram/KramConfig.h | 18 ++++++++++++------ libkram/kram/float4a.cpp | 18 +++++++++--------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 50895c0..b0c7d32 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -2320,7 +2320,7 @@ - (void)drawSample }; if (isDrawableBlit) { - half4 data16f; + kram::half4 data16f; [texture getBytes:&data16f bytesPerRow:8 fromRegion:region mipmapLevel:0]; data = toFloat4(data16f); } diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index e4d3208..1d41e72 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -308,8 +308,9 @@ import std.regex; #endif // TODO: move half4 to it's own file, but always include it -// Apple's files don't have a half4 type. -namespace simd { +// x Apple's files don't have a half4 type. +// They do now as of macOS 15/Xcode 16. simd::half, 1/2/3/4/8/16 +namespace kram { // This has spotty support on Android. They left out hw support // for _Float16 on many of the devices. So there would need this fallback. 
@@ -360,7 +361,7 @@ class half4 { } }; -} // namespace simd +} // namespace kram #if !USE_EASTL @@ -451,11 +452,16 @@ inline float4 saturate(const float4& v) #endif -float4 toFloat4(const half4& vv); -half4 toHalf4(const float4& vv); - } // namespace simd + +namespace kram { + +simd::float4 toFloat4(const half4& vv); +half4 toHalf4(const simd::float4& vv); + +} // namespace kram + //--------------------------------------- // this just strips args diff --git a/libkram/kram/float4a.cpp b/libkram/kram/float4a.cpp index 44d5422..682ae24 100644 --- a/libkram/kram/float4a.cpp +++ b/libkram/kram/float4a.cpp @@ -6,21 +6,21 @@ // Bury these for now. They required -mf16c for Intel to be // defined, and that's kind of a pain right now. -namespace simd { +namespace kram { #if 0 // USE_FLOAT16 // This only works on Apple, and not on Android unless +fp16 arch there. // And this is likely not faster than the simd op that does this. -float4 toFloat4(const half4& vv) +simd::float4 toFloat4(const half4& vv) { // https://patchwork.ozlabs.org/project/gcc/patch/559BC75A.1080606@arm.com/ // https://gcc.gnu.org/onlinedocs/gcc-7.5.0/gcc/Half-Precision.html // https://developer.arm.com/documentation/dui0491/i/Using-NEON-Support/Converting-vectors return float4m((float)vv.x, (float)vv.y, (float)vv.z, (float)vv.w); } -half4 toHalf4(const float4& vv) +half4 toHalf4(const simd::float4& vv) { return half4((_Float16)vv.x, (_Float16)vv.y, (_Float16)vv.z, (_Float16)vv.w); } @@ -30,7 +30,7 @@ half4 toHalf4(const float4& vv) // using casts instead of vv.reg, so these calls work with Apple SIMD too -float4 toFloat4(const half4& vv) +simd::float4 toFloat4(const half4& vv) { // https://patchwork.ozlabs.org/project/gcc/patch/559BC75A.1080606@arm.com/ // https://gcc.gnu.org/onlinedocs/gcc-7.5.0/gcc/Half-Precision.html @@ -44,10 +44,10 @@ float4 toFloat4(const half4& vv) reg16 = _mm_insert_epi16(reg16, vv[2], 2); reg16 = _mm_insert_epi16(reg16, vv[3], 3); - return float4(_mm_cvtph_ps(reg16)); + return 
simd::float4(_mm_cvtph_ps(reg16)); } -half4 toHalf4(const float4& vv) +half4 toHalf4(const simd::float4& vv) { __m128i reg16 = _mm_cvtps_ph(*(const __m128*)&vv, 0); // 4xfp32-> 4xfp16, round to nearest-even @@ -67,11 +67,11 @@ half4 toHalf4(const float4& vv) // using casts intead of vv.reg, so these calls work with Apple SIMD too // Note: could just use the sse2 neon version -float4 toFloat4(const half4& vv) +simd::float4 toFloat4(const half4& vv) { - return float4(vcvt_f32_f16(*(const float16x4_t*)&vv)); + return simd::float4(vcvt_f32_f16(*(const float16x4_t*)&vv)); } -half4 toHalf4(const float4& vv) +half4 toHalf4(const simd::float4& vv) { return half4(vcvt_f16_f32(*(const float32x4_t*)&vv)); }