Skip to content

Commit

Permalink
kram - build - fix compile Xcode 16 and macOS 15.
Browse files Browse the repository at this point in the history
Xcode 16 and macOS 15 introduce a simd half type and half1/2/3/4/8/16 vector types.  These conflict with kram's existing half and half4 types, so move those into the kram namespace.
  • Loading branch information
alecazam committed Sep 17, 2024
1 parent 7b8e6d2 commit 0662861
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 16 deletions.
2 changes: 1 addition & 1 deletion kramv/KramRenderer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -2320,7 +2320,7 @@ - (void)drawSample
};

if (isDrawableBlit) {
half4 data16f;
kram::half4 data16f;
[texture getBytes:&data16f bytesPerRow:8 fromRegion:region mipmapLevel:0];
data = toFloat4(data16f);
}
Expand Down
18 changes: 12 additions & 6 deletions libkram/kram/KramConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,9 @@ import std.regex;
#endif

// TODO: move half4 to its own file, but always include it
// Apple's files don't have a half4 type.
namespace simd {
// x Apple's files don't have a half4 type.
// They do now as of macOS 15/Xcode 16. simd::half, 1/2/3/4/8/16
namespace kram {

// This has spotty support on Android. They left out hw support
// for _Float16 on many of the devices. So this fallback is needed there.
Expand Down Expand Up @@ -360,7 +361,7 @@ class half4 {
}
};

} // namespace simd
} // namespace kram

#if !USE_EASTL

Expand Down Expand Up @@ -451,11 +452,16 @@ inline float4 saturate(const float4& v)

#endif

float4 toFloat4(const half4& vv);
half4 toHalf4(const float4& vv);

} // namespace simd


namespace kram {

simd::float4 toFloat4(const half4& vv);
half4 toHalf4(const simd::float4& vv);

} // namespace kram

//---------------------------------------

// this just strips args
Expand Down
18 changes: 9 additions & 9 deletions libkram/kram/float4a.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@

// Bury these for now. They required -mf16c for Intel to be
// defined, and that's kind of a pain right now.
namespace simd {
namespace kram {


#if 0 // USE_FLOAT16

// This only works on Apple, and not on Android unless +fp16 arch there.
// And this is likely not faster than the simd op that does this.
float4 toFloat4(const half4& vv)
simd::float4 toFloat4(const half4& vv)
{
// https://patchwork.ozlabs.org/project/gcc/patch/[email protected]/
// https://gcc.gnu.org/onlinedocs/gcc-7.5.0/gcc/Half-Precision.html
// https://developer.arm.com/documentation/dui0491/i/Using-NEON-Support/Converting-vectors
return float4m((float)vv.x, (float)vv.y, (float)vv.z, (float)vv.w);
}
half4 toHalf4(const float4& vv)
half4 toHalf4(const simd::float4& vv)
{
return half4((_Float16)vv.x, (_Float16)vv.y, (_Float16)vv.z, (_Float16)vv.w);
}
Expand All @@ -30,7 +30,7 @@ half4 toHalf4(const float4& vv)

// using casts instead of vv.reg, so these calls work with Apple SIMD too

float4 toFloat4(const half4& vv)
simd::float4 toFloat4(const half4& vv)
{
// https://patchwork.ozlabs.org/project/gcc/patch/[email protected]/
// https://gcc.gnu.org/onlinedocs/gcc-7.5.0/gcc/Half-Precision.html
Expand All @@ -44,10 +44,10 @@ float4 toFloat4(const half4& vv)
reg16 = _mm_insert_epi16(reg16, vv[2], 2);
reg16 = _mm_insert_epi16(reg16, vv[3], 3);

return float4(_mm_cvtph_ps(reg16));
return simd::float4(_mm_cvtph_ps(reg16));
}

half4 toHalf4(const float4& vv)
half4 toHalf4(const simd::float4& vv)
{
__m128i reg16 = _mm_cvtps_ph(*(const __m128*)&vv, 0); // 4xfp32-> 4xfp16, round to nearest-even

Expand All @@ -67,11 +67,11 @@ half4 toHalf4(const float4& vv)
// using casts instead of vv.reg, so these calls work with Apple SIMD too
// Note: could just use the sse2 neon version

float4 toFloat4(const half4& vv)
simd::float4 toFloat4(const half4& vv)
{
return float4(vcvt_f32_f16(*(const float16x4_t*)&vv));
return simd::float4(vcvt_f32_f16(*(const float16x4_t*)&vv));
}
half4 toHalf4(const float4& vv)
half4 toHalf4(const simd::float4& vv)
{
return half4(vcvt_f16_f32(*(const float32x4_t*)&vv));
}
Expand Down

0 comments on commit 0662861

Please sign in to comment.