Skip to content

Commit 20ec176

Browse files
committed
Implement vec_perm
Big and little endian supported.
1 parent e715c93 commit 20ec176

File tree

1 file changed

+73
-0
lines changed

1 file changed

+73
-0
lines changed

coresimd/powerpc/altivec.rs

+73
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515

1616
use coresimd::simd::*;
1717
use coresimd::simd_llvm::*;
18+
19+
use mem;
20+
1821
#[cfg(test)]
1922
use stdsimd_test::assert_instr;
2023

@@ -355,10 +358,46 @@ impl_from_bits_!(
355358
vector_bool_int
356359
);
357360

361+
#[allow(improper_ctypes)]
362+
extern "C" {
363+
#[ link_name = "llvm.ppc.altivec.vperm" ]
364+
fn vperm(a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char) -> vector_signed_int;
365+
}
366+
358367
mod sealed {
359368

360369
use super::*;
361370

371+
pub trait VectorPerm {
372+
unsafe fn vec_vperm(self, b: Self, c: vector_unsigned_char) -> Self;
373+
}
374+
375+
// Generates a `VectorPerm` implementation for the vector type named by `$ty`.
//
// The generated method reinterprets both operands as `vector_signed_int`
// (the type `vperm` is called with here), invokes `vperm`, and
// reinterprets the result back as `$ty`. `mem::transmute` only compiles when
// source and destination types have the same size, so `$ty` must be exactly
// as large as `vector_signed_int`.
macro_rules! vector_perm {
    {$ty: ident} => {
        impl VectorPerm for $ty {
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn vec_vperm(self, b: Self, c: vector_unsigned_char) -> Self {
                // Bit-for-bit reinterpretation of the operands; no lane
                // values are converted.
                let lhs: vector_signed_int = mem::transmute(self);
                let rhs: vector_signed_int = mem::transmute(b);

                mem::transmute(vperm(lhs, rhs, c))
            }
        }
    };
}
386+
387+
vector_perm!{ vector_signed_char }
388+
vector_perm!{ vector_unsigned_char }
389+
vector_perm!{ vector_bool_char }
390+
391+
vector_perm!{ vector_signed_short }
392+
vector_perm!{ vector_unsigned_short }
393+
vector_perm!{ vector_bool_short }
394+
395+
vector_perm!{ vector_signed_int }
396+
vector_perm!{ vector_unsigned_int }
397+
vector_perm!{ vector_bool_int }
398+
399+
vector_perm!{ vector_float }
400+
362401
pub trait VectorAdd<Other> {
363402
type Result;
364403
unsafe fn vec_add(self, other: Other) -> Self::Result;
@@ -655,6 +694,28 @@ where
655694
a.vec_add(b)
656695
}
657696

697+
/// Vector permute.
698+
#[inline]
699+
#[target_feature(enable = "altivec")]
700+
pub unsafe fn vec_perm<T>(a: T, b: T, c: vector_unsigned_char) -> T
701+
where
702+
T: sealed::VectorPerm,
703+
{
704+
705+
if cfg!(target_endian = "little") {
706+
// vperm has big-endian bias
707+
//
708+
// Xor the mask and flip the arguments
709+
let d = u8x16::new(255, 255, 255, 255, 255, 255, 255, 255,
710+
255, 255, 255, 255, 255, 255, 255, 255).into_bits();
711+
let c = simd_xor(c, d);
712+
713+
b.vec_vperm(a, c)
714+
} else {
715+
a.vec_vperm(b, c)
716+
}
717+
}
718+
658719
#[cfg(test)]
659720
mod tests {
660721
#[cfg(target_arch = "powerpc")]
@@ -666,6 +727,18 @@ mod tests {
666727
use simd::*;
667728
use stdsimd_test::simd_test;
668729

730+
#[simd_test(enable = "altivec")]
731+
unsafe fn vec_perm_u16x8() {
732+
let a: vector_signed_short = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7).into_bits();
733+
let b = u16x8::new(10, 11, 12, 13, 14, 15, 16, 17).into_bits();
734+
735+
let c = u8x16::new(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
736+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17).into_bits();
737+
let d = u16x8::new(0, 10, 1, 11, 2, 12, 3, 13);
738+
739+
assert_eq!(d, vec_perm(a, b, c).into_bits());
740+
}
741+
669742
#[simd_test(enable = "altivec")]
670743
unsafe fn vec_add_i32x4_i32x4() {
671744
let x = i32x4::new(1, 2, 3, 4);

0 commit comments

Comments
 (0)