Skip to content

Commit 47b90c2

Browse files
committed
Implement vec_perm
Big and little endian supported.
1 parent b8b1c5f commit 47b90c2

File tree

1 file changed

+73
-0
lines changed

1 file changed

+73
-0
lines changed

coresimd/powerpc/altivec.rs

+73
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515

1616
use coresimd::simd::*;
1717
use coresimd::simd_llvm::*;
18+
19+
use mem;
20+
1821
#[cfg(test)]
1922
use stdsimd_test::assert_instr;
2023

@@ -30,10 +33,46 @@ pub type vector_unsigned_int = u32x4;
3033
pub type vector_bool_int = m32x4;
3134
pub type vector_float = f32x4;
3235

36+
#[allow(improper_ctypes)]
37+
extern "C" {
38+
#[ link_name = "llvm.ppc.altivec.vperm" ]
39+
fn vperm(a: i32x4, b: i32x4, c: u8x16) -> i32x4;
40+
}
41+
3342
mod sealed {
3443

3544
use super::*;
3645

46+
pub trait VectorPerm {
47+
unsafe fn vec_vperm(self, b: Self, c: u8x16) -> Self;
48+
}
49+
50+
macro_rules! vector_perm {
51+
{$impl: ident} => {
52+
impl VectorPerm for $impl {
53+
#[inline]
54+
#[target_feature(enable = "altivec")]
55+
unsafe fn vec_vperm(self, b: Self, c: u8x16) -> Self {
56+
mem::transmute(vperm(mem::transmute(self), mem::transmute(b), c))
57+
}
58+
}
59+
}
60+
}
61+
62+
vector_perm!{ i8x16 }
63+
vector_perm!{ u8x16 }
64+
vector_perm!{ m8x16 }
65+
66+
vector_perm!{ i16x8 }
67+
vector_perm!{ u16x8 }
68+
vector_perm!{ m16x8 }
69+
70+
vector_perm!{ i32x4 }
71+
vector_perm!{ u32x4 }
72+
vector_perm!{ m32x4 }
73+
74+
vector_perm!{ f32x4 }
75+
3776
pub trait VectorAdd<Other> {
3877
type Result;
3978
unsafe fn vec_add(self, other: Other) -> Self::Result;
@@ -330,6 +369,28 @@ where
330369
a.vec_add(b)
331370
}
332371

372+
/// Vector permute.
373+
#[inline]
374+
#[target_feature(enable = "altivec")]
375+
pub unsafe fn vec_perm<T>(a: T, b: T, c: vector_unsigned_char) -> T
376+
where
377+
T: sealed::VectorPerm,
378+
{
379+
380+
if cfg!(target_endian = "little") {
381+
// vperm has big-endian bias
382+
//
383+
// Xor the mask and flip the arguments
384+
let d = u8x16::new(255, 255, 255, 255, 255, 255, 255, 255,
385+
255, 255, 255, 255, 255, 255, 255, 255);
386+
let c = simd_xor(c, d);
387+
388+
b.vec_vperm(a, c)
389+
} else {
390+
a.vec_vperm(b, c)
391+
}
392+
}
393+
333394
#[cfg(test)]
334395
mod tests {
335396
#[cfg(target_arch = "powerpc")]
@@ -341,6 +402,18 @@ mod tests {
341402
use simd::*;
342403
use stdsimd_test::simd_test;
343404

405+
#[simd_test(enable = "altivec")]
406+
unsafe fn vec_perm_u16x8() {
407+
let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
408+
let b = u16x8::new(10, 11, 12, 13, 14, 15, 16, 17);
409+
410+
let c = u8x16::new(0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
411+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17);
412+
let d = u16x8::new(0, 10, 1, 11, 2, 12, 3, 13);
413+
414+
assert_eq!(d, vec_perm(a, b, c));
415+
}
416+
344417
#[simd_test(enable = "altivec")]
345418
unsafe fn vec_add_i32x4_i32x4() {
346419
let x = i32x4::new(1, 2, 3, 4);

0 commit comments

Comments
 (0)