15
15
16
16
use coresimd:: simd:: * ;
17
17
use coresimd:: simd_llvm:: * ;
18
+
19
+ use mem;
20
+
18
21
#[ cfg( test) ]
19
22
use stdsimd_test:: assert_instr;
20
23
@@ -30,10 +33,46 @@ pub type vector_unsigned_int = u32x4;
30
33
/// Alias of `m32x4` under an AltiVec-style type name.
// NOTE(review): presumably mirrors C's `vector bool int` — confirm.
pub type vector_bool_int = m32x4 ;
31
34
/// Alias of `f32x4` under an AltiVec-style type name.
// NOTE(review): presumably mirrors C's `vector float` — confirm.
pub type vector_float = f32x4 ;
32
35
36
+ #[ allow( improper_ctypes) ]
37
+ extern "C" {
38
+ #[ link_name = "llvm.ppc.altivec.vperm" ]
39
+ fn vperm ( a : i32x4 , b : i32x4 , c : u8x16 ) -> i32x4 ;
40
+ }
41
+
33
42
mod sealed {
34
43
35
44
use super :: * ;
36
45
46
+ pub trait VectorPerm {
47
+ unsafe fn vec_vperm ( self , b : Self , c : u8x16 ) -> Self ;
48
+ }
49
+
50
/// Implements `VectorPerm` for the vector type `$vec` by routing it through
/// the `i32x4`-typed `vperm` intrinsic.
macro_rules! vector_perm {
    ($vec:ident) => {
        impl VectorPerm for $vec {
            #[inline]
            #[target_feature(enable = "altivec")]
            unsafe fn vec_vperm(self, b: Self, c: u8x16) -> Self {
                // `vperm` is only declared for `i32x4`, so both operands are
                // reinterpreted bit-for-bit. `mem::transmute` refuses to
                // compile unless `$vec` and `i32x4` have the same size.
                let lhs: i32x4 = mem::transmute(self);
                let rhs: i32x4 = mem::transmute(b);
                let permuted: i32x4 = vperm(lhs, rhs, c);

                // Reinterpret the result back as the caller's vector type.
                mem::transmute(permuted)
            }
        }
    };
}
61
+
62
+ vector_perm ! { i8x16 }
63
+ vector_perm ! { u8x16 }
64
+ vector_perm ! { m8x16 }
65
+
66
+ vector_perm ! { i16x8 }
67
+ vector_perm ! { u16x8 }
68
+ vector_perm ! { m16x8 }
69
+
70
+ vector_perm ! { i32x4 }
71
+ vector_perm ! { u32x4 }
72
+ vector_perm ! { m32x4 }
73
+
74
+ vector_perm ! { f32x4 }
75
+
37
76
pub trait VectorAdd < Other > {
38
77
type Result ;
39
78
unsafe fn vec_add ( self , other : Other ) -> Self :: Result ;
@@ -330,6 +369,28 @@ where
330
369
a. vec_add ( b)
331
370
}
332
371
372
+ /// Vector permute.
373
+ #[ inline]
374
+ #[ target_feature( enable = "altivec" ) ]
375
+ pub unsafe fn vec_perm < T > ( a : T , b : T , c : vector_unsigned_char ) -> T
376
+ where
377
+ T : sealed:: VectorPerm ,
378
+ {
379
+
380
+ if cfg ! ( target_endian = "little" ) {
381
+ // vperm has big-endian bias
382
+ //
383
+ // Xor the mask and flip the arguments
384
+ let d = u8x16:: new ( 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 ,
385
+ 255 , 255 , 255 , 255 , 255 , 255 , 255 , 255 ) ;
386
+ let c = simd_xor ( c, d) ;
387
+
388
+ b. vec_vperm ( a, c)
389
+ } else {
390
+ a. vec_vperm ( b, c)
391
+ }
392
+ }
393
+
333
394
#[ cfg( test) ]
334
395
mod tests {
335
396
#[ cfg( target_arch = "powerpc" ) ]
@@ -341,6 +402,18 @@ mod tests {
341
402
use simd:: * ;
342
403
use stdsimd_test:: simd_test;
343
404
405
+ #[ simd_test( enable = "altivec" ) ]
406
+ unsafe fn vec_perm_u16x8 ( ) {
407
+ let a = u16x8:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
408
+ let b = u16x8:: new ( 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 ) ;
409
+
410
+ let c = u8x16:: new ( 0x00 , 0x01 , 0x10 , 0x11 , 0x02 , 0x03 , 0x12 , 0x13 ,
411
+ 0x04 , 0x05 , 0x14 , 0x15 , 0x06 , 0x07 , 0x16 , 0x17 ) ;
412
+ let d = u16x8:: new ( 0 , 10 , 1 , 11 , 2 , 12 , 3 , 13 ) ;
413
+
414
+ assert_eq ! ( d, vec_perm( a, b, c) ) ;
415
+ }
416
+
344
417
#[ simd_test( enable = "altivec" ) ]
345
418
unsafe fn vec_add_i32x4_i32x4 ( ) {
346
419
let x = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
0 commit comments