@@ -66,22 +66,23 @@ use core::arch::aarch64::vget_low_u32;
66
66
/// Builds a `u32x4x2` by picking eight 32-bit lanes out of `$vec`.
///
/// `$vec` must provide `extract::<N>()` for `N` in `0..=7`, each yielding a
/// `u32`. `$index` must be indexable at positions `0..=7`, and each entry is
/// used to index an eight-element lane array, so it must lie in `0..=7`.
/// Output lane `k` is input lane `$index[k]`: selectors `0..=3` fill the
/// first `u32x4` of the result and selectors `4..=7` fill the second.
///
/// Note that both arguments are pasted into the expansion more than once
/// (`$vec` once per extracted lane, `$index` once per output lane), and that
/// the whole expansion, including the argument expressions, sits inside an
/// `unsafe` block.
macro_rules! shuffle {
    ($vec:expr, $index:expr) => {
        unsafe {
            // Spill every lane of the source into a plain array so the
            // selectors can address them by position.
            let lanes: [u32; 8] = [
                $vec.extract::<0>(),
                $vec.extract::<1>(),
                $vec.extract::<2>(),
                $vec.extract::<3>(),
                $vec.extract::<4>(),
                $vec.extract::<5>(),
                $vec.extract::<6>(),
                $vec.extract::<7>(),
            ];

            // Gather the selected lanes for each half of the result.
            let first_half: [u32; 4] = [
                lanes[$index[0]],
                lanes[$index[1]],
                lanes[$index[2]],
                lanes[$index[3]],
            ];
            let second_half: [u32; 4] = [
                lanes[$index[4]],
                lanes[$index[5]],
                lanes[$index[6]],
                lanes[$index[7]],
            ];

            // Reinterpret each four-lane array as a `u32x4` and pair them up.
            u32x4x2::new(
                core::mem::transmute::<[u32; 4], u32x4>(first_half),
                core::mem::transmute::<[u32; 4], u32x4>(second_half),
            )
        }
    };
}
@@ -90,18 +91,22 @@ macro_rules! shuffle {
90
91
/// Builds a `u32x4` by picking four 32-bit lanes from two source vectors.
///
/// `$vec0` and `$vec1` must each provide `extract::<N>()` for `N` in `0..=3`,
/// yielding a `u32`. Their lanes are laid out back to back in an
/// eight-element array: positions `0..=3` hold the lanes of `$vec0` and
/// positions `4..=7` hold the lanes of `$vec1`. Output lane `k` is the entry
/// at position `$index[k]` of that array, for `k` in `0..=3`.
///
/// Note that the arguments are pasted into the expansion more than once
/// (each vector once per extracted lane, `$index` once per output lane), and
/// that the whole expansion, including the argument expressions, sits inside
/// an `unsafe` block.
macro_rules! blend {
    ($vec0:expr, $vec1:expr, $index:expr) => {
        unsafe {
            // Candidate lanes: first the four from `$vec0`, then the four
            // from `$vec1`.
            let candidates: [u32; 8] = [
                $vec0.extract::<0>(),
                $vec0.extract::<1>(),
                $vec0.extract::<2>(),
                $vec0.extract::<3>(),
                $vec1.extract::<0>(),
                $vec1.extract::<1>(),
                $vec1.extract::<2>(),
                $vec1.extract::<3>(),
            ];

            // Gather the selected lanes in output order.
            let chosen: [u32; 4] = [
                candidates[$index[0]],
                candidates[$index[1]],
                candidates[$index[2]],
                candidates[$index[3]],
            ];

            // Reinterpret the four-lane array as a `u32x4`.
            core::mem::transmute::<[u32; 4], u32x4>(chosen)
        }
    };
}
@@ -118,10 +123,10 @@ fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) {
118
123
let b0: u32x2 ;
119
124
let b1: u32x2 ;
120
125
unsafe {
121
- a0 = vget_low_u32 ( src. 0 . 0 ) . into ( ) ;
122
- a1 = vget_low_u32 ( src. 0 . 1 ) . into ( ) ;
123
- b0 = vget_high_u32 ( src. 0 . 0 ) . into ( ) ;
124
- b1 = vget_high_u32 ( src. 0 . 1 ) . into ( ) ;
126
+ a0 = vget_low_u32 ( src. 0 . 0 ) . into ( ) ;
127
+ a1 = vget_low_u32 ( src. 0 . 1 ) . into ( ) ;
128
+ b0 = vget_high_u32 ( src. 0 . 0 ) . into ( ) ;
129
+ b1 = vget_high_u32 ( src. 0 . 1 ) . into ( ) ;
125
130
}
126
131
return ( u32x2x2:: new ( a0, a1) , u32x2x2:: new ( b0, b1) ) ;
127
132
}
@@ -193,7 +198,7 @@ impl ConditionallySelectable for FieldElement2625x4 {
193
198
a. 0 [ 1 ] ^ ( mask_vec & ( a. 0 [ 1 ] ^ b. 0 [ 1 ] ) ) ,
194
199
a. 0 [ 2 ] ^ ( mask_vec & ( a. 0 [ 2 ] ^ b. 0 [ 2 ] ) ) ,
195
200
a. 0 [ 3 ] ^ ( mask_vec & ( a. 0 [ 3 ] ^ b. 0 [ 3 ] ) ) ,
196
- a. 0 [ 4 ] ^ ( mask_vec & ( a. 0 [ 4 ] ^ b. 0 [ 4 ] ) )
201
+ a. 0 [ 4 ] ^ ( mask_vec & ( a. 0 [ 4 ] ^ b. 0 [ 4 ] ) ) ,
197
202
] )
198
203
}
199
204
@@ -266,7 +271,6 @@ impl FieldElement2625x4 {
266
271
self . shuffle ( Shuffle :: BACD )
267
272
}
268
273
269
-
270
274
// Can probably be sped up using multiple vset/vget instead of table
271
275
#[ inline]
272
276
pub fn blend ( & self , other : FieldElement2625x4 , control : Lanes ) -> FieldElement2625x4 {
@@ -326,7 +330,7 @@ impl FieldElement2625x4 {
326
330
327
331
buf[ i] = u32x4x2:: new (
328
332
u32x4:: new ( a_2i, b_2i, a_2i_1, b_2i_1) ,
329
- u32x4:: new ( c_2i, d_2i, c_2i_1, d_2i_1)
333
+ u32x4:: new ( c_2i, d_2i, c_2i_1, d_2i_1) ,
330
334
) ;
331
335
}
332
336
return FieldElement2625x4 ( buf) . reduce ( ) ;
@@ -368,20 +372,12 @@ impl FieldElement2625x4 {
368
372
use core:: arch:: aarch64:: vqshlq_u32;
369
373
370
374
let c: u32x4x2 = u32x4x2:: new (
371
- vqshlq_u32 ( v. 0 . 0 , shifts. 0 . into ( ) ) . into ( ) ,
372
- vqshlq_u32 ( v. 0 . 1 , shifts. 1 . into ( ) ) . into ( ) ,
375
+ vqshlq_u32 ( v. 0 . 0 , shifts. 0 . into ( ) ) . into ( ) ,
376
+ vqshlq_u32 ( v. 0 . 1 , shifts. 1 . into ( ) ) . into ( ) ,
373
377
) ;
374
378
u32x4x2:: new (
375
- vcombine_u32 (
376
- vget_high_u32 ( c. 0 . 0 ) ,
377
- vget_low_u32 ( c. 0 . 0 ) ,
378
- )
379
- . into ( ) ,
380
- vcombine_u32 (
381
- vget_high_u32 ( c. 0 . 1 ) ,
382
- vget_low_u32 ( c. 0 . 1 ) ,
383
- )
384
- . into ( ) ,
379
+ vcombine_u32 ( vget_high_u32 ( c. 0 . 0 ) , vget_low_u32 ( c. 0 . 0 ) ) . into ( ) ,
380
+ vcombine_u32 ( vget_high_u32 ( c. 0 . 1 ) , vget_low_u32 ( c. 0 . 1 ) ) . into ( ) ,
385
381
)
386
382
}
387
383
} ;
@@ -390,16 +386,8 @@ impl FieldElement2625x4 {
390
386
unsafe {
391
387
use core:: arch:: aarch64:: vcombine_u32;
392
388
u32x4x2:: new (
393
- vcombine_u32 (
394
- vget_low_u32 ( v_lo. 0 . 0 ) ,
395
- vget_high_u32 ( v_hi. 0 . 0 ) ,
396
- )
397
- . into ( ) ,
398
- vcombine_u32 (
399
- vget_low_u32 ( v_lo. 0 . 1 ) ,
400
- vget_high_u32 ( v_hi. 0 . 1 ) ,
401
- )
402
- . into ( ) ,
389
+ vcombine_u32 ( vget_low_u32 ( v_lo. 0 . 0 ) , vget_high_u32 ( v_hi. 0 . 0 ) ) . into ( ) ,
390
+ vcombine_u32 ( vget_low_u32 ( v_lo. 0 . 1 ) , vget_high_u32 ( v_hi. 0 . 1 ) ) . into ( ) ,
403
391
)
404
392
}
405
393
} ;
@@ -874,5 +862,3 @@ mod test {
874
862
assert_eq ! ( x3, splits[ 3 ] ) ;
875
863
}
876
864
}
877
-
878
-
0 commit comments