@@ -66,21 +66,29 @@ use core::arch::aarch64::vget_low_u32;
66
66
macro_rules! shuffle {
67
67
( $vec: expr , $index: expr) => {
68
68
unsafe {
69
- let v_n: [ u32 ; 8 ] = [
69
+ let v_n: [ u32 ; 8 ] = [
70
70
$vec. extract:: <0 >( ) ,
71
71
$vec. extract:: <1 >( ) ,
72
72
$vec. extract:: <2 >( ) ,
73
73
$vec. extract:: <3 >( ) ,
74
74
$vec. extract:: <4 >( ) ,
75
75
$vec. extract:: <5 >( ) ,
76
76
$vec. extract:: <6 >( ) ,
77
- $vec. extract:: <7 >( )
78
- ] ;
77
+ $vec. extract:: <7 >( ) ,
78
+ ] ;
79
79
u32x4x2:: new(
80
- core:: mem:: transmute:: <[ u32 ; 4 ] , u32x4>(
81
- [ v_n[ $index[ 0 ] ] , v_n[ $index[ 1 ] ] , v_n[ $index[ 2 ] ] , v_n[ $index[ 3 ] ] ] ) ,
82
- core:: mem:: transmute:: <[ u32 ; 4 ] , u32x4>(
83
- [ v_n[ $index[ 4 ] ] , v_n[ $index[ 5 ] ] , v_n[ $index[ 6 ] ] , v_n[ $index[ 7 ] ] ] )
80
+ core:: mem:: transmute:: <[ u32 ; 4 ] , u32x4>( [
81
+ v_n[ $index[ 0 ] ] ,
82
+ v_n[ $index[ 1 ] ] ,
83
+ v_n[ $index[ 2 ] ] ,
84
+ v_n[ $index[ 3 ] ] ,
85
+ ] ) ,
86
+ core:: mem:: transmute:: <[ u32 ; 4 ] , u32x4>( [
87
+ v_n[ $index[ 4 ] ] ,
88
+ v_n[ $index[ 5 ] ] ,
89
+ v_n[ $index[ 6 ] ] ,
90
+ v_n[ $index[ 7 ] ] ,
91
+ ] ) ,
84
92
)
85
93
}
86
94
} ;
@@ -90,18 +98,22 @@ macro_rules! shuffle {
90
98
macro_rules! blend {
91
99
( $vec0: expr, $vec1: expr, $index: expr) => {
92
100
unsafe {
93
- let v_n: [ u32 ; 8 ] = [
101
+ let v_n: [ u32 ; 8 ] = [
94
102
$vec0. extract:: <0 >( ) ,
95
103
$vec0. extract:: <1 >( ) ,
96
104
$vec0. extract:: <2 >( ) ,
97
105
$vec0. extract:: <3 >( ) ,
98
106
$vec1. extract:: <0 >( ) ,
99
107
$vec1. extract:: <1 >( ) ,
100
108
$vec1. extract:: <2 >( ) ,
101
- $vec1. extract:: <3 >( )
102
- ] ;
103
- core:: mem:: transmute:: <[ u32 ; 4 ] , u32x4>(
104
- [ v_n[ $index[ 0 ] ] , v_n[ $index[ 1 ] ] , v_n[ $index[ 2 ] ] , v_n[ $index[ 3 ] ] ] )
109
+ $vec1. extract:: <3 >( ) ,
110
+ ] ;
111
+ core:: mem:: transmute:: <[ u32 ; 4 ] , u32x4>( [
112
+ v_n[ $index[ 0 ] ] ,
113
+ v_n[ $index[ 1 ] ] ,
114
+ v_n[ $index[ 2 ] ] ,
115
+ v_n[ $index[ 3 ] ] ,
116
+ ] )
105
117
}
106
118
} ;
107
119
}
@@ -118,10 +130,10 @@ fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) {
118
130
let b0: u32x2 ;
119
131
let b1: u32x2 ;
120
132
unsafe {
121
- a0 = vget_low_u32 ( src. 0 . 0 ) . into ( ) ;
122
- a1 = vget_low_u32 ( src. 0 . 1 ) . into ( ) ;
123
- b0 = vget_high_u32 ( src. 0 . 0 ) . into ( ) ;
124
- b1 = vget_high_u32 ( src. 0 . 1 ) . into ( ) ;
133
+ a0 = vget_low_u32 ( src. 0 . 0 ) . into ( ) ;
134
+ a1 = vget_low_u32 ( src. 0 . 1 ) . into ( ) ;
135
+ b0 = vget_high_u32 ( src. 0 . 0 ) . into ( ) ;
136
+ b1 = vget_high_u32 ( src. 0 . 1 ) . into ( ) ;
125
137
}
126
138
return ( u32x2x2:: new ( a0, a1) , u32x2x2:: new ( b0, b1) ) ;
127
139
}
@@ -193,7 +205,7 @@ impl ConditionallySelectable for FieldElement2625x4 {
193
205
a. 0 [ 1 ] ^ ( mask_vec & ( a. 0 [ 1 ] ^ b. 0 [ 1 ] ) ) ,
194
206
a. 0 [ 2 ] ^ ( mask_vec & ( a. 0 [ 2 ] ^ b. 0 [ 2 ] ) ) ,
195
207
a. 0 [ 3 ] ^ ( mask_vec & ( a. 0 [ 3 ] ^ b. 0 [ 3 ] ) ) ,
196
- a. 0 [ 4 ] ^ ( mask_vec & ( a. 0 [ 4 ] ^ b. 0 [ 4 ] ) )
208
+ a. 0 [ 4 ] ^ ( mask_vec & ( a. 0 [ 4 ] ^ b. 0 [ 4 ] ) ) ,
197
209
] )
198
210
}
199
211
@@ -266,7 +278,6 @@ impl FieldElement2625x4 {
266
278
self . shuffle ( Shuffle :: BACD )
267
279
}
268
280
269
-
270
281
// Can probably be sped up using multiple vset/vget instead of table
271
282
#[ inline]
272
283
pub fn blend ( & self , other : FieldElement2625x4 , control : Lanes ) -> FieldElement2625x4 {
@@ -326,7 +337,7 @@ impl FieldElement2625x4 {
326
337
327
338
buf[ i] = u32x4x2:: new (
328
339
u32x4:: new ( a_2i, b_2i, a_2i_1, b_2i_1) ,
329
- u32x4:: new ( c_2i, d_2i, c_2i_1, d_2i_1)
340
+ u32x4:: new ( c_2i, d_2i, c_2i_1, d_2i_1) ,
330
341
) ;
331
342
}
332
343
return FieldElement2625x4 ( buf) . reduce ( ) ;
@@ -368,20 +379,12 @@ impl FieldElement2625x4 {
368
379
use core:: arch:: aarch64:: vqshlq_u32;
369
380
370
381
let c: u32x4x2 = u32x4x2:: new (
371
- vqshlq_u32 ( v. 0 . 0 , shifts. 0 . into ( ) ) . into ( ) ,
372
- vqshlq_u32 ( v. 0 . 1 , shifts. 1 . into ( ) ) . into ( ) ,
382
+ vqshlq_u32 ( v. 0 . 0 , shifts. 0 . into ( ) ) . into ( ) ,
383
+ vqshlq_u32 ( v. 0 . 1 , shifts. 1 . into ( ) ) . into ( ) ,
373
384
) ;
374
385
u32x4x2:: new (
375
- vcombine_u32 (
376
- vget_high_u32 ( c. 0 . 0 ) ,
377
- vget_low_u32 ( c. 0 . 0 ) ,
378
- )
379
- . into ( ) ,
380
- vcombine_u32 (
381
- vget_high_u32 ( c. 0 . 1 ) ,
382
- vget_low_u32 ( c. 0 . 1 ) ,
383
- )
384
- . into ( ) ,
386
+ vcombine_u32 ( vget_high_u32 ( c. 0 . 0 ) , vget_low_u32 ( c. 0 . 0 ) ) . into ( ) ,
387
+ vcombine_u32 ( vget_high_u32 ( c. 0 . 1 ) , vget_low_u32 ( c. 0 . 1 ) ) . into ( ) ,
385
388
)
386
389
}
387
390
} ;
@@ -390,16 +393,8 @@ impl FieldElement2625x4 {
390
393
unsafe {
391
394
use core:: arch:: aarch64:: vcombine_u32;
392
395
u32x4x2:: new (
393
- vcombine_u32 (
394
- vget_low_u32 ( v_lo. 0 . 0 ) ,
395
- vget_high_u32 ( v_hi. 0 . 0 ) ,
396
- )
397
- . into ( ) ,
398
- vcombine_u32 (
399
- vget_low_u32 ( v_lo. 0 . 1 ) ,
400
- vget_high_u32 ( v_hi. 0 . 1 ) ,
401
- )
402
- . into ( ) ,
396
+ vcombine_u32 ( vget_low_u32 ( v_lo. 0 . 0 ) , vget_high_u32 ( v_hi. 0 . 0 ) ) . into ( ) ,
397
+ vcombine_u32 ( vget_low_u32 ( v_lo. 0 . 1 ) , vget_high_u32 ( v_hi. 0 . 1 ) ) . into ( ) ,
403
398
)
404
399
}
405
400
} ;
@@ -874,5 +869,3 @@ mod test {
874
869
assert_eq ! ( x3, splits[ 3 ] ) ;
875
870
}
876
871
}
877
-
878
-
0 commit comments