@@ -28,9 +28,22 @@ use crate::backend::vector::neon::constants::{
28
28
P_TIMES_16_HI , P_TIMES_16_LO , P_TIMES_2_HI , P_TIMES_2_LO ,
29
29
} ;
30
30
31
+ #[ cfg( target_arch = "aarch64" ) ]
32
+ use core:: arch:: aarch64 as core_neon;
33
+ #[ cfg( target_arch = "arm" ) ]
34
+ use core:: arch:: arm as core_neon;
35
+
36
+ use core_neon:: {
37
+ uint32x2_t, uint32x4_t, vcombine_u32, vmull_u32, vmulq_n_u32, vqshlq_u32,
38
+ vreinterpretq_u32_u64, vsubq_u64, vtrn1_u32, vuzp1_u32,
39
+ } ;
40
+
41
+ #[ cfg( target_arch = "arm" ) ]
42
+ use core:: arch:: arm:: { vget_high_u32, vget_low_u32} ;
43
+
31
44
#[ cfg( all( target_arch = "aarch64" ) ) ]
32
45
#[ inline( always) ]
33
- fn vget_high_u32 ( v : core :: arch :: aarch64 :: uint32x4_t ) -> core :: arch :: aarch64 :: uint32x2_t {
46
+ fn vget_high_u32 ( v : uint32x4_t ) -> uint32x2_t {
34
47
use core:: arch:: asm;
35
48
let o;
36
49
unsafe {
@@ -45,7 +58,7 @@ fn vget_high_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uin
45
58
46
59
#[ cfg( all( target_arch = "aarch64" ) ) ]
47
60
#[ inline( always) ]
48
- fn vget_low_u32 ( v : core :: arch :: aarch64 :: uint32x4_t ) -> core :: arch :: aarch64 :: uint32x2_t {
61
+ fn vget_low_u32 ( v : uint32x4_t ) -> uint32x2_t {
49
62
use core:: arch:: asm;
50
63
let o;
51
64
unsafe {
@@ -57,10 +70,6 @@ fn vget_low_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uint
57
70
}
58
71
o
59
72
}
60
- #[ cfg( not( target_arch = "aarch64" ) ) ]
61
- use core:: arch:: aarch64:: vget_high_u32;
62
- #[ cfg( not( target_arch = "aarch64" ) ) ]
63
- use core:: arch:: aarch64:: vget_low_u32;
64
73
65
74
// Shuffle the lanes in a u32x4x2
66
75
macro_rules! shuffle {
@@ -146,9 +155,6 @@ fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) {
146
155
#[ rustfmt:: skip] // Retain formatting of the return tuples
147
156
fn repack_pair ( x : u32x4x2 , y : u32x4x2 ) -> u32x4x2 {
148
157
unsafe {
149
- use core:: arch:: aarch64:: vcombine_u32;
150
- use core:: arch:: aarch64:: vtrn1_u32;
151
-
152
158
u32x4x2:: new (
153
159
vcombine_u32 (
154
160
vtrn1_u32 ( vget_low_u32 ( x. 0 . 0 ) , vget_high_u32 ( x. 0 . 0 ) ) ,
@@ -375,9 +381,6 @@ impl FieldElement2625x4 {
375
381
// Use multiple transposes instead of table lookup?
376
382
let rotated_carryout = |v : u32x4x2 | -> u32x4x2 {
377
383
unsafe {
378
- use core:: arch:: aarch64:: vcombine_u32;
379
- use core:: arch:: aarch64:: vqshlq_u32;
380
-
381
384
let c: u32x4x2 = u32x4x2:: new (
382
385
vqshlq_u32 ( v. 0 . 0 , shifts. 0 . into ( ) ) . into ( ) ,
383
386
vqshlq_u32 ( v. 0 . 1 , shifts. 1 . into ( ) ) . into ( ) ,
@@ -391,7 +394,6 @@ impl FieldElement2625x4 {
391
394
392
395
let combine = |v_lo : u32x4x2 , v_hi : u32x4x2 | -> u32x4x2 {
393
396
unsafe {
394
- use core:: arch:: aarch64:: vcombine_u32;
395
397
u32x4x2:: new (
396
398
vcombine_u32 ( vget_low_u32 ( v_lo. 0 . 0 ) , vget_high_u32 ( v_hi. 0 . 0 ) ) . into ( ) ,
397
399
vcombine_u32 ( vget_low_u32 ( v_lo. 0 . 1 ) , vget_high_u32 ( v_hi. 0 . 1 ) ) . into ( ) ,
@@ -423,9 +425,6 @@ impl FieldElement2625x4 {
423
425
424
426
#[ rustfmt:: skip] // Retain formatting of return tuple
425
427
let c9_19: u32x4x2 = unsafe {
426
- use core:: arch:: aarch64:: vcombine_u32;
427
- use core:: arch:: aarch64:: vmulq_n_u32;
428
-
429
428
let c9_19_spread: u32x4x2 = u32x4x2:: new (
430
429
vmulq_n_u32 ( c98. 0 . 0 , 19 ) . into ( ) ,
431
430
vmulq_n_u32 ( c98. 0 . 1 , 19 ) . into ( ) ,
@@ -471,9 +470,6 @@ impl FieldElement2625x4 {
471
470
let mut c1: u64x2x2 = c. shr :: < 26 > ( ) ;
472
471
473
472
unsafe {
474
- use core:: arch:: aarch64:: vmulq_n_u32;
475
- use core:: arch:: aarch64:: vreinterpretq_u32_u64;
476
-
477
473
c0 = u64x2x2:: new (
478
474
vmulq_n_u32 ( vreinterpretq_u32_u64 ( c0. 0 . 0 ) , 19 ) . into ( ) ,
479
475
vmulq_n_u32 ( vreinterpretq_u32_u64 ( c0. 0 . 1 ) , 19 ) . into ( ) ) ;
@@ -500,7 +496,6 @@ impl FieldElement2625x4 {
500
496
pub fn square_and_negate_D ( & self ) -> FieldElement2625x4 {
501
497
#[ inline( always) ]
502
498
fn m ( x : u32x2x2 , y : u32x2x2 ) -> u64x2x2 {
503
- use core:: arch:: aarch64:: vmull_u32;
504
499
unsafe {
505
500
let z0: u64x2 = vmull_u32 ( x. 0 . 0 , y. 0 . 0 ) . into ( ) ;
506
501
let z1: u64x2 = vmull_u32 ( x. 0 . 1 , y. 0 . 1 ) . into ( ) ;
@@ -510,8 +505,6 @@ impl FieldElement2625x4 {
510
505
511
506
#[ inline( always) ]
512
507
fn m_lo ( x : u32x2x2 , y : u32x2x2 ) -> u32x2x2 {
513
- use core:: arch:: aarch64:: vmull_u32;
514
- use core:: arch:: aarch64:: vuzp1_u32;
515
508
unsafe {
516
509
let x: u32x4x2 = u32x4x2:: new (
517
510
vmull_u32 ( x. 0 . 0 , y. 0 . 0 ) . into ( ) ,
@@ -565,9 +558,6 @@ impl FieldElement2625x4 {
565
558
566
559
let negate_D = |x_01 : u64x2x2 , p_01 : u64x2x2 | -> u64x2x2 {
567
560
unsafe {
568
- use core:: arch:: aarch64:: vcombine_u32;
569
- use core:: arch:: aarch64:: vreinterpretq_u32_u64;
570
- use core:: arch:: aarch64:: vsubq_u64;
571
561
572
562
u64x2x2:: new ( u64x2 ( x_01. 0 . 0 ) ,
573
563
vcombine_u32 (
@@ -626,8 +616,6 @@ impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 {
626
616
#[ rustfmt:: skip] // Retain formatting of packing
627
617
fn mul ( self , scalars : ( u32 , u32 , u32 , u32 ) ) -> FieldElement2625x4 {
628
618
unsafe {
629
- use core:: arch:: aarch64:: vmull_u32;
630
-
631
619
let consts = (
632
620
u32x2:: new ( scalars. 0 , scalars. 1 ) ,
633
621
u32x2:: new ( scalars. 2 , scalars. 3 ) ,
@@ -662,7 +650,6 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 {
662
650
fn mul ( self , rhs : & ' b FieldElement2625x4 ) -> FieldElement2625x4 {
663
651
#[ inline( always) ]
664
652
fn m ( x : u32x2x2 , y : u32x2x2 ) -> u64x2x2 {
665
- use core:: arch:: aarch64:: vmull_u32;
666
653
unsafe {
667
654
let z0: u64x2 = vmull_u32 ( x. 0 . 0 , y. 0 . 0 ) . into ( ) ;
668
655
let z1: u64x2 = vmull_u32 ( x. 0 . 1 , y. 0 . 1 ) . into ( ) ;
@@ -672,8 +659,6 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 {
672
659
673
660
#[ inline( always) ]
674
661
fn m_lo ( x : u32x2x2 , y : u32x2x2 ) -> u32x2x2 {
675
- use core:: arch:: aarch64:: vmull_u32;
676
- use core:: arch:: aarch64:: vuzp1_u32;
677
662
unsafe {
678
663
let x: u32x4x2 = u32x4x2:: new (
679
664
vmull_u32 ( x. 0 . 0 , y. 0 . 0 ) . into ( ) ,
0 commit comments