Skip to content

Commit b4018cd

Browse files
committed
Partial port to v7
1 parent aeaed19 commit b4018cd

File tree

7 files changed

+79
-77
lines changed

7 files changed

+79
-77
lines changed

curve25519-dalek/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ legacy_compatibility = []
6969
group = ["dep:group", "rand_core"]
7070
group-bits = ["group", "ff/bits"]
7171

72-
[target.'cfg(all(not(curve25519_dalek_backend = "fiat"), not(curve25519_dalek_backend = "serial"), any(target_arch = "x86_64", target_arch = "aarch64")))'.dependencies]
72+
[target.'cfg(all(not(curve25519_dalek_backend = "fiat"), not(curve25519_dalek_backend = "serial"), any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "arm")))'.dependencies]
7373
curve25519-dalek-derive = { version = "0.1", path = "../curve25519-dalek-derive" }
7474

7575
[lints.rust.unexpected_cfgs]

curve25519-dalek/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ fn main() {
7878
// Is the target arch & curve25519_dalek_bits potentially simd capable ?
7979
fn is_capable_simd(arch: &str, bits: DalekBits) -> bool {
8080
(arch == "x86_64" || arch == "aarch64") && bits == DalekBits::Dalek64
81+
|| arch == "arm" && bits == DalekBits::Dalek32
8182
}
8283

8384
// Deterministic cfg(curve25519_dalek_bits) when this is not explicitly set.

curve25519-dalek/src/backend/vector/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ pub mod avx2;
2121
#[cfg(all(nightly, target_arch = "x86_64"))]
2222
pub mod ifma;
2323

24-
#[cfg(all(nightly, target_arch = "aarch64"))]
24+
#[cfg(all(nightly, any(target_arch = "arm", target_arch = "aarch64")))]
2525
pub mod neon;
2626

2727
pub mod scalar_mul;

curve25519-dalek/src/backend/vector/neon/field.rs

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,22 @@ use crate::backend::vector::neon::constants::{
2828
P_TIMES_16_HI, P_TIMES_16_LO, P_TIMES_2_HI, P_TIMES_2_LO,
2929
};
3030

31+
#[cfg(target_arch = "aarch64")]
32+
use core::arch::aarch64 as core_neon;
33+
#[cfg(target_arch = "arm")]
34+
use core::arch::arm as core_neon;
35+
36+
use core_neon::{
37+
uint32x2_t, uint32x4_t, vcombine_u32, vmull_u32, vmulq_n_u32, vqshlq_u32,
38+
vreinterpretq_u32_u64, vsubq_u64, vtrn1_u32, vuzp1_u32,
39+
};
40+
41+
#[cfg(target_arch = "arm")]
42+
use core::arch::arm::{vget_high_u32, vget_low_u32};
43+
3144
#[cfg(all(target_arch = "aarch64"))]
3245
#[inline(always)]
33-
fn vget_high_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uint32x2_t {
46+
fn vget_high_u32(v: uint32x4_t) -> uint32x2_t {
3447
use core::arch::asm;
3548
let o;
3649
unsafe {
@@ -45,7 +58,7 @@ fn vget_high_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uin
4558

4659
#[cfg(all(target_arch = "aarch64"))]
4760
#[inline(always)]
48-
fn vget_low_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uint32x2_t {
61+
fn vget_low_u32(v: uint32x4_t) -> uint32x2_t {
4962
use core::arch::asm;
5063
let o;
5164
unsafe {
@@ -57,10 +70,6 @@ fn vget_low_u32(v: core::arch::aarch64::uint32x4_t) -> core::arch::aarch64::uint
5770
}
5871
o
5972
}
60-
#[cfg(not(target_arch = "aarch64"))]
61-
use core::arch::aarch64::vget_high_u32;
62-
#[cfg(not(target_arch = "aarch64"))]
63-
use core::arch::aarch64::vget_low_u32;
6473

6574
// Shuffle the lanes in a u32x4x2
6675
macro_rules! shuffle {
@@ -146,9 +155,6 @@ fn unpack_pair(src: u32x4x2) -> (u32x2x2, u32x2x2) {
146155
#[rustfmt::skip] // Retain formatting of the return tuples
147156
fn repack_pair(x: u32x4x2, y: u32x4x2) -> u32x4x2 {
148157
unsafe {
149-
use core::arch::aarch64::vcombine_u32;
150-
use core::arch::aarch64::vtrn1_u32;
151-
152158
u32x4x2::new(
153159
vcombine_u32(
154160
vtrn1_u32(vget_low_u32(x.0.0), vget_high_u32(x.0.0)),
@@ -375,9 +381,6 @@ impl FieldElement2625x4 {
375381
// Use multiple transposes instead of table lookup?
376382
let rotated_carryout = |v: u32x4x2| -> u32x4x2 {
377383
unsafe {
378-
use core::arch::aarch64::vcombine_u32;
379-
use core::arch::aarch64::vqshlq_u32;
380-
381384
let c: u32x4x2 = u32x4x2::new(
382385
vqshlq_u32(v.0 .0, shifts.0.into()).into(),
383386
vqshlq_u32(v.0 .1, shifts.1.into()).into(),
@@ -391,7 +394,6 @@ impl FieldElement2625x4 {
391394

392395
let combine = |v_lo: u32x4x2, v_hi: u32x4x2| -> u32x4x2 {
393396
unsafe {
394-
use core::arch::aarch64::vcombine_u32;
395397
u32x4x2::new(
396398
vcombine_u32(vget_low_u32(v_lo.0 .0), vget_high_u32(v_hi.0 .0)).into(),
397399
vcombine_u32(vget_low_u32(v_lo.0 .1), vget_high_u32(v_hi.0 .1)).into(),
@@ -423,9 +425,6 @@ impl FieldElement2625x4 {
423425

424426
#[rustfmt::skip] // Retain formatting of return tuple
425427
let c9_19: u32x4x2 = unsafe {
426-
use core::arch::aarch64::vcombine_u32;
427-
use core::arch::aarch64::vmulq_n_u32;
428-
429428
let c9_19_spread: u32x4x2 = u32x4x2::new(
430429
vmulq_n_u32(c98.0.0, 19).into(),
431430
vmulq_n_u32(c98.0.1, 19).into(),
@@ -471,9 +470,6 @@ impl FieldElement2625x4 {
471470
let mut c1: u64x2x2 = c.shr::<26>();
472471

473472
unsafe {
474-
use core::arch::aarch64::vmulq_n_u32;
475-
use core::arch::aarch64::vreinterpretq_u32_u64;
476-
477473
c0 = u64x2x2::new(
478474
vmulq_n_u32(vreinterpretq_u32_u64(c0.0.0), 19).into(),
479475
vmulq_n_u32(vreinterpretq_u32_u64(c0.0.1), 19).into());
@@ -500,7 +496,6 @@ impl FieldElement2625x4 {
500496
pub fn square_and_negate_D(&self) -> FieldElement2625x4 {
501497
#[inline(always)]
502498
fn m(x: u32x2x2, y: u32x2x2) -> u64x2x2 {
503-
use core::arch::aarch64::vmull_u32;
504499
unsafe {
505500
let z0: u64x2 = vmull_u32(x.0.0, y.0.0).into();
506501
let z1: u64x2 = vmull_u32(x.0.1, y.0.1).into();
@@ -510,8 +505,6 @@ impl FieldElement2625x4 {
510505

511506
#[inline(always)]
512507
fn m_lo(x: u32x2x2, y: u32x2x2) -> u32x2x2 {
513-
use core::arch::aarch64::vmull_u32;
514-
use core::arch::aarch64::vuzp1_u32;
515508
unsafe {
516509
let x: u32x4x2 = u32x4x2::new(
517510
vmull_u32(x.0.0, y.0.0).into(),
@@ -565,9 +558,6 @@ impl FieldElement2625x4 {
565558

566559
let negate_D = |x_01: u64x2x2, p_01: u64x2x2| -> u64x2x2 {
567560
unsafe {
568-
use core::arch::aarch64::vcombine_u32;
569-
use core::arch::aarch64::vreinterpretq_u32_u64;
570-
use core::arch::aarch64::vsubq_u64;
571561

572562
u64x2x2::new(u64x2(x_01.0.0),
573563
vcombine_u32(
@@ -626,8 +616,6 @@ impl Mul<(u32, u32, u32, u32)> for FieldElement2625x4 {
626616
#[rustfmt::skip] // Retain formatting of packing
627617
fn mul(self, scalars: (u32, u32, u32, u32)) -> FieldElement2625x4 {
628618
unsafe {
629-
use core::arch::aarch64::vmull_u32;
630-
631619
let consts = (
632620
u32x2::new(scalars.0, scalars.1),
633621
u32x2::new(scalars.2, scalars.3),
@@ -662,7 +650,6 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 {
662650
fn mul(self, rhs: &'b FieldElement2625x4) -> FieldElement2625x4 {
663651
#[inline(always)]
664652
fn m(x: u32x2x2, y: u32x2x2) -> u64x2x2 {
665-
use core::arch::aarch64::vmull_u32;
666653
unsafe {
667654
let z0: u64x2 = vmull_u32(x.0.0, y.0.0).into();
668655
let z1: u64x2 = vmull_u32(x.0.1, y.0.1).into();
@@ -672,8 +659,6 @@ impl<'a, 'b> Mul<&'b FieldElement2625x4> for &'a FieldElement2625x4 {
672659

673660
#[inline(always)]
674661
fn m_lo(x: u32x2x2, y: u32x2x2) -> u32x2x2 {
675-
use core::arch::aarch64::vmull_u32;
676-
use core::arch::aarch64::vuzp1_u32;
677662
unsafe {
678663
let x: u32x4x2 = u32x4x2::new(
679664
vmull_u32(x.0.0, y.0.0).into(),

0 commit comments

Comments
 (0)