Skip to content

Commit a4da32d

Browse files
committed
Use packed_simd::shuffle instead of vqtbx1q_u8
1 parent 0383131 commit a4da32d

File tree

1 file changed

+26
-33
lines changed

1 file changed

+26
-33
lines changed

src/backend/vector/neon/field.rs

Lines changed: 26 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -211,40 +211,33 @@ impl FieldElement2625x4 {
211 211
pub fn blend(&self, other: FieldElement2625x4, control: Lanes) -> FieldElement2625x4 {
212 212
#[inline(always)]
213 213
fn blend_lanes(x: (u32x4, u32x4), y: (u32x4, u32x4), control: Lanes) -> (u32x4, u32x4) {
214-
unsafe {
215-
use core::arch::aarch64::vqtbx1q_u8;
216-
match control {
217-
Lanes::C => {
218-
(x.0,
219-
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits())
220-
}
221-
Lanes::D => {
222-
(x.0,
223-
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits()).into_bits())
224-
}
225-
Lanes::AD => {
226-
(vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits(),
227-
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits())
228-
}
229-
Lanes::AB => {
230-
(y.0, x.1)
231-
}
232-
Lanes::AC => {
233-
(vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits(),
234-
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits())
235-
}
236-
Lanes::CD => {
237-
(x.0, y.1)
238-
}
239-
Lanes::BC => {
240-
(vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits(),
241-
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits())
242-
}
243-
Lanes::ABCD => {
244-
y
245-
}
246-
214+
use packed_simd::shuffle;
215+
match control {
216+
Lanes::C => {
217+
(x.0, shuffle!(y.1, x.1, [0, 5, 2, 7]))
218+
}
219+
Lanes::D => {
220+
(x.0, shuffle!(y.1, x.1, [4, 1, 6, 3]))
221+
}
222+
Lanes::AD => {
223+
(shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [4, 1, 6, 3]))
224+
}
225+
Lanes::AB => {
226+
(y.0, x.1)
247 227
}
228+
Lanes::AC => {
229+
(shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [0, 5, 2, 7]))
230+
}
231+
Lanes::CD => {
232+
(x.0, y.1)
233+
}
234+
Lanes::BC => {
235+
(shuffle!(y.0, x.0, [4, 1, 6, 3]), shuffle!(y.1, x.1, [0, 5, 2, 7]))
236+
}
237+
Lanes::ABCD => {
238+
y
239+
}
240+
248 241
}
249 242
}
250 243

0 commit comments

Comments
 (0)