Skip to content

Commit 8debbb9

Browse files
authored
[CIR][CIRGen][Builtin][Neon] Lower neon_vstl1_lane_s64 and vstl1q_lane_s64 (#1340)
Lower `neon_vstl1_lane_s64` and `vstl1q_lane_s64`
1 parent be82182 commit 8debbb9

File tree

2 files changed

+134
-1
lines changed

2 files changed

+134
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4473,7 +4473,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
44734473
}
44744474
case NEON::BI__builtin_neon_vstl1_lane_s64:
44754475
case NEON::BI__builtin_neon_vstl1q_lane_s64: {
4476-
llvm_unreachable("NEON::BI__builtin_neon_vstl1q_lane_s64 NYI");
4476+
Ops[1] = builder.createBitcast(Ops[1], ty);
4477+
Ops[1] = builder.create<cir::VecExtractOp>(Ops[1].getLoc(), Ops[1], Ops[2]);
4478+
cir::StoreOp Store = builder.createAlignedStore(
4479+
getLoc(E->getExprLoc()), Ops[1], Ops[0], PtrOp0.getAlignment());
4480+
Store.setAtomic(cir::MemOrder::Release);
4481+
return Ops[1];
44774482
}
44784483
case NEON::BI__builtin_neon_vld2_v:
44794484
case NEON::BI__builtin_neon_vld2q_v: {

clang/test/CIR/CodeGen/AArch64/neon-ldst.c

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,3 +501,131 @@ void test_vst1q_lane_f64(float64_t * ptr, float64x2_t src) {
501501
// LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <2 x double>
502502
// LLVM: [[RES:%.*]] = extractelement <2 x double> [[VEC_CAST1]], i32 1
503503
// LLVM: store double [[RES]], ptr [[PTR]], align 8
504+
505+
void test_vstl1q_lane_u64(uint64_t *a, uint64x2_t b) {
506+
vstl1q_lane_u64(a, b, 1);
507+
}
508+
509+
// CIR-LABEL: test_vstl1q_lane_u64
510+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
511+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 2>
512+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
513+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !u64i, !cir.ptr<!u64i>
514+
515+
// LLVM: {{.*}}test_vstl1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
516+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
517+
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
518+
// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
519+
// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
520+
521+
void test_vstl1q_lane_s64(int64_t *a, int64x2_t b) {
522+
vstl1q_lane_s64(a, b, 1);
523+
}
524+
525+
// CIR-LABEL: test_vstl1q_lane_s64
526+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
527+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
528+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
529+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
530+
531+
// LLVM: {{.*}}test_vstl1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
532+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
533+
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
534+
// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
535+
// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
536+
537+
void test_vstl1q_lane_f64(float64_t *a, float64x2_t b) {
538+
vstl1q_lane_f64(a, b, 1);
539+
}
540+
541+
// CIR-LABEL: test_vstl1q_lane_f64
542+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
543+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 2>
544+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
545+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
546+
547+
// LLVM: {{.*}}test_vstl1q_lane_f64(ptr{{.*}}[[PTR:%.*]], <2 x double>{{.*}}[[SRC:%.*]])
548+
// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[SRC]] to <16 x i8>
549+
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
550+
// LLVM: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
551+
// LLVM: store atomic double [[TMP2]], ptr [[PTR]] release, align 8
552+
553+
void test_vstl1q_lane_p64(poly64_t *a, poly64x2_t b) {
554+
vstl1q_lane_p64(a, b, 1);
555+
}
556+
557+
// CIR-LABEL: test_vstl1q_lane_p64
558+
// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
559+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
560+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
561+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
562+
563+
// LLVM: {{.*}}test_vstl1q_lane_p64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
564+
// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
565+
// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
566+
// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
567+
// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
568+
569+
void test_vstl1_lane_u64(uint64_t *a, uint64x1_t b) {
570+
vstl1_lane_u64(a, b, 0);
571+
}
572+
573+
// CIR-LABEL: test_vstl1_lane_u64
574+
// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
575+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 1>
576+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
577+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !u64i, !cir.ptr<!u64i>
578+
579+
// LLVM: {{.*}}test_vstl1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
580+
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
581+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
582+
// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
583+
// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
584+
585+
void test_vstl1_lane_s64(int64_t *a, int64x1_t b) {
586+
vstl1_lane_s64(a, b, 0);
587+
}
588+
589+
// CIR-LABEL: test_vstl1_lane_s64
590+
// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
591+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
592+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
593+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
594+
595+
// LLVM: {{.*}}test_vstl1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
596+
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
597+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
598+
// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
599+
// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
600+
601+
void test_vstl1_lane_f64(float64_t *a, float64x1_t b) {
602+
vstl1_lane_f64(a, b, 0);
603+
}
604+
605+
// CIR-LABEL: test_vstl1_lane_f64
606+
// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
607+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 1>
608+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
609+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
610+
611+
// LLVM: {{.*}}test_vstl1_lane_f64(ptr{{.*}}[[PTR:%.*]], <1 x double>{{.*}}[[SRC:%.*]])
612+
// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[SRC]] to <8 x i8>
613+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
614+
// LLVM: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
615+
// LLVM: store atomic double [[TMP2]], ptr [[PTR]] release, align 8
616+
617+
void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) {
618+
vstl1_lane_p64(a, b, 0);
619+
}
620+
621+
// CIR-LABEL: test_vstl1_lane_p64
622+
// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
623+
// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
624+
// CIR: [[PTR:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
625+
// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
626+
627+
// LLVM: {{.*}}test_vstl1_lane_p64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
628+
// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
629+
// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
630+
// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
631+
// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8

0 commit comments

Comments
 (0)