
Commit 13a8b5d

[CIR][CIRGen][Builtin][Neon] Lower neon_vldap1_lane_s64 and vldap1q_lane_s64 (#1346)
Lower `neon_vldap1_lane_s64` and `vldap1q_lane_s64`. To attach the atomic `MemOrder`, the `createAlignedLoad` builder now returns the `cir::LoadOp` instead of an `mlir::Value`, mirroring our builders for `StoreOp`.
1 parent bf3135b commit 13a8b5d
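The enabling change is in CIRGenBuilder.h: `createAlignedLoad` hands back the `cir::LoadOp` itself, so the caller can set attributes such as the atomic memory order before using its result. A minimal sketch of the resulting call-site pattern, where `builder`, `loc`, `eltTy`, `ptr`, and `align` are placeholders for the values available in `emitAArch64BuiltinExpr` (only the builder/op APIs below come from this commit):

    // Create the aligned element load and mark it acquire, as vldap1(q)_lane requires.
    cir::LoadOp load = builder.createAlignedLoad(loc, eltTy, ptr, align);
    load.setAtomic(cir::MemOrder::Acquire);
    // The op has a single result, so it can still be used wherever a plain
    // mlir::Value was expected, e.g. as the scalar operand of cir::VecInsertOp.
    mlir::Value loaded = load.getResult();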

File tree

3 files changed: +146 -4 lines


clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 3 additions & 3 deletions
@@ -871,7 +871,7 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
         /*mem_order=*/cir::MemOrderAttr{}, /*tbaa=*/cir::TBAAAttr{});
   }
 
-  mlir::Value createAlignedLoad(mlir::Location loc, mlir::Type ty,
+  cir::LoadOp createAlignedLoad(mlir::Location loc, mlir::Type ty,
                                 mlir::Value ptr, llvm::MaybeAlign align,
                                 bool isVolatile) {
     if (ty != mlir::cast<cir::PointerType>(ptr.getType()).getPointee())
@@ -880,14 +880,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
     return CIRBaseBuilderTy::createLoad(loc, ptr, isVolatile, alignment);
   }
 
-  mlir::Value createAlignedLoad(mlir::Location loc, mlir::Type ty,
+  cir::LoadOp createAlignedLoad(mlir::Location loc, mlir::Type ty,
                                 mlir::Value ptr, llvm::MaybeAlign align) {
     // TODO: make sure callsites shouldn't be really passing volatile.
     assert(!cir::MissingFeatures::volatileLoadOrStore());
     return createAlignedLoad(loc, ty, ptr, align, /*isVolatile=*/false);
   }
 
-  mlir::Value
+  cir::LoadOp
   createAlignedLoad(mlir::Location loc, mlir::Type ty, mlir::Value addr,
                     clang::CharUnits align = clang::CharUnits::One()) {
     return createAlignedLoad(loc, ty, addr, align.getAsAlign());
clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 6 additions & 1 deletion
@@ -4453,7 +4453,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
   }
   case NEON::BI__builtin_neon_vldap1_lane_s64:
   case NEON::BI__builtin_neon_vldap1q_lane_s64: {
-    llvm_unreachable("NEON::BI__builtin_neon_vldap1q_lane_s64 NYI");
+    cir::LoadOp Load = builder.createAlignedLoad(
+        Ops[0].getLoc(), vTy.getEltType(), Ops[0], PtrOp0.getAlignment());
+    Load.setAtomic(cir::MemOrder::Acquire);
+    return builder.create<cir::VecInsertOp>(getLoc(E->getExprLoc()),
+                                            builder.createBitcast(Ops[1], vTy),
+                                            Load, Ops[2]);
   }
   case NEON::BI__builtin_neon_vld1_dup_v:
   case NEON::BI__builtin_neon_vld1q_dup_v: {

clang/test/CIR/CodeGen/AArch64/neon-ldst.c

Lines changed: 137 additions & 0 deletions
@@ -629,3 +629,140 @@ void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) {
 // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
 // LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
 // LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+uint64x2_t test_vldap1q_lane_u64(uint64_t *a, uint64x2_t b) {
+  return vldap1q_lane_u64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!u64i>, !u64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+int64x2_t test_vldap1q_lane_s64(int64_t *a, int64x2_t b) {
+  return vldap1q_lane_s64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+float64x2_t test_vldap1q_lane_f64(float64_t *a, float64x2_t b) {
+  return vldap1q_lane_f64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!cir.double>, !cir.double
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!cir.double x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_f64(ptr{{.*}}[[PTR:%.*]], <2 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
+
+poly64x2_t test_vldap1q_lane_p64(poly64_t *a, poly64x2_t b) {
+  return vldap1q_lane_p64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_p64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[SRC]] to <16 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+uint64x1_t test_vldap1_lane_u64(uint64_t *a, uint64x1_t b) {
+  return vldap1_lane_u64(a, b, 0);
+}
+
+// CIR-LABEL:test_vldap1_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!u64i>, !u64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+
+int64x1_t test_vldap1_lane_s64(int64_t *a, int64x1_t b) {
+  return vldap1_lane_s64(a, b, 0);
+}
+
+// CIR-LABEL:test_vldap1_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s64i x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+
+
+float64x1_t test_vldap1_lane_f64(float64_t *a, float64x1_t b) {
+  return vldap1_lane_f64(a, b, 0);
+}
+
+// CIR-LABEL: test_vldap1_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!cir.double>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!cir.double>, !cir.double
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!cir.double x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_f64(ptr{{.*}}[[PTR:%.*]], <1 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
+
+poly64x1_t test_vldap1_lane_p64(poly64_t *a, poly64x1_t b) {
+  return vldap1_lane_p64(a, b, 0);
+}
+
+// CIR-LABEL: test_vldap1_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast(bitcast, {{.*}} : !cir.ptr<!void>), !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64
+// CIR: [[VEC:%.*]] = cir.cast(bitcast, {{.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s64i x 1>
+// CIR: [[TMP:%.*]] = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_p64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[SRC]] to <8 x i8>
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
