Skip to content

Commit 3ac970b

Browse files
committed
[CIR][CIRGen][Builtin][X86] Lower avx512 scatter intrinsics
1 parent 942008c commit 3ac970b

File tree

3 files changed

+505
-2
lines changed

3 files changed

+505
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -771,8 +771,99 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
771771
case X86::BI__builtin_ia32_scattersiv4sf:
772772
case X86::BI__builtin_ia32_scattersiv4si:
773773
case X86::BI__builtin_ia32_scattersiv8sf:
774-
case X86::BI__builtin_ia32_scattersiv8si:
775-
llvm_unreachable("scattersiv8df NYI");
774+
case X86::BI__builtin_ia32_scattersiv8si: {
775+
776+
llvm::StringRef intrinsicName;
777+
778+
switch (BuiltinID) {
779+
default:
780+
llvm_unreachable("Unexpected builtin");
781+
case X86::BI__builtin_ia32_scattersiv8df:
782+
intrinsicName = "x86.avx512.mask.scatter.dpd.512";
783+
break;
784+
case X86::BI__builtin_ia32_scattersiv16sf:
785+
intrinsicName = "x86.avx512.mask.scatter.dps.512";
786+
break;
787+
case X86::BI__builtin_ia32_scatterdiv8df:
788+
intrinsicName = "x86.avx512.mask.scatter.qpd.512";
789+
break;
790+
case X86::BI__builtin_ia32_scatterdiv16sf:
791+
intrinsicName = "x86.avx512.mask.scatter.qps.512";
792+
break;
793+
case X86::BI__builtin_ia32_scattersiv8di:
794+
intrinsicName = "x86.avx512.mask.scatter.dpq.512";
795+
break;
796+
case X86::BI__builtin_ia32_scattersiv16si:
797+
intrinsicName = "x86.avx512.mask.scatter.dpi.512";
798+
break;
799+
case X86::BI__builtin_ia32_scatterdiv8di:
800+
intrinsicName = "x86.avx512.mask.scatter.qpq.512";
801+
break;
802+
case X86::BI__builtin_ia32_scatterdiv16si:
803+
intrinsicName = "x86.avx512.mask.scatter.qpi.512";
804+
break;
805+
case X86::BI__builtin_ia32_scatterdiv2df:
806+
intrinsicName = "x86.avx512.mask.scatterdiv2.df";
807+
break;
808+
case X86::BI__builtin_ia32_scatterdiv2di:
809+
intrinsicName = "x86.avx512.mask.scatterdiv2.di";
810+
break;
811+
case X86::BI__builtin_ia32_scatterdiv4df:
812+
intrinsicName = "x86.avx512.mask.scatterdiv4.df";
813+
break;
814+
case X86::BI__builtin_ia32_scatterdiv4di:
815+
intrinsicName = "x86.avx512.mask.scatterdiv4.di";
816+
break;
817+
case X86::BI__builtin_ia32_scatterdiv4sf:
818+
intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
819+
break;
820+
case X86::BI__builtin_ia32_scatterdiv4si:
821+
intrinsicName = "x86.avx512.mask.scatterdiv4.si";
822+
break;
823+
case X86::BI__builtin_ia32_scatterdiv8sf:
824+
intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
825+
break;
826+
case X86::BI__builtin_ia32_scatterdiv8si:
827+
intrinsicName = "x86.avx512.mask.scatterdiv8.si";
828+
break;
829+
case X86::BI__builtin_ia32_scattersiv2df:
830+
intrinsicName = "x86.avx512.mask.scattersiv2.df";
831+
break;
832+
case X86::BI__builtin_ia32_scattersiv2di:
833+
intrinsicName = "x86.avx512.mask.scattersiv2.di";
834+
break;
835+
case X86::BI__builtin_ia32_scattersiv4df:
836+
intrinsicName = "x86.avx512.mask.scattersiv4.df";
837+
break;
838+
case X86::BI__builtin_ia32_scattersiv4di:
839+
intrinsicName = "x86.avx512.mask.scattersiv4.di";
840+
break;
841+
case X86::BI__builtin_ia32_scattersiv4sf:
842+
intrinsicName = "x86.avx512.mask.scattersiv4.sf";
843+
break;
844+
case X86::BI__builtin_ia32_scattersiv4si:
845+
intrinsicName = "x86.avx512.mask.scattersiv4.si";
846+
break;
847+
case X86::BI__builtin_ia32_scattersiv8sf:
848+
intrinsicName = "x86.avx512.mask.scattersiv8.sf";
849+
break;
850+
case X86::BI__builtin_ia32_scattersiv8si:
851+
intrinsicName = "x86.avx512.mask.scattersiv8.si";
852+
break;
853+
}
854+
855+
unsigned minElts =
856+
std::min(cast<cir::VectorType>(Ops[2].getType()).getSize(),
857+
cast<cir::VectorType>(Ops[3].getType()).getSize());
858+
Ops[1] = getMaskVecValue(*this, Ops[1], minElts, getLoc(E->getExprLoc()));
859+
860+
return builder
861+
.create<cir::LLVMIntrinsicCallOp>(
862+
getLoc(E->getExprLoc()), builder.getStringAttr(intrinsicName.str()),
863+
builder.getVoidTy(), Ops)
864+
.getResult();
865+
}
866+
776867
case X86::BI__builtin_ia32_vextractf128_pd256:
777868
case X86::BI__builtin_ia32_vextractf128_ps256:
778869
case X86::BI__builtin_ia32_vextractf128_si256:

clang/test/CIR/CodeGen/X86/avx512f-builtins.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,3 +337,128 @@ __m512i test_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) {
337337
// LLVM: @llvm.masked.expandload.v16i32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
338338
return _mm512_maskz_expandloadu_epi32(__U, __P);
339339
}
340+
void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
341+
// CIR-LABEL: test_mm512_i32scatter_pd
342+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
343+
344+
// LLVM-LABEL: test_mm512_i32scatter_pd
345+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
346+
return _mm512_i32scatter_pd(__addr, __index, __v1, 2);
347+
}
348+
349+
void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
350+
// CIR-LABEL: test_mm512_mask_i32scatter_pd
351+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
352+
353+
// LLVM-LABEL: test_mm512_mask_i32scatter_pd
354+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
355+
return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2);
356+
}
357+
358+
void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
359+
// CIR-LABEL: test_mm512_i32scatter_ps
360+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
361+
362+
// LLVM-LABEL: test_mm512_i32scatter_ps
363+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
364+
return _mm512_i32scatter_ps(__addr, __index, __v1, 2);
365+
}
366+
367+
void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
368+
// CIR-LABEL: test_mm512_mask_i32scatter_ps
369+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
370+
371+
// LLVM-LABEL: test_mm512_mask_i32scatter_ps
372+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
373+
return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2);
374+
}
375+
376+
void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
377+
// CIR-LABEL: test_mm512_i64scatter_pd
378+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
379+
380+
// LLVM-LABEL: test_mm512_i64scatter_pd
381+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
382+
return _mm512_i64scatter_pd(__addr, __index, __v1, 2);
383+
}
384+
385+
void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
386+
// CIR-LABEL: test_mm512_mask_i64scatter_pd
387+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
388+
389+
// LLVM-LABEL: test_mm512_mask_i64scatter_pd
390+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
391+
return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2);
392+
}
393+
394+
void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
395+
// CIR-LABEL: test_mm512_i64scatter_ps
396+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
397+
398+
// LLVM-LABEL: test_mm512_i64scatter_ps
399+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
400+
return _mm512_i64scatter_ps(__addr, __index, __v1, 2);
401+
}
402+
403+
void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
404+
// CIR-LABEL: test_mm512_mask_i64scatter_ps
405+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
406+
407+
// LLVM-LABEL: test_mm512_mask_i64scatter_ps
408+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
409+
return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2);
410+
}
411+
412+
void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
413+
// CIR-LABEL: test_mm512_i32scatter_epi32
414+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
415+
416+
// LLVM-LABEL: test_mm512_i32scatter_epi32
417+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
418+
return _mm512_i32scatter_epi32(__addr, __index, __v1, 2);
419+
}
420+
421+
void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
422+
// CIR-LABEL: test_mm512_mask_i32scatter_epi32
423+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
424+
425+
// LLVM-LABEL: test_mm512_mask_i32scatter_epi32
426+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
427+
return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2);
428+
}
429+
430+
void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
431+
// CIR-LABEL: test_mm512_i64scatter_epi64
432+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
433+
434+
// LLVM-LABEL: test_mm512_i64scatter_epi64
435+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
436+
return _mm512_i64scatter_epi64(__addr, __index, __v1, 2);
437+
}
438+
439+
void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
440+
// CIR-LABEL: test_mm512_mask_i64scatter_epi64
441+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
442+
443+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi64
444+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
445+
return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2);
446+
}
447+
448+
void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
449+
// CIR-LABEL: test_mm512_i64scatter_epi32
450+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
451+
452+
// LLVM-LABEL: test_mm512_i64scatter_epi32
453+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
454+
return _mm512_i64scatter_epi32(__addr, __index, __v1, 2);
455+
}
456+
457+
void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
458+
// CIR-LABEL: test_mm512_mask_i64scatter_epi32
459+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
460+
461+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi32
462+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
463+
return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2);
464+
}

0 commit comments

Comments
 (0)