From 1b5d890863ef1d29a779dc5ac25bf446be03a30c Mon Sep 17 00:00:00 2001 From: Tommy McMichen Date: Tue, 29 Jul 2025 11:24:31 -0700 Subject: [PATCH 1/2] [CIR][LoweringPrepare] Wrap `cir.va_arg` lowered code in a `cir.scope` The `cir.va_arg` lowering may introduce an if-else control flow. This breaks the verifier for control flow operations, e.g., `cir.for`, where there can be only one block inside of the `cond` and `step` regions. This PR wraps the lowered code in a `cir.scope` (only if it will lower to the if-else control flow). Added a test for `va_arg` inside for loop condition, the basic pattern is: `for (; va_arg(...););` --- .../Targets/LoweringPrepareX86CXXABI.cpp | 417 +++++++++--------- clang/test/CIR/Lowering/var-arg-x86_64.c | 25 +- 2 files changed, 243 insertions(+), 199 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp index 9e089e370acb..9694c862b5f2 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp @@ -129,223 +129,244 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64( builder, datalayout, valist, ty, loc), ty)); - auto currentBlock = builder.getInsertionBlock(); - - // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of - // general purpose registers needed to pass type and num_fp to hold - // the number of floating point registers needed. - - // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into - // registers. In the case: l->gp_offset > 48 - num_gp * 8 or - // l->fp_offset > 304 - num_fp * 16 go to step 7. - // - // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of - // register save space). - - mlir::Value inRegs; - mlir::Value gp_offset_p, fp_offset_p; - mlir::Value gp_offset, fp_offset; - - if (neededInt) { - gp_offset_p = builder.createGetMemberOp(loc, valist, "gp_offset", 0); - gp_offset = builder.createLoad(loc, gp_offset_p); - inRegs = builder.getUnsignedInt(loc, 48 - neededInt * 8, 32); - inRegs = builder.createCompare(loc, cir::CmpOpKind::le, gp_offset, inRegs); - } + auto scopeOp = builder.create(loc, [&](mlir::OpBuilder + &opBuilder, + mlir::Type &yieldTy, + mlir::Location loc) { + cir::CIRBaseBuilderTy builder(opBuilder); + + mlir::Block *contBlock = builder.getInsertionBlock(); + + mlir::Block *currentBlock = builder.createBlock(contBlock); + builder.setInsertionPointToEnd(currentBlock); + + // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of + // general purpose registers needed to pass type and num_fp to hold + // the number of floating point registers needed. + + // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into + // registers. In the case: l->gp_offset > 48 - num_gp * 8 or + // l->fp_offset > 304 - num_fp * 16 go to step 7. + // + // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of + // register save space). 
+ + mlir::Value inRegs; + mlir::Value gp_offset_p, fp_offset_p; + mlir::Value gp_offset, fp_offset; + + if (neededInt) { + gp_offset_p = builder.createGetMemberOp(loc, valist, "gp_offset", 0); + gp_offset = builder.createLoad(loc, gp_offset_p); + inRegs = builder.getUnsignedInt(loc, 48 - neededInt * 8, 32); + inRegs = + builder.createCompare(loc, cir::CmpOpKind::le, gp_offset, inRegs); + } - if (neededSSE) { - fp_offset_p = builder.createGetMemberOp(loc, valist, "fp_offset", 1); - fp_offset = builder.createLoad(loc, fp_offset_p); - mlir::Value fitsInFP = - builder.getUnsignedInt(loc, 176 - neededSSE * 16, 32); - fitsInFP = - builder.createCompare(loc, cir::CmpOpKind::le, fp_offset, fitsInFP); - inRegs = inRegs ? builder.createAnd(inRegs, fitsInFP) : fitsInFP; - } + if (neededSSE) { + fp_offset_p = builder.createGetMemberOp(loc, valist, "fp_offset", 1); + fp_offset = builder.createLoad(loc, fp_offset_p); + mlir::Value fitsInFP = + builder.getUnsignedInt(loc, 176 - neededSSE * 16, 32); + fitsInFP = + builder.createCompare(loc, cir::CmpOpKind::le, fp_offset, fitsInFP); + inRegs = inRegs ? builder.createAnd(inRegs, fitsInFP) : fitsInFP; + } - mlir::Block *contBlock = currentBlock->splitBlock(op); - mlir::Block *inRegBlock = builder.createBlock(contBlock); - mlir::Block *inMemBlock = builder.createBlock(contBlock); - builder.setInsertionPointToEnd(currentBlock); - builder.create(loc, inRegs, inRegBlock, inMemBlock); - - // Emit code to load the value if it was passed in registers. - builder.setInsertionPointToStart(inRegBlock); - - // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with - // an offset of l->gp_offset and/or l->fp_offset. This may require - // copying to a temporary location in case the parameter is passed - // in different register classes or requires an alignment greater - // than 8 for general purpose registers and 16 for XMM registers. - // - // FIXME: This really results in shameful code when we end up needing to - // collect arguments from different places; often what should result in a - // simple assembling of a structure from scattered addresses has many more - // loads than necessary. Can we clean this up? - mlir::Value regSaveArea = builder.createLoad( - loc, builder.createGetMemberOp(loc, valist, "reg_save_area", 3)); - mlir::Value regAddr; - - uint64_t tyAlign = datalayout.getABITypeAlign(ty).value(); - // The alignment of result address. - uint64_t alignment = 0; - if (neededInt && neededSSE) { - // FIXME: Cleanup. - assert(ai.isDirect() && "Unexpected ABI info for mixed regs"); - auto recordTy = mlir::cast(ai.getCoerceToType()); - cir::PointerType addrTy = builder.getPointerTo(ty); - - mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", - CharUnits::fromQuantity(tyAlign)); - tmp = builder.createPtrBitcast(tmp, recordTy); - assert(recordTy.getNumElements() == 2 && - "Unexpected ABI info for mixed regs"); - mlir::Type tyLo = recordTy.getMembers()[0]; - mlir::Type tyHi = recordTy.getMembers()[1]; - assert((isFPOrVectorOfFPType(tyLo) ^ isFPOrVectorOfFPType(tyHi)) && - "Unexpected ABI info for mixed regs"); - mlir::Value gpAddr = builder.createPtrStride(loc, regSaveArea, gp_offset); - mlir::Value fpAddr = builder.createPtrStride(loc, regSaveArea, fp_offset); - mlir::Value regLoAddr = isFPOrVectorOfFPType(tyLo) ? fpAddr : gpAddr; - mlir::Value regHiAddr = isFPOrVectorOfFPType(tyHi) ? gpAddr : fpAddr; - - // Copy the first element. - // FIXME: Our choice of alignment here and below is probably pessimistic. 
- mlir::Value v = builder.createAlignedLoad( - loc, regLoAddr, datalayout.getABITypeAlign(tyLo).value()); - builder.createStore(loc, v, - builder.createGetMemberOp(loc, tmp, "gp_offset", 0)); - - // Copy the second element. - v = builder.createAlignedLoad(loc, regHiAddr, - datalayout.getABITypeAlign(tyHi).value()); - builder.createStore(loc, v, - builder.createGetMemberOp(loc, tmp, "fp_offset", 1)); - - tmp = builder.createPtrBitcast(tmp, ty); - regAddr = tmp; - } else if (neededInt || neededSSE == 1) { - uint64_t tySize = datalayout.getTypeStoreSize(ty).getFixedValue(); - - mlir::Type coTy; - if (ai.isDirect()) - coTy = ai.getCoerceToType(); - - mlir::Value gpOrFpOffset = neededInt ? gp_offset : fp_offset; - alignment = neededInt ? 8 : 16; - uint64_t regSize = neededInt ? neededInt * 8 : 16; - // There are two cases require special handling: - // 1) - // ``` - // struct { - // struct {} a[8]; - // int b; - // }; - // ``` - // The lower 8 bytes of the structure are not stored, - // so an 8-byte offset is needed when accessing the structure. - // 2) - // ``` - // struct { - // long long a; - // struct {} b; - // }; - // ``` - // The stored size of this structure is smaller than its actual size, - // which may lead to reading past the end of the register save area. - if (coTy && (ai.getDirectOffset() == 8 || regSize < tySize)) { + mlir::Block *inRegBlock = builder.createBlock(contBlock); + mlir::Block *inMemBlock = builder.createBlock(contBlock); + builder.setInsertionPointToEnd(currentBlock); + builder.create(loc, inRegs, inRegBlock, inMemBlock); + + // Emit code to load the value if it was passed in registers. + builder.setInsertionPointToStart(inRegBlock); + + // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with + // an offset of l->gp_offset and/or l->fp_offset. This may require + // copying to a temporary location in case the parameter is passed + // in different register classes or requires an alignment greater + // than 8 for general purpose registers and 16 for XMM registers. + // + // FIXME: This really results in shameful code when we end up needing to + // collect arguments from different places; often what should result in a + // simple assembling of a structure from scattered addresses has many more + // loads than necessary. Can we clean this up? + mlir::Value regSaveArea = builder.createLoad( + loc, builder.createGetMemberOp(loc, valist, "reg_save_area", 3)); + mlir::Value regAddr; + + uint64_t tyAlign = datalayout.getABITypeAlign(ty).value(); + // The alignment of result address. + uint64_t alignment = 0; + if (neededInt && neededSSE) { + // FIXME: Cleanup. 
+ assert(ai.isDirect() && "Unexpected ABI info for mixed regs"); + auto recordTy = mlir::cast(ai.getCoerceToType()); cir::PointerType addrTy = builder.getPointerTo(ty); + mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign)); - mlir::Value addr = - builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); - mlir::Value src = builder.createAlignedLoad( - loc, builder.createPtrBitcast(addr, coTy), tyAlign); - mlir::Value ptrOffset = - builder.getUnsignedInt(loc, ai.getDirectOffset(), 32); - mlir::Value dst = builder.createPtrStride(loc, tmp, ptrOffset); - builder.createStore(loc, src, dst); + tmp = builder.createPtrBitcast(tmp, recordTy); + assert(recordTy.getNumElements() == 2 && + "Unexpected ABI info for mixed regs"); + mlir::Type tyLo = recordTy.getMembers()[0]; + mlir::Type tyHi = recordTy.getMembers()[1]; + assert((isFPOrVectorOfFPType(tyLo) ^ isFPOrVectorOfFPType(tyHi)) && + "Unexpected ABI info for mixed regs"); + mlir::Value gpAddr = builder.createPtrStride(loc, regSaveArea, gp_offset); + mlir::Value fpAddr = builder.createPtrStride(loc, regSaveArea, fp_offset); + mlir::Value regLoAddr = isFPOrVectorOfFPType(tyLo) ? fpAddr : gpAddr; + mlir::Value regHiAddr = isFPOrVectorOfFPType(tyHi) ? gpAddr : fpAddr; + + // Copy the first element. + // FIXME: Our choice of alignment here and below is probably pessimistic. + mlir::Value v = builder.createAlignedLoad( + loc, regLoAddr, datalayout.getABITypeAlign(tyLo).value()); + builder.createStore(loc, v, + builder.createGetMemberOp(loc, tmp, "gp_offset", 0)); + + // Copy the second element. + v = builder.createAlignedLoad(loc, regHiAddr, + datalayout.getABITypeAlign(tyHi).value()); + builder.createStore(loc, v, + builder.createGetMemberOp(loc, tmp, "fp_offset", 1)); + + tmp = builder.createPtrBitcast(tmp, ty); regAddr = tmp; - } else { - regAddr = builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); - - // Copy into a temporary if the type is more aligned than the - // register save area. - if (neededInt && tyAlign > 8) { + } else if (neededInt || neededSSE == 1) { + uint64_t tySize = datalayout.getTypeStoreSize(ty).getFixedValue(); + + mlir::Type coTy; + if (ai.isDirect()) + coTy = ai.getCoerceToType(); + + mlir::Value gpOrFpOffset = neededInt ? gp_offset : fp_offset; + alignment = neededInt ? 8 : 16; + uint64_t regSize = neededInt ? neededInt * 8 : 16; + // There are two cases require special handling: + // 1) + // ``` + // struct { + // struct {} a[8]; + // int b; + // }; + // ``` + // The lower 8 bytes of the structure are not stored, + // so an 8-byte offset is needed when accessing the structure. + // 2) + // ``` + // struct { + // long long a; + // struct {} b; + // }; + // ``` + // The stored size of this structure is smaller than its actual size, + // which may lead to reading past the end of the register save area. 
+ if (coTy && (ai.getDirectOffset() == 8 || regSize < tySize)) { cir::PointerType addrTy = builder.getPointerTo(ty); mlir::Value tmp = builder.createAlloca( loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign)); - builder.createMemCpy(loc, tmp, regAddr, - builder.getUnsignedInt(loc, tySize, 32)); + mlir::Value addr = + builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); + mlir::Value src = builder.createAlignedLoad( + loc, builder.createPtrBitcast(addr, coTy), tyAlign); + mlir::Value ptrOffset = + builder.getUnsignedInt(loc, ai.getDirectOffset(), 32); + mlir::Value dst = builder.createPtrStride(loc, tmp, ptrOffset); + builder.createStore(loc, src, dst); regAddr = tmp; + } else { + regAddr = builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); + + // Copy into a temporary if the type is more aligned than the + // register save area. + if (neededInt && tyAlign > 8) { + cir::PointerType addrTy = builder.getPointerTo(ty); + mlir::Value tmp = builder.createAlloca( + loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign)); + builder.createMemCpy(loc, tmp, regAddr, + builder.getUnsignedInt(loc, tySize, 32)); + regAddr = tmp; + } } + + } else { + assert(neededSSE == 2 && "Invalid number of needed registers!"); + // SSE registers are spaced 16 bytes apart in the register save + // area, we need to collect the two eightbytes together. + // The ABI isn't explicit about this, but it seems reasonable + // to assume that the slots are 16-byte aligned, since the stack is + // naturally 16-byte aligned and the prologue is expected to store + // all the SSE registers to the RSA. + + mlir::Value regAddrLo = + builder.createPtrStride(loc, regSaveArea, fp_offset); + mlir::Value regAddrHi = builder.createPtrStride( + loc, regAddrLo, builder.getUnsignedInt(loc, 16, /*numBits=*/32)); + + mlir::MLIRContext *Context = abiInfo.getContext().getMLIRContext(); + cir::RecordType recordTy = + ai.canHaveCoerceToType() + ? cast(ai.getCoerceToType()) + : cir::RecordType::get( + Context, + {DoubleType::get(Context), DoubleType::get(Context)}, + /*packed=*/false, /*padded=*/false, + cir::RecordType::Struct); + cir::PointerType addrTy = builder.getPointerTo(ty); + mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", + CharUnits::fromQuantity(tyAlign)); + tmp = builder.createPtrBitcast(tmp, recordTy); + mlir::Value v = builder.createLoad( + loc, builder.createPtrBitcast(regAddrLo, recordTy.getMembers()[0])); + builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 0)); + v = builder.createLoad( + loc, builder.createPtrBitcast(regAddrHi, recordTy.getMembers()[1])); + builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 1)); + + tmp = builder.createPtrBitcast(tmp, ty); + regAddr = tmp; } - } else { - assert(neededSSE == 2 && "Invalid number of needed registers!"); - // SSE registers are spaced 16 bytes apart in the register save - // area, we need to collect the two eightbytes together. - // The ABI isn't explicit about this, but it seems reasonable - // to assume that the slots are 16-byte aligned, since the stack is - // naturally 16-byte aligned and the prologue is expected to store - // all the SSE registers to the RSA. - - mlir::Value regAddrLo = - builder.createPtrStride(loc, regSaveArea, fp_offset); - mlir::Value regAddrHi = builder.createPtrStride( - loc, regAddrLo, builder.getUnsignedInt(loc, 16, /*numBits=*/32)); - - mlir::MLIRContext *Context = abiInfo.getContext().getMLIRContext(); - cir::RecordType recordTy = - ai.canHaveCoerceToType() - ? 
cast(ai.getCoerceToType()) - : cir::RecordType::get( - Context, {DoubleType::get(Context), DoubleType::get(Context)}, - /*packed=*/false, /*padded=*/false, cir::RecordType::Struct); - cir::PointerType addrTy = builder.getPointerTo(ty); - mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", - CharUnits::fromQuantity(tyAlign)); - tmp = builder.createPtrBitcast(tmp, recordTy); - mlir::Value v = builder.createLoad( - loc, builder.createPtrBitcast(regAddrLo, recordTy.getMembers()[0])); - builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 0)); - v = builder.createLoad( - loc, builder.createPtrBitcast(regAddrHi, recordTy.getMembers()[1])); - builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 1)); - - tmp = builder.createPtrBitcast(tmp, ty); - regAddr = tmp; - } + // AMD64-ABI 3.5.7p5: Step 5. Set: + // l->gp_offset = l->gp_offset + num_gp * 8 + // l->fp_offset = l->fp_offset + num_fp * 16. + if (neededInt) { + mlir::Value offset = builder.getUnsignedInt(loc, neededInt * 8, 32); + builder.createStore(loc, builder.createAdd(gp_offset, offset), + gp_offset_p); + } - // AMD64-ABI 3.5.7p5: Step 5. Set: - // l->gp_offset = l->gp_offset + num_gp * 8 - // l->fp_offset = l->fp_offset + num_fp * 16. - if (neededInt) { - mlir::Value offset = builder.getUnsignedInt(loc, neededInt * 8, 32); - builder.createStore(loc, builder.createAdd(gp_offset, offset), gp_offset_p); - } + if (neededSSE) { + mlir::Value offset = builder.getUnsignedInt(loc, neededSSE * 8, 32); + builder.createStore(loc, builder.createAdd(fp_offset, offset), + fp_offset_p); + } - if (neededSSE) { - mlir::Value offset = builder.getUnsignedInt(loc, neededSSE * 8, 32); - builder.createStore(loc, builder.createAdd(fp_offset, offset), fp_offset_p); - } + builder.create(loc, mlir::ValueRange{regAddr}, contBlock); + + // Emit code to load the value if it was passed in memory. + builder.setInsertionPointToStart(inMemBlock); + mlir::Value memAddr = + buildX86_64VAArgFromMemory(builder, datalayout, valist, ty, loc); + builder.create(loc, mlir::ValueRange{memAddr}, contBlock); + + // Yield the appropriate result. + builder.setInsertionPointToStart(contBlock); + mlir::Value res_addr = contBlock->addArgument(regAddr.getType(), loc); - builder.create(loc, mlir::ValueRange{regAddr}, contBlock); + mlir::Value result = + alignment + ? builder.createAlignedLoad( + loc, builder.createPtrBitcast(res_addr, ty), alignment) + : builder.createLoad(loc, builder.createPtrBitcast(res_addr, ty)); - // Emit code to load the value if it was passed in memory. - builder.setInsertionPointToStart(inMemBlock); - mlir::Value memAddr = - buildX86_64VAArgFromMemory(builder, datalayout, valist, ty, loc); - builder.create(loc, mlir::ValueRange{memAddr}, contBlock); + builder.create(loc, result); - // Return the appropriate result. - builder.setInsertionPointToStart(contBlock); - mlir::Value res_addr = contBlock->addArgument(regAddr.getType(), loc); + yieldTy = result.getType(); + }); - return alignment - ? builder.createAlignedLoad( - loc, builder.createPtrBitcast(res_addr, ty), alignment) - : builder.createLoad(loc, builder.createPtrBitcast(res_addr, ty)); + return scopeOp.getResult(0); } } // namespace diff --git a/clang/test/CIR/Lowering/var-arg-x86_64.c b/clang/test/CIR/Lowering/var-arg-x86_64.c index 1ae7e6369c25..bc95bd67d55d 100644 --- a/clang/test/CIR/Lowering/var-arg-x86_64.c +++ b/clang/test/CIR/Lowering/var-arg-x86_64.c @@ -47,6 +47,7 @@ double f1(int n, ...) 
{ // CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]] // CIR: cir.va.start [[VASTED_VA_LIST]] // CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]] +// CIR: [[VAARG_RESULT:%.+]] = cir.scope // CIR: [[FP_OFFSET_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][1] {name = "fp_offset"} // CIR: [[FP_OFFSET:%.+]] = cir.load [[FP_OFFSET_P]] // CIR: [[OFFSET_CONSTANT:%.+]] = cir.const #cir.int<160> @@ -75,7 +76,9 @@ double f1(int n, ...) { // CIR: ^[[ContBlock]]([[ARG:.+]]: !cir.ptr // CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]] // CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]] -// CIR: cir.store{{.*}} [[CASTED_ARG]], [[RES]] +// CIR: cir.yield [[CASTED_ARG]] +// +// CIR: cir.store{{.*}} [[VAARG_RESULT]], [[RES]] long double f2(int n, ...) { va_list valist; va_start(valist, n); @@ -185,3 +188,23 @@ const char *f3(va_list args) { // ... // CIR: cir.return + +void f4(va_list args) { + for (; va_arg(args, int); ); +} +// CIR-LABEL: cir.func dso_local @f4 +// CIR: cir.for : cond { +// CIR: %[[VALIST:.*]] = cir.load align(8) %[[VALIST_VAR]] : !cir.ptr>, !cir.ptr +// CIR: %[[VAARG_RESULT:.*]] = cir.scope { +// ... // The contents are tested elsewhere. +// CIR: cir.yield {{.*}} : !s32i +// CIR: } : !s32i +// CIR: %[[CMP:.*]] = cir.cast(int_to_bool, %[[VAARG_RESULT]] : !s32i), !cir.bool +// CIR: cir.condition(%[[CMP]]) +// CIR: } body { +// CIR: cir.yield +// CIR: } step { +// CIR: cir.yield +// CIR: } +// CIR: cir.return +// CIR: } From d2ae8a07bed24c7c64dc526934fc85d0fd84e353 Mon Sep 17 00:00:00 2001 From: Tommy McMichen Date: Tue, 29 Jul 2025 11:48:38 -0700 Subject: [PATCH 2/2] [CIR][LoweringPrepare] Refactored to move build logic out of lambda --- .../Targets/LoweringPrepareX86CXXABI.cpp | 426 +++++++++--------- 1 file changed, 210 insertions(+), 216 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp index 9694c862b5f2..b6d118b9bbaa 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp @@ -129,242 +129,236 @@ mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64( builder, datalayout, valist, ty, loc), ty)); - auto scopeOp = builder.create(loc, [&](mlir::OpBuilder - &opBuilder, - mlir::Type &yieldTy, - mlir::Location loc) { - cir::CIRBaseBuilderTy builder(opBuilder); - - mlir::Block *contBlock = builder.getInsertionBlock(); - - mlir::Block *currentBlock = builder.createBlock(contBlock); - builder.setInsertionPointToEnd(currentBlock); - - // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of - // general purpose registers needed to pass type and num_fp to hold - // the number of floating point registers needed. - - // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into - // registers. In the case: l->gp_offset > 48 - num_gp * 8 or - // l->fp_offset > 304 - num_fp * 16 go to step 7. - // - // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of - // register save space). 
- - mlir::Value inRegs; - mlir::Value gp_offset_p, fp_offset_p; - mlir::Value gp_offset, fp_offset; - - if (neededInt) { - gp_offset_p = builder.createGetMemberOp(loc, valist, "gp_offset", 0); - gp_offset = builder.createLoad(loc, gp_offset_p); - inRegs = builder.getUnsignedInt(loc, 48 - neededInt * 8, 32); - inRegs = - builder.createCompare(loc, cir::CmpOpKind::le, gp_offset, inRegs); - } + mlir::OpBuilder::InsertPoint scopeIP; + auto scopeOp = builder.create( + loc, + [&](mlir::OpBuilder &opBuilder, mlir::Type &yieldTy, mlir::Location loc) { + scopeIP = opBuilder.saveInsertionPoint(); + yieldTy = op.getType(); + }); + + mlir::Block *contBlock = scopeIP.getBlock(); + + mlir::Block *currentBlock = builder.createBlock(contBlock); + builder.setInsertionPointToEnd(currentBlock); + + // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of + // general purpose registers needed to pass type and num_fp to hold + // the number of floating point registers needed. + + // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into + // registers. In the case: l->gp_offset > 48 - num_gp * 8 or + // l->fp_offset > 304 - num_fp * 16 go to step 7. + // + // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of + // register save space). + + mlir::Value inRegs; + mlir::Value gp_offset_p, fp_offset_p; + mlir::Value gp_offset, fp_offset; + + if (neededInt) { + gp_offset_p = builder.createGetMemberOp(loc, valist, "gp_offset", 0); + gp_offset = builder.createLoad(loc, gp_offset_p); + inRegs = builder.getUnsignedInt(loc, 48 - neededInt * 8, 32); + inRegs = builder.createCompare(loc, cir::CmpOpKind::le, gp_offset, inRegs); + } - if (neededSSE) { - fp_offset_p = builder.createGetMemberOp(loc, valist, "fp_offset", 1); - fp_offset = builder.createLoad(loc, fp_offset_p); - mlir::Value fitsInFP = - builder.getUnsignedInt(loc, 176 - neededSSE * 16, 32); - fitsInFP = - builder.createCompare(loc, cir::CmpOpKind::le, fp_offset, fitsInFP); - inRegs = inRegs ? builder.createAnd(inRegs, fitsInFP) : fitsInFP; - } + if (neededSSE) { + fp_offset_p = builder.createGetMemberOp(loc, valist, "fp_offset", 1); + fp_offset = builder.createLoad(loc, fp_offset_p); + mlir::Value fitsInFP = + builder.getUnsignedInt(loc, 176 - neededSSE * 16, 32); + fitsInFP = + builder.createCompare(loc, cir::CmpOpKind::le, fp_offset, fitsInFP); + inRegs = inRegs ? builder.createAnd(inRegs, fitsInFP) : fitsInFP; + } - mlir::Block *inRegBlock = builder.createBlock(contBlock); - mlir::Block *inMemBlock = builder.createBlock(contBlock); - builder.setInsertionPointToEnd(currentBlock); - builder.create(loc, inRegs, inRegBlock, inMemBlock); - - // Emit code to load the value if it was passed in registers. - builder.setInsertionPointToStart(inRegBlock); - - // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with - // an offset of l->gp_offset and/or l->fp_offset. This may require - // copying to a temporary location in case the parameter is passed - // in different register classes or requires an alignment greater - // than 8 for general purpose registers and 16 for XMM registers. - // - // FIXME: This really results in shameful code when we end up needing to - // collect arguments from different places; often what should result in a - // simple assembling of a structure from scattered addresses has many more - // loads than necessary. Can we clean this up? 
- mlir::Value regSaveArea = builder.createLoad( - loc, builder.createGetMemberOp(loc, valist, "reg_save_area", 3)); - mlir::Value regAddr; - - uint64_t tyAlign = datalayout.getABITypeAlign(ty).value(); - // The alignment of result address. - uint64_t alignment = 0; - if (neededInt && neededSSE) { - // FIXME: Cleanup. - assert(ai.isDirect() && "Unexpected ABI info for mixed regs"); - auto recordTy = mlir::cast(ai.getCoerceToType()); + mlir::Block *inRegBlock = builder.createBlock(contBlock); + mlir::Block *inMemBlock = builder.createBlock(contBlock); + builder.setInsertionPointToEnd(currentBlock); + builder.create(loc, inRegs, inRegBlock, inMemBlock); + + // Emit code to load the value if it was passed in registers. + builder.setInsertionPointToStart(inRegBlock); + + // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with + // an offset of l->gp_offset and/or l->fp_offset. This may require + // copying to a temporary location in case the parameter is passed + // in different register classes or requires an alignment greater + // than 8 for general purpose registers and 16 for XMM registers. + // + // FIXME: This really results in shameful code when we end up needing to + // collect arguments from different places; often what should result in a + // simple assembling of a structure from scattered addresses has many more + // loads than necessary. Can we clean this up? + mlir::Value regSaveArea = builder.createLoad( + loc, builder.createGetMemberOp(loc, valist, "reg_save_area", 3)); + mlir::Value regAddr; + + uint64_t tyAlign = datalayout.getABITypeAlign(ty).value(); + // The alignment of result address. + uint64_t alignment = 0; + if (neededInt && neededSSE) { + // FIXME: Cleanup. + assert(ai.isDirect() && "Unexpected ABI info for mixed regs"); + auto recordTy = mlir::cast(ai.getCoerceToType()); + cir::PointerType addrTy = builder.getPointerTo(ty); + + mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", + CharUnits::fromQuantity(tyAlign)); + tmp = builder.createPtrBitcast(tmp, recordTy); + assert(recordTy.getNumElements() == 2 && + "Unexpected ABI info for mixed regs"); + mlir::Type tyLo = recordTy.getMembers()[0]; + mlir::Type tyHi = recordTy.getMembers()[1]; + assert((isFPOrVectorOfFPType(tyLo) ^ isFPOrVectorOfFPType(tyHi)) && + "Unexpected ABI info for mixed regs"); + mlir::Value gpAddr = builder.createPtrStride(loc, regSaveArea, gp_offset); + mlir::Value fpAddr = builder.createPtrStride(loc, regSaveArea, fp_offset); + mlir::Value regLoAddr = isFPOrVectorOfFPType(tyLo) ? fpAddr : gpAddr; + mlir::Value regHiAddr = isFPOrVectorOfFPType(tyHi) ? gpAddr : fpAddr; + + // Copy the first element. + // FIXME: Our choice of alignment here and below is probably pessimistic. + mlir::Value v = builder.createAlignedLoad( + loc, regLoAddr, datalayout.getABITypeAlign(tyLo).value()); + builder.createStore(loc, v, + builder.createGetMemberOp(loc, tmp, "gp_offset", 0)); + + // Copy the second element. + v = builder.createAlignedLoad(loc, regHiAddr, + datalayout.getABITypeAlign(tyHi).value()); + builder.createStore(loc, v, + builder.createGetMemberOp(loc, tmp, "fp_offset", 1)); + + tmp = builder.createPtrBitcast(tmp, ty); + regAddr = tmp; + } else if (neededInt || neededSSE == 1) { + uint64_t tySize = datalayout.getTypeStoreSize(ty).getFixedValue(); + + mlir::Type coTy; + if (ai.isDirect()) + coTy = ai.getCoerceToType(); + + mlir::Value gpOrFpOffset = neededInt ? gp_offset : fp_offset; + alignment = neededInt ? 8 : 16; + uint64_t regSize = neededInt ? 
neededInt * 8 : 16; + // There are two cases require special handling: + // 1) + // ``` + // struct { + // struct {} a[8]; + // int b; + // }; + // ``` + // The lower 8 bytes of the structure are not stored, + // so an 8-byte offset is needed when accessing the structure. + // 2) + // ``` + // struct { + // long long a; + // struct {} b; + // }; + // ``` + // The stored size of this structure is smaller than its actual size, + // which may lead to reading past the end of the register save area. + if (coTy && (ai.getDirectOffset() == 8 || regSize < tySize)) { cir::PointerType addrTy = builder.getPointerTo(ty); - mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign)); - tmp = builder.createPtrBitcast(tmp, recordTy); - assert(recordTy.getNumElements() == 2 && - "Unexpected ABI info for mixed regs"); - mlir::Type tyLo = recordTy.getMembers()[0]; - mlir::Type tyHi = recordTy.getMembers()[1]; - assert((isFPOrVectorOfFPType(tyLo) ^ isFPOrVectorOfFPType(tyHi)) && - "Unexpected ABI info for mixed regs"); - mlir::Value gpAddr = builder.createPtrStride(loc, regSaveArea, gp_offset); - mlir::Value fpAddr = builder.createPtrStride(loc, regSaveArea, fp_offset); - mlir::Value regLoAddr = isFPOrVectorOfFPType(tyLo) ? fpAddr : gpAddr; - mlir::Value regHiAddr = isFPOrVectorOfFPType(tyHi) ? gpAddr : fpAddr; - - // Copy the first element. - // FIXME: Our choice of alignment here and below is probably pessimistic. - mlir::Value v = builder.createAlignedLoad( - loc, regLoAddr, datalayout.getABITypeAlign(tyLo).value()); - builder.createStore(loc, v, - builder.createGetMemberOp(loc, tmp, "gp_offset", 0)); - - // Copy the second element. - v = builder.createAlignedLoad(loc, regHiAddr, - datalayout.getABITypeAlign(tyHi).value()); - builder.createStore(loc, v, - builder.createGetMemberOp(loc, tmp, "fp_offset", 1)); - - tmp = builder.createPtrBitcast(tmp, ty); + mlir::Value addr = + builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); + mlir::Value src = builder.createAlignedLoad( + loc, builder.createPtrBitcast(addr, coTy), tyAlign); + mlir::Value ptrOffset = + builder.getUnsignedInt(loc, ai.getDirectOffset(), 32); + mlir::Value dst = builder.createPtrStride(loc, tmp, ptrOffset); + builder.createStore(loc, src, dst); regAddr = tmp; - } else if (neededInt || neededSSE == 1) { - uint64_t tySize = datalayout.getTypeStoreSize(ty).getFixedValue(); - - mlir::Type coTy; - if (ai.isDirect()) - coTy = ai.getCoerceToType(); - - mlir::Value gpOrFpOffset = neededInt ? gp_offset : fp_offset; - alignment = neededInt ? 8 : 16; - uint64_t regSize = neededInt ? neededInt * 8 : 16; - // There are two cases require special handling: - // 1) - // ``` - // struct { - // struct {} a[8]; - // int b; - // }; - // ``` - // The lower 8 bytes of the structure are not stored, - // so an 8-byte offset is needed when accessing the structure. - // 2) - // ``` - // struct { - // long long a; - // struct {} b; - // }; - // ``` - // The stored size of this structure is smaller than its actual size, - // which may lead to reading past the end of the register save area. - if (coTy && (ai.getDirectOffset() == 8 || regSize < tySize)) { + } else { + regAddr = builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); + + // Copy into a temporary if the type is more aligned than the + // register save area. 
+ if (neededInt && tyAlign > 8) { cir::PointerType addrTy = builder.getPointerTo(ty); mlir::Value tmp = builder.createAlloca( loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign)); - mlir::Value addr = - builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); - mlir::Value src = builder.createAlignedLoad( - loc, builder.createPtrBitcast(addr, coTy), tyAlign); - mlir::Value ptrOffset = - builder.getUnsignedInt(loc, ai.getDirectOffset(), 32); - mlir::Value dst = builder.createPtrStride(loc, tmp, ptrOffset); - builder.createStore(loc, src, dst); + builder.createMemCpy(loc, tmp, regAddr, + builder.getUnsignedInt(loc, tySize, 32)); regAddr = tmp; - } else { - regAddr = builder.createPtrStride(loc, regSaveArea, gpOrFpOffset); - - // Copy into a temporary if the type is more aligned than the - // register save area. - if (neededInt && tyAlign > 8) { - cir::PointerType addrTy = builder.getPointerTo(ty); - mlir::Value tmp = builder.createAlloca( - loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign)); - builder.createMemCpy(loc, tmp, regAddr, - builder.getUnsignedInt(loc, tySize, 32)); - regAddr = tmp; - } } - - } else { - assert(neededSSE == 2 && "Invalid number of needed registers!"); - // SSE registers are spaced 16 bytes apart in the register save - // area, we need to collect the two eightbytes together. - // The ABI isn't explicit about this, but it seems reasonable - // to assume that the slots are 16-byte aligned, since the stack is - // naturally 16-byte aligned and the prologue is expected to store - // all the SSE registers to the RSA. - - mlir::Value regAddrLo = - builder.createPtrStride(loc, regSaveArea, fp_offset); - mlir::Value regAddrHi = builder.createPtrStride( - loc, regAddrLo, builder.getUnsignedInt(loc, 16, /*numBits=*/32)); - - mlir::MLIRContext *Context = abiInfo.getContext().getMLIRContext(); - cir::RecordType recordTy = - ai.canHaveCoerceToType() - ? cast(ai.getCoerceToType()) - : cir::RecordType::get( - Context, - {DoubleType::get(Context), DoubleType::get(Context)}, - /*packed=*/false, /*padded=*/false, - cir::RecordType::Struct); - cir::PointerType addrTy = builder.getPointerTo(ty); - mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", - CharUnits::fromQuantity(tyAlign)); - tmp = builder.createPtrBitcast(tmp, recordTy); - mlir::Value v = builder.createLoad( - loc, builder.createPtrBitcast(regAddrLo, recordTy.getMembers()[0])); - builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 0)); - v = builder.createLoad( - loc, builder.createPtrBitcast(regAddrHi, recordTy.getMembers()[1])); - builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 1)); - - tmp = builder.createPtrBitcast(tmp, ty); - regAddr = tmp; } - // AMD64-ABI 3.5.7p5: Step 5. Set: - // l->gp_offset = l->gp_offset + num_gp * 8 - // l->fp_offset = l->fp_offset + num_fp * 16. - if (neededInt) { - mlir::Value offset = builder.getUnsignedInt(loc, neededInt * 8, 32); - builder.createStore(loc, builder.createAdd(gp_offset, offset), - gp_offset_p); - } + } else { + assert(neededSSE == 2 && "Invalid number of needed registers!"); + // SSE registers are spaced 16 bytes apart in the register save + // area, we need to collect the two eightbytes together. + // The ABI isn't explicit about this, but it seems reasonable + // to assume that the slots are 16-byte aligned, since the stack is + // naturally 16-byte aligned and the prologue is expected to store + // all the SSE registers to the RSA. 
+ + mlir::Value regAddrLo = + builder.createPtrStride(loc, regSaveArea, fp_offset); + mlir::Value regAddrHi = builder.createPtrStride( + loc, regAddrLo, builder.getUnsignedInt(loc, 16, /*numBits=*/32)); + + mlir::MLIRContext *Context = abiInfo.getContext().getMLIRContext(); + cir::RecordType recordTy = + ai.canHaveCoerceToType() + ? cast(ai.getCoerceToType()) + : cir::RecordType::get( + Context, {DoubleType::get(Context), DoubleType::get(Context)}, + /*packed=*/false, /*padded=*/false, cir::RecordType::Struct); + cir::PointerType addrTy = builder.getPointerTo(ty); + mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp", + CharUnits::fromQuantity(tyAlign)); + tmp = builder.createPtrBitcast(tmp, recordTy); + mlir::Value v = builder.createLoad( + loc, builder.createPtrBitcast(regAddrLo, recordTy.getMembers()[0])); + builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 0)); + v = builder.createLoad( + loc, builder.createPtrBitcast(regAddrHi, recordTy.getMembers()[1])); + builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 1)); + + tmp = builder.createPtrBitcast(tmp, ty); + regAddr = tmp; + } - if (neededSSE) { - mlir::Value offset = builder.getUnsignedInt(loc, neededSSE * 8, 32); - builder.createStore(loc, builder.createAdd(fp_offset, offset), - fp_offset_p); - } + // AMD64-ABI 3.5.7p5: Step 5. Set: + // l->gp_offset = l->gp_offset + num_gp * 8 + // l->fp_offset = l->fp_offset + num_fp * 16. + if (neededInt) { + mlir::Value offset = builder.getUnsignedInt(loc, neededInt * 8, 32); + builder.createStore(loc, builder.createAdd(gp_offset, offset), gp_offset_p); + } - builder.create(loc, mlir::ValueRange{regAddr}, contBlock); + if (neededSSE) { + mlir::Value offset = builder.getUnsignedInt(loc, neededSSE * 8, 32); + builder.createStore(loc, builder.createAdd(fp_offset, offset), fp_offset_p); + } - // Emit code to load the value if it was passed in memory. - builder.setInsertionPointToStart(inMemBlock); - mlir::Value memAddr = - buildX86_64VAArgFromMemory(builder, datalayout, valist, ty, loc); - builder.create(loc, mlir::ValueRange{memAddr}, contBlock); + builder.create(loc, mlir::ValueRange{regAddr}, contBlock); - // Yield the appropriate result. - builder.setInsertionPointToStart(contBlock); - mlir::Value res_addr = contBlock->addArgument(regAddr.getType(), loc); + // Emit code to load the value if it was passed in memory. + builder.setInsertionPointToStart(inMemBlock); + mlir::Value memAddr = + buildX86_64VAArgFromMemory(builder, datalayout, valist, ty, loc); + builder.create(loc, mlir::ValueRange{memAddr}, contBlock); - mlir::Value result = - alignment - ? builder.createAlignedLoad( - loc, builder.createPtrBitcast(res_addr, ty), alignment) - : builder.createLoad(loc, builder.createPtrBitcast(res_addr, ty)); + // Yield the appropriate result. + builder.setInsertionPointToStart(contBlock); + mlir::Value res_addr = contBlock->addArgument(regAddr.getType(), loc); - builder.create(loc, result); + mlir::Value result = + alignment + ? builder.createAlignedLoad( + loc, builder.createPtrBitcast(res_addr, ty), alignment) + : builder.createLoad(loc, builder.createPtrBitcast(res_addr, ty)); - yieldTy = result.getType(); - }); + builder.create(loc, result); return scopeOp.getResult(0); }
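
Note on the lowered logic (illustrative sketch, not part of the patch): the builder code above emits the AMD64 va_arg algorithm from AMD64-ABI 3.5.7p5, and the C below is roughly what the lowered CIR computes for a single integer-class argument (neededInt == 1, neededSSE == 0). The struct mirrors the standard __va_list_tag layout that the get_member indices refer to; the field name overflow_arg_area and the helper name va_arg_int_addr are illustrative assumptions, and the in-memory branch stands in for what buildX86_64VAArgFromMemory emits.

/* Hypothetical sketch; names are illustrative, behavior follows the ABI
   steps quoted in the builder comments. */
struct va_list_tag {
  unsigned gp_offset;      /* get_member index 0 */
  unsigned fp_offset;      /* get_member index 1 */
  void *overflow_arg_area; /* get_member index 2, in-memory path */
  void *reg_save_area;     /* get_member index 3 */
};

static void *va_arg_int_addr(struct va_list_tag *l) {
  /* Step 3: does the argument still fit in the GP register save area
     (48 bytes = 6 registers * 8)? */
  if (l->gp_offset <= 48 - 1 * 8) {
    /* Step 4: fetch from reg_save_area at the current gp_offset. */
    void *addr = (char *)l->reg_save_area + l->gp_offset;
    /* Step 5: advance gp_offset by num_gp * 8. */
    l->gp_offset += 1 * 8;
    return addr; /* corresponds to regAddr branched to the cont block */
  }
  /* Step 7: the argument was passed in memory; the lowering emits the
     equivalent of this through buildX86_64VAArgFromMemory. */
  void *addr = l->overflow_arg_area;
  l->overflow_arg_area = (char *)l->overflow_arg_area + 8;
  return addr;
}

The if/else above is exactly the control flow that cannot live directly inside a single-block region such as the cond region of cir.for, which is why the patch wraps the whole sequence in a cir.scope that yields the loaded value.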