Skip to content

[CIR][CUDA] Handle shared and local variables #1368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions clang/lib/CIR/CodeGen/CIRGenDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,10 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &D,
// OpenCL variables in local address space and CUDA shared
// variables cannot have an initializer.
mlir::Attribute Init = nullptr;
if (D.hasAttr<CUDASharedAttr>() || D.hasAttr<LoaderUninitializedAttr>())
if (D.hasAttr<LoaderUninitializedAttr>())
llvm_unreachable("CUDA is NYI");
else if (Ty.getAddressSpace() != LangAS::opencl_local)
else if (Ty.getAddressSpace() != LangAS::opencl_local &&
!D.hasAttr<CUDASharedAttr>())
Init = builder.getZeroInitAttr(convertType(Ty));

cir::GlobalOp GV = builder.createVersionedGlobal(
Expand All @@ -499,9 +500,14 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &D,

setGVProperties(GV, &D);

// Make sure the result is of the correct type.
if (AS != builder.getAddrSpaceAttr(Ty.getAddressSpace()))
llvm_unreachable("address space cast NYI");
// OG checks if the expected address space, denoted by the type, is the
// same as the actual address space indicated by attributes. If they aren't
// the same, an addrspacecast is emitted when this variable is accessed.
// In CIR however, cir.get_global alreadys carries that information in
// !cir.ptr type - if this global is in OpenCL local address space, then its
// type would be !cir.ptr<..., addrspace(offload_local)>. Therefore we don't
// need an explicit address space cast in CIR: they will get emitted when
// lowering to LLVM IR.

// Ensure that the static local gets initialized by making sure the parent
// function gets emitted eventually.
Expand Down
14 changes: 12 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1604,7 +1604,14 @@ LangAS CIRGenModule::getGlobalConstantAddressSpace() const {
LangAS CIRGenModule::getLangTempAllocaAddressSpace() const {
if (getLangOpts().OpenCL)
return LangAS::opencl_private;
if (getLangOpts().SYCLIsDevice || getLangOpts().CUDAIsDevice ||

// For temporaries inside functions, CUDA treats them as normal variables.
// LangAS::cuda_device, on the other hand, is reserved for those variables
// explicitly marked with __device__.
if (getLangOpts().CUDAIsDevice)
return LangAS::Default;

if (getLangOpts().SYCLIsDevice ||
(getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice))
llvm_unreachable("NYI");
return LangAS::Default;
Expand Down Expand Up @@ -3983,8 +3990,11 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *D) {
(!D || D->getType().getAddressSpace() == LangAS::Default))
llvm_unreachable("NYI");

if (langOpts.CUDA && langOpts.CUDAIsDevice)
if (langOpts.CUDA && langOpts.CUDAIsDevice) {
if (D && D->hasAttr<CUDASharedAttr>())
return LangAS::cuda_shared;
llvm_unreachable("NYI");
}

if (langOpts.OpenMP)
llvm_unreachable("NYI");
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,9 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) {
case LangAS::opencl_global:
return Kind::offload_global;
case LangAS::opencl_local:
case LangAS::cuda_shared:
// Local means local among the work-group (OpenCL) or block (CUDA).
// All threads inside the kernel can access local memory.
return Kind::offload_local;
case LangAS::opencl_constant:
return Kind::offload_constant;
Expand All @@ -657,7 +660,6 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) {
case LangAS::opencl_global_host:
case LangAS::cuda_device:
case LangAS::cuda_constant:
case LangAS::cuda_shared:
case LangAS::sycl_global:
case LangAS::sycl_global_device:
case LangAS::sycl_global_host:
Expand Down
19 changes: 19 additions & 0 deletions clang/test/CIR/CodeGen/CUDA/address-spaces.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "../Inputs/cuda.h"

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
// RUN: %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

__global__ void fn() {
int i = 0;
__shared__ int j;
j = i;
}

// CIR: cir.global "private" internal dsolocal addrspace(offload_local) @_ZZ2fnvE1j : !s32i
// CIR: cir.func @_Z2fnv
// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i, addrspace(offload_local)>
// CIR: [[Tmp:%[0-9]+]] = cir.load [[Local]] : !cir.ptr<!s32i>, !s32i
// CIR: cir.store [[Tmp]], [[Shared]] : !s32i, !cir.ptr<!s32i, addrspace(offload_local)>