diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index da646e24333d..333bbf0e4c95 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -479,9 +479,10 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &D, // OpenCL variables in local address space and CUDA shared // variables cannot have an initializer. mlir::Attribute Init = nullptr; - if (D.hasAttr() || D.hasAttr()) + if (D.hasAttr()) llvm_unreachable("CUDA is NYI"); - else if (Ty.getAddressSpace() != LangAS::opencl_local) + else if (Ty.getAddressSpace() != LangAS::opencl_local && + !D.hasAttr()) Init = builder.getZeroInitAttr(convertType(Ty)); cir::GlobalOp GV = builder.createVersionedGlobal( @@ -499,9 +500,14 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &D, setGVProperties(GV, &D); - // Make sure the result is of the correct type. - if (AS != builder.getAddrSpaceAttr(Ty.getAddressSpace())) - llvm_unreachable("address space cast NYI"); + // OG checks if the expected address space, denoted by the type, is the + // same as the actual address space indicated by attributes. If they aren't + // the same, an addrspacecast is emitted when this variable is accessed. + // In CIR however, cir.get_global alreadys carries that information in + // !cir.ptr type - if this global is in OpenCL local address space, then its + // type would be !cir.ptr<..., addrspace(offload_local)>. Therefore we don't + // need an explicit address space cast in CIR: they will get emitted when + // lowering to LLVM IR. // Ensure that the static local gets initialized by making sure the parent // function gets emitted eventually. diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 10dbd85edc4b..ac2125cf717c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1604,7 +1604,14 @@ LangAS CIRGenModule::getGlobalConstantAddressSpace() const { LangAS CIRGenModule::getLangTempAllocaAddressSpace() const { if (getLangOpts().OpenCL) return LangAS::opencl_private; - if (getLangOpts().SYCLIsDevice || getLangOpts().CUDAIsDevice || + + // For temporaries inside functions, CUDA treats them as normal variables. + // LangAS::cuda_device, on the other hand, is reserved for those variables + // explicitly marked with __device__. + if (getLangOpts().CUDAIsDevice) + return LangAS::Default; + + if (getLangOpts().SYCLIsDevice || (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice)) llvm_unreachable("NYI"); return LangAS::Default; @@ -3983,8 +3990,11 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *D) { (!D || D->getType().getAddressSpace() == LangAS::Default)) llvm_unreachable("NYI"); - if (langOpts.CUDA && langOpts.CUDAIsDevice) + if (langOpts.CUDA && langOpts.CUDAIsDevice) { + if (D && D->hasAttr()) + return LangAS::cuda_shared; llvm_unreachable("NYI"); + } if (langOpts.OpenMP) llvm_unreachable("NYI"); diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp index 9dbf12ad138c..1edc09f2183e 100644 --- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -645,6 +645,9 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { case LangAS::opencl_global: return Kind::offload_global; case LangAS::opencl_local: + case LangAS::cuda_shared: + // Local means local among the work-group (OpenCL) or block (CUDA). + // All threads inside the kernel can access local memory. return Kind::offload_local; case LangAS::opencl_constant: return Kind::offload_constant; @@ -657,7 +660,6 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) { case LangAS::opencl_global_host: case LangAS::cuda_device: case LangAS::cuda_constant: - case LangAS::cuda_shared: case LangAS::sycl_global: case LangAS::sycl_global_device: case LangAS::sycl_global_host: diff --git a/clang/test/CIR/CodeGen/CUDA/address-spaces.cu b/clang/test/CIR/CodeGen/CUDA/address-spaces.cu new file mode 100644 index 000000000000..364ab58742c3 --- /dev/null +++ b/clang/test/CIR/CodeGen/CUDA/address-spaces.cu @@ -0,0 +1,19 @@ +#include "../Inputs/cuda.h" + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \ +// RUN: -fcuda-is-device -emit-cir -target-sdk-version=12.3 \ +// RUN: %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +__global__ void fn() { + int i = 0; + __shared__ int j; + j = i; +} + +// CIR: cir.global "private" internal dsolocal addrspace(offload_local) @_ZZ2fnvE1j : !s32i +// CIR: cir.func @_Z2fnv +// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr, ["i", init] +// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr +// CIR: [[Tmp:%[0-9]+]] = cir.load [[Local]] : !cir.ptr, !s32i +// CIR: cir.store [[Tmp]], [[Shared]] : !s32i, !cir.ptr