Skip to content

Commit dda69c8

Browse files
authored
[CIR][CUDA] Add attribute for CUDA fat binary name (#1377)
This is in preparation for generating registration functions in LoweringPrepare.

CUDA compilation works as follows (irrelevant arguments omitted):

```sh
# First compile for device, generating PTX assembly
clang++ test.cu -fcuda-is-device -o device.s

# Convert that into a binary file
ptxas device.s --output-file device.o
fatbin --create device.fatbin --image=profile=sm_52,file=device.o

# Pass that file as an argument to host
clang++ test.cu -fcuda-include-gpubinary device.fatbin -cuid="some unique id"
```

From the name of the GPU binary we can obtain a handle for registration, so we add an attribute to ModuleOp that records that name.

If `-fcuda-include-gpubinary` is not specified (as in the test `simple.cu`), OG will not generate any registration functions. We do the same here by not generating the attribute.
1 parent cc67bf7 commit dda69c8

File tree

7 files changed

+41
-5
lines changed

7 files changed

+41
-5
lines changed

clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//===----------------------------------------------------------------------===//
1919

2020
def CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName",
21-
"cuda_kernel_name"> {
21+
"cu.kernel_name"> {
2222
let summary = "Device-side function name for this stub.";
2323
let description =
2424
[{
@@ -35,4 +35,21 @@ def CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName",
3535
let assemblyFormat = "`<` $kernel_name `>`";
3636
}
3737

38+
// Module-level attribute that stores the file name of the CUDA GPU (fat)
// binary supplied to the host compilation. CIRGenModule only attaches it when
// a non-empty GPU binary file name was given, so its absence means no GPU
// binary was specified.
def CUDABinaryHandleAttr : CIR_Attr<"CUDABinaryHandle",
39+
"cu.binary_handle"> {
40+
let summary = "Fat binary handle for device code.";
41+
let description =
42+
[{
43+
This attribute is attached to the ModuleOp and records the binary file
44+
name passed to host.
45+
46+
CUDA first compiles device-side code into a fat binary file. The file
47+
name is then passed into host-side code, which is used to create a handle
48+
and then generate various registration functions.
49+
}];
50+
51+
// Single parameter: the GPU binary file name, stored as an owned string.
let parameters = (ins "std::string":$name);
52+
// Textual form is the name in angle brackets, e.g.
// #cir.cu.binary_handle<fatbin.o>.
let assemblyFormat = "`<` $name `>`";
53+
}
54+
3855
#endif // MLIR_CIR_DIALECT_CIR_CUDA_ATTRS

clang/include/clang/CIR/Dialect/IR/CIRDialect.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def CIR_Dialect : Dialect {
4545
static llvm::StringRef getGlobalAnnotationsAttrName() { return "cir.global_annotations"; }
4646

4747
static llvm::StringRef getOpenCLVersionAttrName() { return "cir.cl.version"; }
48+
static llvm::StringRef getCUDABinaryHandleAttrName() { return "cir.cu.binary_handle"; }
4849

4950
void registerAttributes();
5051
void registerTypes();

clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "clang/CIR/Dialect/IR/CIRTypes.h"
2020
#include "llvm/Support/Casting.h"
2121
#include "llvm/Support/raw_ostream.h"
22-
#include <iostream>
2322

2423
using namespace clang;
2524
using namespace clang::CIRGen;
@@ -91,7 +90,6 @@ void CIRGenCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf,
9190
llvm_unreachable("NYI");
9291

9392
std::string launchAPI = addPrefixToName("LaunchKernel");
94-
std::cout << "LaunchAPI is " << launchAPI << "\n";
9593
const IdentifierInfo &launchII = cgm.getASTContext().Idents.get(launchAPI);
9694
FunctionDecl *launchFD = nullptr;
9795
for (auto *result : dc->lookup(&launchII)) {

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,17 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
215215
/*line=*/0,
216216
/*col=*/0));
217217
}
218+
219+
// Set CUDA GPU binary handle.
220+
if (langOpts.CUDA) {
221+
std::string cudaBinaryName = codeGenOpts.CudaGpuBinaryFileName;
222+
if (!cudaBinaryName.empty()) {
223+
theModule->setAttr(
224+
cir::CIRDialect::getCUDABinaryHandleAttrName(),
225+
cir::CUDABinaryHandleAttr::get(&mlirContext, cudaBinaryName));
226+
}
227+
}
228+
218229
if (langOpts.Sanitize.has(SanitizerKind::Thread) ||
219230
(!codeGenOpts.RelaxedAliasing && codeGenOpts.OptimizationLevel > 0)) {
220231
tbaa.reset(new CIRGenTBAA(&mlirContext, astContext, genTypes, theModule,
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#include "../Inputs/cuda.h"
2+
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
4+
// RUN: -x cuda -emit-cir -target-sdk-version=12.3 \
5+
// RUN: -fcuda-include-gpubinary fatbin.o\
6+
// RUN: %s -o %t.cir
7+
// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
8+
9+
// CIR-HOST: module @"{{.*}}" attributes{{.*}}cir.cu.binary_handle = #cir.cu.binary_handle<fatbin.o>{{.*}}

clang/test/CIR/CodeGen/CUDA/simple.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
1212

1313
// Attribute for global_fn
14-
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cuda_kernel_name<_Z9global_fni>{{.*}}
14+
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
1515

1616
__host__ void host_fn(int *a, int *b, int *c) {}
1717
// CIR-HOST: cir.func @_Z7host_fnPiS_S_

clang/test/CIR/CodeGen/HIP/simple.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
1212

1313
// Attribute for global_fn
14-
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cuda_kernel_name<_Z9global_fni>{{.*}}
14+
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
1515

1616

1717
__host__ void host_fn(int *a, int *b, int *c) {}

0 commit comments

Comments
 (0)