# Minimum CMake version required to configure this project.
cmake_minimum_required(VERSION 3.18)

project(gemm_int8 LANGUAGES CXX)

# Default to a Release build when the user did not choose a build type.
# Multi-config generators ignore CMAKE_BUILD_TYPE, so the default is only
# applied when the active generator is single-config.
get_property(GEMM_INT8_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT GEMM_INT8_IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

# Default output locations for archive, library and runtime artifacts.
# Each path is a single quoted argument so that the variable holds one
# directory rather than a list of two elements.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

# Locate a Python interpreter unless the caller already provided one with
# -DPython3_EXECUTABLE=<path>. Configuration stops if none can be found.
if(NOT DEFINED Python3_EXECUTABLE)
  find_program(Python3_EXECUTABLE NAMES python3 python)
  if(NOT Python3_EXECUTABLE)
    message(FATAL_ERROR "Python3 executable not found. Please specify with -DPython3_EXECUTABLE=path/to/python")
  endif()
endif()
message(STATUS "Using Python executable: ${Python3_EXECUTABLE}")

# Find the Python development files. The Interpreter component is requested
# together with Development so that FindPython3 takes Python3_EXECUTABLE into
# account and keeps the headers/libraries consistent with that interpreter.
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
message(STATUS "Python3_INCLUDE_DIRS: ${Python3_INCLUDE_DIRS}")

# Additionally ask the interpreter itself for its include directory.
# A failure here is reported but not fatal: Python3_INCLUDE_DIRS from the
# find_package call above is still available.
execute_process(
  COMMAND ${Python3_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_path('include'))"
  RESULT_VARIABLE PYTHON_INCLUDE_RESULT
  OUTPUT_VARIABLE PYTHON_INCLUDE_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT PYTHON_INCLUDE_RESULT EQUAL 0)
  message(WARNING "Could not query the include directory from ${Python3_EXECUTABLE} (result: ${PYTHON_INCLUDE_RESULT})")
  set(PYTHON_INCLUDE_DIR "")
endif()
message(STATUS "Python include directory: ${PYTHON_INCLUDE_DIR}")

# Query the torch Python package for its CMake prefix path and add it to
# CMAKE_PREFIX_PATH. Configuration aborts when the query command does not
# exit with status 0.
execute_process(
  COMMAND ${Python3_EXECUTABLE} -c "import torch; print(torch.utils.cmake_prefix_path)"
  OUTPUT_VARIABLE TORCH_PREFIX_PATH
  RESULT_VARIABLE PYTORCH_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(PYTORCH_RESULT EQUAL 0)
  list(APPEND CMAKE_PREFIX_PATH ${TORCH_PREFIX_PATH})
else()
  message(FATAL_ERROR "PyTorch not found. Please install PyTorch first.")
endif()

# Build switch for the CUDA backend. It defaults to ON; a value supplied by
# the caller (for example -DBUILD_CUDA=OFF) is left untouched.
option(BUILD_CUDA "Build the CUDA backend of gemm_int8" ON)

if(BUILD_CUDA)
  # For MSVC toolsets with MSVC_VERSION >= 1940, pass NVCC the
  # --allow-unsupported-compiler flag. This is appended before the CUDA
  # language is enabled.
  if(MSVC AND MSVC_VERSION VERSION_GREATER_EQUAL 1940)
    string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler")
  endif()

  enable_language(CUDA)
  find_package(CUDAToolkit REQUIRED)

  # Reduce the CUDA compiler version from X.Y.Z to the compact form XY.
  # The dot is escaped so that it only matches a literal '.'.
  string(REGEX MATCH "^[0-9]+\\.[0-9]+" _CUDA_VERSION_FIRST_TWO "${CMAKE_CUDA_COMPILER_VERSION}")
  string(REPLACE "." "" CUDA_VERSION_SHORT "${_CUDA_VERSION_FIRST_TWO}")

  message(STATUS "CUDA Version: ${CUDA_VERSION_SHORT} (${CMAKE_CUDA_COMPILER_VERSION})")
  message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")

  # Clear TORCH_CUDA_ARCH_LIST before Torch is located. Per the original
  # author's note this is meant to disable PyTorch's own architecture
  # detection so that the list below is the one that is used.
  set(TORCH_CUDA_ARCH_LIST "")

  # Default set of compute capabilities when the caller did not provide one.
  if(NOT DEFINED COMPUTE_CAPABILITY)
    set(COMPUTE_CAPABILITY "70;75;80;86;89;90;90a" CACHE STRING "CUDA Compute Capabilities")
  endif()

  # The last list element is read below, so an empty list cannot be used.
  if("${COMPUTE_CAPABILITY}" STREQUAL "")
    message(FATAL_ERROR "COMPUTE_CAPABILITY is empty; pass a semicolon-separated list such as -DCOMPUTE_CAPABILITY=80;90")
  endif()

  message(STATUS "CUDA Capabilities Selected: ${COMPUTE_CAPABILITY}")

  # Hand the selected architectures to CMake.
  set(CMAKE_CUDA_ARCHITECTURES ${COMPUTE_CAPABILITY})

  # Extra NVCC flags used for every CUDA source.
  string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr --use_fast_math")

  # One -gencode entry per selected architecture. Each entry is a single
  # token: it must not contain spaces.
  foreach(ARCH ${COMPUTE_CAPABILITY})
    string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${ARCH},code=sm_${ARCH}")
  endforeach()

  # For the last architecture in the list, additionally emit PTX.
  list(GET COMPUTE_CAPABILITY -1 LATEST_ARCH)
  string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${LATEST_ARCH},code=compute_${LATEST_ARCH}")

  message(STATUS "CUDA Flags: ${CMAKE_CUDA_FLAGS}")

  # CUDA sources are compiled as C++17.
  set(CMAKE_CUDA_STANDARD 17)
  set(CMAKE_CUDA_STANDARD_REQUIRED ON)

  # Expose the BUILD_CUDA preprocessor definition to the sources.
  add_compile_definitions(BUILD_CUDA)
endif()

# C++ sources are built as C++17, and that standard is mandatory.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)

# Header search paths for the targets defined in this file:
#  - the CUTLASS headers (used header-only; the CUTLASS library is not built),
#  - the project root and the kernel headers,
#  - the Python headers found by find_package and reported by the interpreter.
# Each project path is one quoted argument so that it is added as a single
# directory.
include_directories(
  "${CMAKE_SOURCE_DIR}/cutlass/include"
  "${CMAKE_SOURCE_DIR}/cutlass/tools/util/include"
  "${CMAKE_SOURCE_DIR}"
  "${CMAKE_SOURCE_DIR}/csrc/kernels/include"
  ${Python3_INCLUDE_DIRS}
  ${PYTHON_INCLUDE_DIR}
)

# Locate the Torch CMake package. Keep this call after the point where
# TORCH_CUDA_ARCH_LIST is set.
find_package(Torch REQUIRED)
message(STATUS "Found PyTorch: ${TORCH_INCLUDE_DIRS} ")

# Source lists for the two backends.
set(CPP_FILES csrc/kernels/bindings.cpp)
set(CUDA_FILES csrc/kernels/gemm.cu)

# Start from the CPU configuration and extend it when CUDA is enabled:
# the CUDA build adds the CUDA sources and uses a different output name.
set(SRC_FILES ${CPP_FILES})
set(OUTPUT_NAME "gemm_int8_CPU")
if(BUILD_CUDA)
  list(APPEND SRC_FILES ${CUDA_FILES})
  set(OUTPUT_NAME "gemm_int8_CUDA")
endif()

# The extension is built as a shared library from the selected sources.
add_library(gemm_int8 SHARED ${SRC_FILES})

# Torch and Python are linked for every backend; the CUDA runtime and cuBLAS
# are added only for the CUDA build. TORCH_LIBRARIES is passed as one quoted
# argument without any padding inside the quotes.
target_link_libraries(gemm_int8 PRIVATE
  "${TORCH_LIBRARIES}"
  Python3::Python
)
if(BUILD_CUDA)
  target_link_libraries(gemm_int8 PRIVATE
    CUDA::cudart
    CUDA::cublas
  )
endif()

# Torch and Python headers needed to compile the sources.
target_include_directories(gemm_int8 PRIVATE
  ${TORCH_INCLUDE_DIRS}
  ${Python3_INCLUDE_DIRS}
  ${PYTHON_INCLUDE_DIR}
)

# Name the built file after the selected backend and drop the platform
# library prefix. The name is passed without trailing whitespace.
set_target_properties(gemm_int8 PROPERTIES
  OUTPUT_NAME "${OUTPUT_NAME}"
  PREFIX ""
)

# Place the built library inside the gemm_int8 package directory of the
# source tree.
if(WIN32)
  # CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS only initializes targets created after it
  # is set. gemm_int8 already exists at this point, so the corresponding
  # target property is set explicitly; the variable is kept for any target
  # that may be defined later.
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
  set_target_properties(gemm_int8 PROPERTIES
    WINDOWS_EXPORT_ALL_SYMBOLS ON
  )

  if(MSVC)
    set_target_properties(gemm_int8 PROPERTIES
      RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/gemm_int8"
      RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/gemm_int8"
    )
  endif()
else()
  # Non-Windows platforms.
  set_target_properties(gemm_int8 PROPERTIES
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/gemm_int8"
  )
endif()

# After each build of gemm_int8, copy the built library into the gemm_int8
# package directory of the source tree (only when the content differs).
# VERBATIM keeps argument escaping identical across platforms.
add_custom_command(
  TARGET gemm_int8
  POST_BUILD
  COMMAND ${CMAKE_COMMAND} -E copy_if_different
          $<TARGET_FILE:gemm_int8>
          "${CMAKE_SOURCE_DIR}/gemm_int8/$<TARGET_FILE_NAME:gemm_int8>"
  COMMENT "Copying library to Python package directory"
  VERBATIM
)

# Configure-time summary. Generator expressions are not evaluated inside
# message(), so the file name is assembled from the output name and the
# shared-library suffix instead of $<TARGET_FILE_NAME:...>.
message(STATUS "Source files: ${SRC_FILES}")
message(STATUS "Library will be copied to: ${CMAKE_SOURCE_DIR}/gemm_int8/${OUTPUT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}")

# Print the CUDA architecture settings once more at the end of configuration
# so the final values can be checked in the log.
if(BUILD_CUDA)
  message(STATUS "Final CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
  message(STATUS "Final CUDA flags: ${CMAKE_CUDA_FLAGS}")
endif()