Commit fb198fa

Merge ci into main (squashed)
1 parent ea01ba2 commit fb198fa

File tree: 14 files changed (+798, -155 lines)
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
+name: Build and Release
+
+on:
+  push:
+    branches: [main, ci]
+  pull_request:
+    branches: [main]
+  workflow_dispatch: {} # Allow manual trigger
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-with-cuda:
+    runs-on: ubuntu-latest
+    container:
+      image: pytorch/pytorch:2.6.0-cuda12.6-cudnn9-devel
+    steps:
+      - name: Install build dependencies
+        run: |
+          apt-get update && apt-get install -y --no-install-recommends \
+            cmake ninja-build git
+
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Set CUDA_HOME
+        run: |
+          # Find nvcc location
+          NVCC_PATH=$(which nvcc)
+          # Extract CUDA installation directory (remove /bin/nvcc from path)
+          export CUDA_HOME=$(dirname $(dirname $NVCC_PATH))
+          echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV
+          echo "Found CUDA installation at: ${CUDA_HOME}"
+          echo "${CUDA_HOME}/bin" >> $GITHUB_PATH
+
+      - name: Verify CUDA installation
+        run: |
+          nvcc -V
+          echo "CUDA_HOME: ${CUDA_HOME}"
+          ls -la ${CUDA_HOME}/bin
+          echo "PATH: $PATH"
+          pwd
+          ls . -alh
+          ls cutlass -alh
+          ls gemm_int8 -alh
+
+      - name: Build C++/CUDA (CMake)
+        run: |
+          chmod +x build.sh
+          ./build.sh
+        env:
+          CUDA_PATH: ${CUDA_HOME}
+
+      - name: Build wheel
+        run: ./build.sh --wheel
+        env:
+          CUDA_PATH: ${CUDA_HOME}
+
+      - name: Upload build artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheel
+          path: dist/*.whl
+          retention-days: 7
+
+  publish-release:
+    needs: build-with-cuda
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code to get version
+        uses: actions/checkout@v4
+
+      - name: Extract version
+        id: extract_version
+        run: |
+          VERSION=$(grep version pyproject.toml | head -n1 | awk -F'"' '{print $2}')
+          echo "Package version: $VERSION"
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+
+      - name: Download wheel artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: wheel
+          path: wheels/
+
+      - name: List wheels
+        run: ls -la wheels/
+
+      - name: Create/Update Release
+        uses: softprops/action-gh-release@v2.0.8
+        with:
+          files: wheels/*.whl
+          prerelease: false
+          name: "v${{ steps.extract_version.outputs.version }}"
+          tag_name: "v${{ steps.extract_version.outputs.version }}"
+          make_latest: true
+          draft: false
+          target_commitish: ${{ github.sha }}
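
Note: the workflow drives the compilation through a repository-level build.sh (plain and with a --wheel flag), which is not part of this diff. A minimal sketch of the interface the steps above appear to assume — only the script name and the --wheel flag come from the workflow, everything else here is an assumption — might look like:

#!/usr/bin/env bash
# Hypothetical sketch of build.sh; the real script is not shown in this commit.
set -euo pipefail

# Configure and build the C++/CUDA extension via the repository's CMakeLists.txt.
# CUDA_PATH is exported by the workflow; CUDAToolkit discovery can also rely on
# nvcc being on PATH inside the pytorch devel container.
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release
cmake --build build

# With --wheel, additionally package the Python project into dist/ so the
# "Upload build artifact" step finds dist/*.whl.
if [[ "${1:-}" == "--wheel" ]]; then
    python -m pip wheel . --no-deps -w dist/
fi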

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -20,3 +20,6 @@ int8_ada.egg-info/
 __pycache__/
 CMakeCache*
 gemm_int8.egg-info/
+build/*
+dist/*
+*.cmake

CMakeLists.txt

Lines changed: 192 additions & 22 deletions
@@ -1,31 +1,201 @@
-cmake_minimum_required(VERSION 3.11)
+cmake_minimum_required(VERSION 3.18)
 project(gemm_int8 LANGUAGES CXX)

+# Set default build type to Release
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release)
+endif()
+message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
+
+# Set output directories for all build artifacts
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

-find_package(Git REQUIRED)
-if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
-    message(STATUS "Populating Git submodule.")
-    execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive
-                    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-                    RESULT_VARIABLE GIT_SUBMOD_RESULT)
-    if(NOT GIT_SUBMOD_RESULT EQUAL "0")
-        message(FATAL_ERROR
-            "git submodule updata --init --recursive failed with ${GIT_SUBMOD_RESULT}.")
+# Find Python executable
+if(NOT DEFINED Python3_EXECUTABLE)
+    find_program(Python3_EXECUTABLE NAMES python3 python)
+    if(NOT Python3_EXECUTABLE)
+        message(FATAL_ERROR "Python3 executable not found. Please specify with -DPython3_EXECUTABLE=path/to/python")
     endif()
 endif()
+message(STATUS "Using Python executable: ${Python3_EXECUTABLE}")
+
+# Find Python package
+find_package(Python3 COMPONENTS Development REQUIRED)
+message(STATUS "Python3_INCLUDE_DIRS: ${Python3_INCLUDE_DIRS}")
+
+# Get Python include directories
+execute_process(
+    COMMAND ${Python3_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_path('include'))"
+    OUTPUT_VARIABLE PYTHON_INCLUDE_DIR
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+message(STATUS "Python include directory: ${PYTHON_INCLUDE_DIR}")
+
+# Find PyTorch
+execute_process(
+    COMMAND ${Python3_EXECUTABLE} -c "import torch; print(torch.utils.cmake_prefix_path)"
+    RESULT_VARIABLE PYTORCH_RESULT
+    OUTPUT_VARIABLE TORCH_PREFIX_PATH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+if(NOT PYTORCH_RESULT EQUAL 0)
+    message(FATAL_ERROR "PyTorch not found. Please install PyTorch first.")
+endif()
+list(APPEND CMAKE_PREFIX_PATH ${TORCH_PREFIX_PATH})
+
+# Enable CUDA
+if(NOT DEFINED BUILD_CUDA)
+    set(BUILD_CUDA ON)
+endif()

+if(BUILD_CUDA)
+    # NVCC compatibility check for newer MSVC compilers
+    if(MSVC AND MSVC_VERSION VERSION_GREATER_EQUAL 1940)
+        string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler")
+    endif()
+
+    enable_language(CUDA)
+    find_package(CUDAToolkit REQUIRED)
+
+    # Convert the CUDA version from X.Y.z to XY
+    string(REGEX MATCH "^[0-9]+.[0-9]+" _CUDA_VERSION_FIRST_TWO "${CMAKE_CUDA_COMPILER_VERSION}")
+    string(REPLACE "." "" CUDA_VERSION_SHORT "${_CUDA_VERSION_FIRST_TWO}")
+
+    message(STATUS "CUDA Version: ${CUDA_VERSION_SHORT} (${CMAKE_CUDA_COMPILER_VERSION})")
+    message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
+
+    # IMPORTANT: This is the key change - disable PyTorch's architecture detection
+    set(TORCH_CUDA_ARCH_LIST "")
+
+    # Default architectures if not provided
+    if(NOT DEFINED COMPUTE_CAPABILITY)
+        set(COMPUTE_CAPABILITY "70;75;80;86;89;90;90a" CACHE STRING "CUDA Compute Capabilities")
+    endif()
+
+    message(STATUS "CUDA Capabilities Selected: ${COMPUTE_CAPABILITY}")
+
+    # Configure architectures for compilation - explicitly set with our choices
+    set(CMAKE_CUDA_ARCHITECTURES ${COMPUTE_CAPABILITY})
+
+    # Set explicit NVCC flags to override any auto-detection
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --use_fast_math")
+
+    # Add explicit architecture flags to NVCC
+    foreach(ARCH ${COMPUTE_CAPABILITY})
+        string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${ARCH},code=sm_${ARCH}")
+    endforeach()
+
+    # For the latest architecture, also add PTX
+    list(GET COMPUTE_CAPABILITY -1 LATEST_ARCH)
+    string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_${LATEST_ARCH},code=compute_${LATEST_ARCH}")
+
+    message(STATUS "CUDA Flags: ${CMAKE_CUDA_FLAGS}")
+
+    # Set C++ standard for CUDA
+    set(CMAKE_CUDA_STANDARD 17)
+    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+
+    # Define that we're building with CUDA
+    add_compile_definitions(BUILD_CUDA)
+endif()
+
+# Set C++ standard
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Include CUTLASS headers (without building the entire library)
+include_directories(${CMAKE_SOURCE_DIR}/cutlass/include)
+include_directories(${CMAKE_SOURCE_DIR}/cutlass/tools/util/include)
+
+# Setup include directories
+include_directories(${CMAKE_SOURCE_DIR})
+include_directories(${CMAKE_SOURCE_DIR}/csrc/kernels/include)
+include_directories(${Python3_INCLUDE_DIRS})
+include_directories(${PYTHON_INCLUDE_DIR})
+
+# Find PyTorch - IMPORTANT: Do this after setting TORCH_CUDA_ARCH_LIST
+find_package(Torch REQUIRED)
+message(STATUS "Found PyTorch: ${TORCH_INCLUDE_DIRS}")
+
+# Create source files list
+set(CPP_FILES csrc/kernels/bindings.cpp)
+set(CUDA_FILES csrc/kernels/gemm.cu)
+
+# Add source files based on backend
+if(BUILD_CUDA)
+    set(SRC_FILES ${CPP_FILES} ${CUDA_FILES})
+    set(OUTPUT_NAME "gemm_int8_CUDA")
+else()
+    set(SRC_FILES ${CPP_FILES})
+    set(OUTPUT_NAME "gemm_int8_CPU")
+endif()
+
+# Create the extension library
+add_library(gemm_int8 SHARED ${SRC_FILES})
+
+# Link dependencies
+if(BUILD_CUDA)
+    target_link_libraries(gemm_int8 PRIVATE
+        "${TORCH_LIBRARIES}"
+        Python3::Python
+        CUDA::cudart
+        CUDA::cublas
+    )
+else()
+    target_link_libraries(gemm_int8 PRIVATE
+        "${TORCH_LIBRARIES}"
+        Python3::Python
+    )
+endif()
+
+target_include_directories(gemm_int8 PRIVATE
+    ${TORCH_INCLUDE_DIRS}
+    ${Python3_INCLUDE_DIRS}
+    ${PYTHON_INCLUDE_DIR}
+)
+
+# Set output properties
+set_target_properties(gemm_int8 PROPERTIES
+    OUTPUT_NAME "${OUTPUT_NAME}"
+    PREFIX ""
+)
+
+# Configure output directories based on platform
+if(WIN32)
+    # Windows-specific settings
+    set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
+
+    if(MSVC)
+        set_target_properties(gemm_int8 PROPERTIES
+            RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_SOURCE_DIR}/gemm_int8"
+            RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_SOURCE_DIR}/gemm_int8"
+        )
+    endif()
+else()
+    # Linux/macOS settings
+    set_target_properties(gemm_int8 PROPERTIES
+        LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/gemm_int8"
+    )
+endif()

-set(_saved_CMAKE_MESSAGE_LOG_LEVEL ${CMAKE_MESSAGE_LOG_LEVEL})
-set(CMAKE_MESSAGE_LOG_LEVEL ERROR)
-add_subdirectory(cutlass)
-set(CMAKE_MESSAGE_LOG_LEVEL ${_saved_CMAKE_MESSAGE_LOG_LEVEL})
+# Make a custom command to copy the built library to the Python package
+add_custom_command(
+    TARGET gemm_int8
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        $<TARGET_FILE:gemm_int8>
+        "${CMAKE_SOURCE_DIR}/gemm_int8/$<TARGET_FILE_NAME:gemm_int8>"
+    COMMENT "Copying library to Python package directory"
+)

-include_directories("${CMAKE_SOURCE_DIR}")
-include_directories(cutlass/tools/util/include)
-include_directories(cutlass/include)
-include_directories(gemm_int8/kernels/include)
+# Debug info
+message(STATUS "Source files: ${SRC_FILES}")
+message(STATUS "Library will be copied to: ${CMAKE_SOURCE_DIR}/gemm_int8/$<TARGET_FILE_NAME:gemm_int8>")

-get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
-foreach(dir ${dirs})
-    message(STATUS "dir='${dir}'")
-endforeach()
+# Print architecture settings again at the end to confirm
+if(BUILD_CUDA)
+    message(STATUS "Final CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
+    message(STATUS "Final CUDA flags: ${CMAKE_CUDA_FLAGS}")
+endif()
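
Note: the new CMakeLists.txt can also be driven directly outside CI. COMPUTE_CAPABILITY and Python3_EXECUTABLE are the cache variables it defines above; the concrete values in the sketch below are only an example, assuming Ninja and a Python with torch importable are available:

# Configure and build out of tree with explicit architecture and Python choices.
cmake -S . -B build -G Ninja \
    -DCMAKE_BUILD_TYPE=Release \
    -DPython3_EXECUTABLE="$(command -v python3)" \
    -DCOMPUTE_CAPABILITY="80;86"
cmake --build build

# With COMPUTE_CAPABILITY="80;86" the foreach/PTX logic above appends to CMAKE_CUDA_FLAGS:
#   -gencode=arch=compute_80,code=sm_80
#   -gencode=arch=compute_86,code=sm_86
#   -gencode=arch=compute_86,code=compute_86   (PTX for the last listed architecture)
# The built library is then copied into gemm_int8/ by the post-build command.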
