Skip to content

Improve hardware feature detection in CMake files and do further refactoring & enhancement #824

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 35 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
b7b9267
Remove incorrect comment
mhucka Jun 29, 2025
4658564
Detect AVX & SSE and only build corresponding parts
mhucka Jun 29, 2025
e2c35ec
Consolidate common flags into the top-level CMakeLists.txt
mhucka Jun 29, 2025
1785a09
Silence warning about "using serial compilation"
mhucka Jun 29, 2025
d90c642
Use add_compile_options() instead of set()
mhucka Jun 29, 2025
7fbf19a
Fix cmake syntax
mhucka Jun 30, 2025
86c2ac8
Fix typo
mhucka Jun 30, 2025
f518df6
Add -march=native for the basic version
mhucka Jun 30, 2025
3941a3f
Use march=native with clang on MacOS
mhucka Jun 30, 2025
bb3a9f5
chore: merge branch
mhucka Jun 30, 2025
20c9dd8
Attempt to fix AVX compilation issues on MacOS
mhucka Jul 1, 2025
ba036d7
It seems -mbmi2 flag exists on Macos Intel after all
mhucka Jul 1, 2025
2490847
Simplify logic for setting flags
mhucka Jul 1, 2025
f00af25
Be more careful about SSE flags on Windows
mhucka Jul 1, 2025
9d1ca83
Fix determining AVX & SSE features of the host CPU
mhucka Jul 1, 2025
835db11
chore: merge branch
mhucka Jul 1, 2025
edbadb1
Fix missing NOT
mhucka Jul 2, 2025
12541ed
Merge branch 'master' into mh-consolidate-cmake-configs
mhucka Jul 3, 2025
c94360e
Move GetPybind11.cmake file to dev_tools/cmake
mhucka Jul 7, 2025
7f35ee2
Move and overhaul `GetPybind11.cmake`
mhucka Jul 7, 2025
640b572
Add new CMake macro for checking CPU vector instruction sets
mhucka Jul 7, 2025
b083ab7
Remove no-longer-needed pybind_interface/GetCUDAARCHS.cmake
mhucka Jul 7, 2025
28f52d9
Remove unnecessary CMake instructions
mhucka Jul 7, 2025
13e839c
Remove unnecessary CMake code & do some deduplication
mhucka Jul 7, 2025
eb93692
Rewrite the logic and remove unnecessary CMake code
mhucka Jul 7, 2025
a9e4991
Remove unnecessary CMake code and simply setting the arch
mhucka Jul 7, 2025
344cfb0
Overhaul the top-level CMakeLists.txt file (again)
mhucka Jul 7, 2025
9759a38
Don't bother setting number of threads for cibuildwheel
mhucka Jul 7, 2025
5bd427c
Try without CMP0179
mhucka Jul 7, 2025
4669af3
tmp
mhucka Jul 7, 2025
8d22fe8
tmp
mhucka Jul 7, 2025
c273d11
OpenMP is not required -- adjust CMake files accordingly
mhucka Jul 7, 2025
a08dbb3
Reduce & improve some of the excessive messages printed
mhucka Jul 7, 2025
f8f1ab1
Need to require Python3 Development.Module
mhucka Jul 7, 2025
a469ddd
Configure LTO in a portable way
mhucka Jul 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci_build_wheels.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ on:
branches:
- master
- main
- mh-consolidate-cmake-configs

pull_request:
types: [opened, synchronize]
Expand Down Expand Up @@ -56,4 +57,5 @@ jobs:
uses: ./.github/workflows/reusable_build_wheels.yaml
secrets: inherit
with:
debug: ${{inputs.debug == true}}
# debug: ${{inputs.debug == true}}
debug: true
17 changes: 3 additions & 14 deletions .github/workflows/reusable_build_wheels.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ jobs:
{os: macos-15, arch: arm64},
{os: windows-2025, arch: AMD64},
]
env:
# SHELLOPTS is used by Bash. Add xtrace when debugging is turned on.
SHELLOPTS: ${{inputs.debug && 'xtrace' || '' }}
steps:
- name: Check out a copy of the git repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
Expand Down Expand Up @@ -89,19 +92,6 @@ jobs:
os=${{matrix.conf.os}}
echo MACOSX_DEPLOYMENT_TARGET=${os: -2} >> "$GITHUB_ENV"

- if: startsWith(matrix.conf.os, 'ubuntu')
name: Determine the number of threads to use (Linux)
run: echo "num_threads=$(( $(nproc) - 1 ))" >> "$GITHUB_ENV"

- if: startsWith(matrix.conf.os, 'macos')
name: Determine the number of threads to use (MacOS)
run: echo "num_threads=$(( $(sysctl -n hw.ncpu) - 1 ))" >> "$GITHUB_ENV"

- if: startsWith(matrix.conf.os, 'win')
name: Determine the number of threads to use (Windows)
shell: bash
run: echo "num_threads=$(( NUMBER_OF_PROCESSORS - 1 ))" >> "$GITHUB_ENV"

- name: Build and test wheels
env:
# Note: additional cibuildwheel settings are in pyproject.toml.
Expand All @@ -110,7 +100,6 @@ jobs:
CIBW_BUILD_VERBOSITY: ${{inputs.debug && 1 || ''}}
# Color codes make the raw logs hard to read. (CMake uses CLICOLOR.)
CLICOLOR: ${{inputs.debug && 0 || ''}}
CMAKE_BUILD_PARALLEL_LEVEL: ${{env.num_threads}}
run: |
cibuildwheel --output-dir wheelhouse

Expand Down
118 changes: 86 additions & 32 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,44 +15,84 @@
cmake_minimum_required(VERSION 3.31)
project(qsim LANGUAGES CXX)

include(CheckLanguage)
check_language(CUDA)
# ~~~~~ Set project-wide policies ~~~~~

# The following settings mirror what is in our hand-written Makefiles.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# This text is prepended to messages printed by this config file so it's
# easier to figure out what came from where in the logs.
set(MSG_PREFIX "[qsim cmake configuration]")
# Check we have Python libraries & header files necessary to build modules.
find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module)

# ~~~~~ Analyze the host's hardware & software features ~~~~~

# CMake normally sets CMAKE_APPLE_SILICON_PROCESSOR on Apple Silicon; however,
# it doesn't happen when running builds using cibuildwheel, even on Apple
# Silicon. We have had better luck checking and seting it ourselves.
# Silicon. It's more reliable to check and set it ourselves.
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin"
AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
set(CMAKE_APPLE_SILICON_PROCESSOR TRUE)
message(STATUS "${MSG_PREFIX} detected Apple Silicon")
message(VERBOSE "Detected Apple Silicon")
else()
set(CMAKE_APPLE_SILICON_PROCESSOR FALSE)
message(STATUS "${MSG_PREFIX} did not detect Apple Silicon")
endif()

find_package(OpenMP COMPONENTS CXX NO_POLICY_SCOPE)
if(NOT OpenMP_CXX_FOUND)
message(STATUS "(Without OpenMP, qsim cannot support thread parallelization)")
endif()

include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
message(STATUS "${MSG_PREFIX} found CUDA compiler "
"${CMAKE_CUDA_COMPILER} ${CMAKE_CUDA_COMPILER_VERSION}")
else()
message(STATUS "${MSG_PREFIX} did not find CUDA compiler")
# Did not find the CUDA framewwork, so check for the HIP as an alternative.
execute_process(COMMAND which hipcc
OUTPUT_VARIABLE has_hipcc
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(has_hipcc)
message(STATUS "${MSG_PREFIX} found hipcc")
project(qsim LANGUAGES CXX HIP)
# If CUDA is not available, check for HIP as an alternative.
message(STATUS "CUDA not available; looking for a HIP compiler")
find_program(_HIP_COMPILER hipcc)
if(_HIP_COMPILER)
enable_language(HIP)
message(STATUS "CUDA not available; looking for a HIP compiler - found")
else()
message(STATUS "${MSG_PREFIX} did not find hipcc")
message(STATUS "CUDA not available; looking for a HIP compiler - not found")
message(STATUS "(Without CUDA or HIP, qsim cannot use GPUs for acceleration)")
endif()
endif()

find_package(OpenMP REQUIRED)
include(dev_tools/cmake/CheckCPU.cmake)
# Note: CMake uses "WIN32" for Windows targets, including Win64.
if(WIN32)
check_cpu_support("avx2" CPU_SUPPORTS_AVX2)
check_cpu_support("avx512f" CPU_SUPPORTS_AVX512)
check_cpu_support("sse4.1" CPU_SUPPORTS_SSE4)
elseif(LINUX)
check_cpu_support("avx2" CPU_SUPPORTS_AVX2)
check_cpu_support("avx512f" CPU_SUPPORTS_AVX512)
check_cpu_support("sse4" CPU_SUPPORTS_SSE4)
elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR)
check_cpu_support("avx2_0" CPU_SUPPORTS_AVX2)
check_cpu_support("avx512f" CPU_SUPPORTS_AVX512)
check_cpu_support("sse4_1" CPU_SUPPORTS_SSE4)
endif()

# Configure LTO for compilers that support it.
include(CheckIPOSupported)
check_ipo_supported(RESULT HAVE_LTO)

# ~~~~~ Configure the build ~~~~~

if(WIN32)
add_compile_options(/O2)
else()
add_compile_options(-O3 -D_GLIBCXX_USE_CXX11_ABI=1)
endif()

if(HAVE_LTO)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()

include(dev_tools/cmake/GetPybind11.cmake)

# Always build the basic part.
add_subdirectory(pybind_interface/basic)
Expand All @@ -65,20 +105,34 @@ if(NOT CMAKE_APPLE_SILICON_PROCESSOR)
if(DEFINED ENV{CUQUANTUM_ROOT})
add_subdirectory(pybind_interface/custatevec)
endif()
elseif(has_hipcc)
# Fall back to the HIP backend when hipcc was located by find_program().
elseif(_HIP_COMPILER)
add_subdirectory(pybind_interface/hip)
endif()

add_subdirectory(pybind_interface/sse)
add_subdirectory(pybind_interface/avx512)
add_subdirectory(pybind_interface/avx2)
endif()
if(CPU_SUPPORTS_AVX2)
add_subdirectory(pybind_interface/avx2)
endif()

# Additional miscellanous settings.
# The following settings mirror what is in our hand-written Makefiles.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
if(CPU_SUPPORTS_AVX512)
add_subdirectory(pybind_interface/avx512)
endif()

# Print additional useful info.
message(STATUS "${MSG_PREFIX} OpenMP found = ${OPENMP_FOUND}")
message(STATUS "${MSG_PREFIX} shell $PATH = $ENV{PATH}")
if(CPU_SUPPORTS_SSE4)
add_subdirectory(pybind_interface/sse)
endif()
endif()

if(APPLE)
include_directories(
"/usr/local/include"
"/usr/local/opt/llvm/include"
"/opt/homebrew/include"
"/opt/homebrew/opt/llvm@19/include"
)
link_directories(
"/usr/local/lib"
"/usr/local/opt/llvm/lib"
"/opt/homebrew/lib"
"/opt/homebrew/opt/llvm@19/lib"
)
endif()
104 changes: 104 additions & 0 deletions dev_tools/cmake/CheckCPU.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(CheckCXXCompilerFlag)
include(CheckCXXSourceRuns)

# check_cpu_support(<feature-string> <result-var>)
#
# Probes the build host for a CPU feature and sets <result-var> to TRUE or
# FALSE in the caller's scope.
#
#   <feature-string>  Platform-specific feature name. It is searched for in
#                     the output of our CPUID checker program on Windows,
#                     used as a grep pattern on /proc/cpuinfo on Linux, and
#                     appended to "hw.optional." for sysctl on Intel macOS.
#   <result-var>      Name of the variable that receives the result.
#
# On any other platform (including Apple Silicon) the result is FALSE.
#
# Example:
#   check_cpu_support("avx2" CPU_SUPPORTS_AVX2)
#
# Because this is a macro, the underscore-prefixed helper variables it sets
# are also visible in the caller's scope.
macro(check_cpu_support _FEATURE_STRING _FEATURE_FLAG)
  set(${_FEATURE_FLAG} FALSE)

  message(CHECK_START "Testing platform support for ${_FEATURE_STRING}")
  if(WIN32)
    # On Windows, there's no built-in method to learn the CPU flags. Third-
    # party tools exist, but downloading & running them is a security risk.
    # We resort instead to compiling and running our own small program.
    #
    # try_run() compiles and runs the program every time it is invoked, so the
    # program's output is remembered in _QSIM_WIN32_CPU_FEATURES and reused by
    # later calls during the same configure run.
    if(NOT DEFINED _QSIM_WIN32_CPU_FEATURES)
      set(_CHECKER_FILE_PATH "${CMAKE_BINARY_DIR}/checker.cpp")
      file(WRITE "${_CHECKER_FILE_PATH}" "${_WIN32_CHECKER_SRC}")
      try_run(
        _CHECKER_RETURN_VALUE
        _CHECKER_COMPILED
        "${CMAKE_BINARY_DIR}"
        "${_CHECKER_FILE_PATH}"
        RUN_OUTPUT_VARIABLE _CPU_FEATURES
      )
      if(_CHECKER_COMPILED AND _CHECKER_RETURN_VALUE EQUAL 0)
        set(_QSIM_WIN32_CPU_FEATURES "${_CPU_FEATURES}")
      else()
        # Remember the failure as "no features" so we neither retry nor
        # repeat these messages for every feature that is queried.
        set(_QSIM_WIN32_CPU_FEATURES "")
        message(STATUS "Unable to autodetect vector instruction sets")
        if(NOT _CHECKER_COMPILED)
          message(STATUS "  (failed to compile CPU checker utility)")
        else()
          message(STATUS "  (got an error trying to run our CPU checker)")
        endif()
      endif()
    endif()

    string(FIND "${_QSIM_WIN32_CPU_FEATURES}" "${_FEATURE_STRING}" _FOUND)
    if(NOT _FOUND EQUAL -1)
      set(${_FEATURE_FLAG} TRUE)
    endif()

  elseif(LINUX)
    # grep exits with 0 only if the pattern occurs somewhere in the file.
    execute_process(
      COMMAND grep -q -e "${_FEATURE_STRING}" /proc/cpuinfo
      RESULT_VARIABLE _EXIT_CODE
      OUTPUT_QUIET
      ERROR_QUIET
    )
    if(_EXIT_CODE EQUAL 0)
      set(${_FEATURE_FLAG} TRUE)
    endif()

  elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR)
    # sysctl prints "1" for a supported feature. An unknown key makes it exit
    # with a non-zero status and complain on stderr, which we silence.
    execute_process(
      COMMAND sysctl -n "hw.optional.${_FEATURE_STRING}"
      RESULT_VARIABLE _EXIT_CODE
      OUTPUT_VARIABLE _FLAG_VALUE
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET
    )
    if(_EXIT_CODE EQUAL 0 AND "${_FLAG_VALUE}" STREQUAL "1")
      set(${_FEATURE_FLAG} TRUE)
    endif()
  endif()

  # CHECK_PASS/CHECK_FAIL repeat the CHECK_START text followed by " - <msg>".
  if(${_FEATURE_FLAG})
    message(CHECK_PASS "found")
  else()
    message(CHECK_FAIL "not found")
  endif()
endmacro()

# Small Windows C++ program to test bits in certain Intel CPU registers.
# Info about the registers in Intel CPUs: https://en.wikipedia.org/wiki/CPUID
#
# The program prints the name of each feature it detects on its own line of
# stdout and returns 0. The bits tested are:
#
#   Leaf (EAX)  Subleaf (ECX)  Output register  Bit  Name
#       1            0         ECX (cpuInfo[2])  19  sse4.1
#       1            0         ECX (cpuInfo[2])  20  sse4.2
#       1            0         ECX (cpuInfo[2])  28  avx
#       7            0         EBX (cpuInfo[1])   5  avx2
#       7            0         EBX (cpuInfo[1])  16  avx512f
#
# Leaf 0 is queried first to learn the highest leaf the CPU implements, so
# that leaf 7 is never read on a CPU that does not provide it.
#
# Note: this only reports what the CPU advertises. It does not verify that the
# operating system has enabled the AVX/AVX-512 register state.
#
# Note: try_run() compiles and runs the program each time it is invoked. Code
# that needs the result more than once should keep the captured output rather
# than calling try_run() again.

set(_WIN32_CHECKER_SRC "
#include <iostream>
#include <string>
#include <intrin.h>

int main() {
    int cpuInfo[4] = {0, 0, 0, 0};

    // Leaf 0 returns the highest supported basic leaf in EAX.
    __cpuid(cpuInfo, 0);
    const int maxLeaf = cpuInfo[0];

    if (maxLeaf >= 1) {
        __cpuidex(cpuInfo, 1, 0);
        std::cout << ((cpuInfo[2] & (1 << 19)) ? \"sse4.1\\n\" : \"\")
                  << ((cpuInfo[2] & (1 << 20)) ? \"sse4.2\\n\" : \"\")
                  << ((cpuInfo[2] & (1 << 28)) ? \"avx\\n\" : \"\");
    }
    if (maxLeaf >= 7) {
        __cpuidex(cpuInfo, 7, 0);
        std::cout << ((cpuInfo[1] & (1 << 5)) ? \"avx2\\n\" : \"\")
                  << ((cpuInfo[1] & (1 << 16)) ? \"avx512f\\n\" : \"\");
    }
    return 0;
}
")
40 changes: 40 additions & 0 deletions dev_tools/cmake/GetPybind11.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Make pybind11 (at least MIN_PYBIND_VERSION) available to the build.
#
# An installed pybind11 CMake package is looked for first, with the Python3
# site-packages directory as a hint. If none is found, or the one found is too
# old, the required version is downloaded with FetchContent instead. Finally,
# the Python and pybind11 header locations are added to the include path of
# the current directory and the directories below it.
#
# The whole block is skipped when pybind11_FOUND is already set.
if(NOT pybind11_FOUND)
  set(MIN_PYBIND_VERSION "2.13.6")

  find_package(
    pybind11
    CONFIG
    HINTS "${Python3_SITELIB}"
    NO_POLICY_SCOPE)

  # qsim's requirements.txt and setup.py both include a requirement for
  # "pybind11[global]", so the Pybind11 CMake plugin should be found no matter
  # whether the user is doing a "pip install qsim" or a local build. Still, we
  # want to be sure, and also want to make sure to get the min version we need.
  #
  # The version operands are quoted on purpose: pybind11_VERSION is empty when
  # the package was not found, and an unquoted empty expansion would remove
  # the operand and leave a malformed condition.
  if(NOT pybind11_FOUND
     OR "${pybind11_VERSION}" VERSION_LESS "${MIN_PYBIND_VERSION}")
    include(FetchContent)
    FetchContent_Declare(
      pybind11
      GIT_REPOSITORY https://github.com/pybind/pybind11
      GIT_TAG "v${MIN_PYBIND_VERSION}"
      OVERRIDE_FIND_PACKAGE
    )
    FetchContent_MakeAvailable(pybind11)
  endif()

  # PYTHON_INCLUDE_DIRS is the legacy (FindPythonLibs-style) variable name;
  # Python3_INCLUDE_DIRS is the one provided by find_package(Python3). Either
  # may be empty, in which case it simply contributes nothing.
  include_directories(
    ${PYTHON_INCLUDE_DIRS}
    ${Python3_INCLUDE_DIRS}
    ${pybind11_INCLUDE_DIR})
endif()
18 changes: 0 additions & 18 deletions pybind_interface/GetPybind11.cmake

This file was deleted.

Loading
Loading