Updates and lint

LyricZhao · LyricZhao · commit bffc64b296a1 · 2025-09-02T14:38:36.000+08:00
diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml
@@ -170,7 +170,7 @@ jobs:
           export MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2)
           export NVCC_THREADS=2
           export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
-          export DEEP_GEMM_NO_LOCAL_VERSION=${{ inputs.use-local-version && 'FALSE' || 'TRUE' }}
+          export DG_NO_LOCAL_VERSION=${{ inputs.use-local-version && '0' || '1' }}
 
           # 5h timeout since GH allows max 6h and we want some buffer
           EXIT_CODE=0
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -41,7 +41,7 @@ jobs:
         # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
         # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
         os: [ubuntu-22.04]
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
         torch-version: ["2.4.0", "2.5.1", "2.6.0", "2.7.1", "2.8.0"]
         cuda-version: ["12.9.1"]
         # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
@@ -83,8 +83,8 @@ jobs:
           pip install torch --index-url https://download.pytorch.org/whl/cpu
       - name: Build core package
         env:
-          DEEP_GEMM_NO_LOCAL_VERSION: "TRUE"
-          DEEP_GEMM_SKIP_CUDA_BUILD: "TRUE"
+          DG_NO_LOCAL_VERSION: "1"
+          DG_SKIP_CUDA_BUILD: "1"
         run: |
           python setup.py sdist --dist-dir=dist
       - name: Deploy
diff --git a/deep_gemm/__init__.py b/deep_gemm/__init__.py
@@ -73,4 +73,4 @@ def _find_cuda_home() -> str:
     _find_cuda_home()                           # CUDA home
 )
 
-__version__ = "2.0.0"
+__version__ = '2.0.0'
diff --git a/setup.py b/setup.py
@@ -1,32 +1,31 @@
 import ast
 import os
 import re
-import setuptools
 import shutil
+import setuptools
 import subprocess
 import sys
-import urllib
 import torch
 import platform
+import urllib
+import urllib.error
+import urllib.request
 from setuptools import find_packages
 from setuptools.command.build_py import build_py
-from torch.utils.cpp_extension import CUDAExtension, CUDA_HOME
-from pathlib import Path
 from packaging.version import parse
+from pathlib import Path
+from torch.utils.cpp_extension import CUDAExtension, CUDA_HOME
 from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
 
-SKIP_CUDA_BUILD = os.getenv("DEEP_GEMM_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
-NO_LOCAL_VERSION = os.getenv("DEEP_GEMM_NO_LOCAL_VERSION", "FALSE") == "TRUE"
-FORCE_BUILD = os.getenv("DEEP_GEMM_FORCE_BUILD", "FALSE") == "TRUE"
 
-BASE_WHEEL_URL = (
-    "https://github.com/DeepSeek-AI/DeepGEMM/releases/download/{tag_name}/{wheel_name}"
-)
-PACKAGE_NAME = "deep_gemm"
-
-current_dir = os.path.dirname(os.path.realpath(__file__))
+# Compiler flags
 cxx_flags = ['-std=c++17', '-O3', '-fPIC', '-Wno-psabi', '-Wno-deprecated-declarations',
              f'-D_GLIBCXX_USE_CXX11_ABI={int(torch.compiled_with_cxx11_abi())}']
+if int(os.environ.get('DG_JIT_USE_RUNTIME_API', '0')):
+    cxx_flags.append('-DDG_JIT_USE_RUNTIME_API')
+
+# Sources
+current_dir = os.path.dirname(os.path.realpath(__file__))
 sources = ['csrc/python_api.cpp']
 build_include_dirs = [
     f'{CUDA_HOME}/include',
@@ -45,67 +44,60 @@
     'third-party/cutlass/include/cutlass',
 ]
 
-# Use runtime API
-if int(os.environ.get('DG_JIT_USE_RUNTIME_API', '0')):
-    cxx_flags.append('-DDG_JIT_USE_RUNTIME_API')
+# Release
+base_wheel_url = 'https://github.com/DeepSeek-AI/DeepGEMM/releases/download/{tag_name}/{wheel_name}'
+
 
 def get_package_version():
+    """
+    Return the package version string, optionally suffixed with the git revision.
+
+    The public version is read from the `__version__` assignment in `deep_gemm/__init__.py`.
+    Unless the `DG_NO_LOCAL_VERSION` environment variable parses to a non-zero integer,
+    `+<short hash>` of the current git `HEAD` is appended; if git cannot provide one,
+    the suffix is omitted.
+
+    Raises:
+        RuntimeError: if no `__version__ = ...` line is found in `deep_gemm/__init__.py`.
+    """
-    with open(Path(current_dir) / "deep_gemm" / "__init__.py", "r") as f:
-        version_match = re.search(r"^__version__\s*=\s*(.*)$", f.read(), re.MULTILINE)
+    with open(Path(current_dir) / 'deep_gemm' / '__init__.py', 'r') as f:
+        version_match = re.search(r'^__version__\s*=\s*(.*)$', f.read(), re.MULTILINE)
+    if version_match is None:
+        raise RuntimeError('Unable to find `__version__` in deep_gemm/__init__.py')
     public_version = ast.literal_eval(version_match.group(1))
-    revision = ""
+    revision = ''
 
-    if not NO_LOCAL_VERSION:
+    if int(os.getenv('DG_NO_LOCAL_VERSION', '0')) == 0:
+        # noinspection PyBroadException
         try:
-            cmd = ["git", "rev-parse", "--short", "HEAD"]
-            revision = "+" + subprocess.check_output(cmd).decode("ascii").rstrip()
+            cmd = ['git', 'rev-parse', '--short', 'HEAD']
+            # Query the repository containing this file, not whatever the working directory is
+            revision = '+' + subprocess.check_output(cmd, cwd=current_dir).decode('ascii').rstrip()
-        except:
+        except Exception:
+            # Best effort: a missing git binary or a non-git checkout just means no suffix,
+            # while `KeyboardInterrupt`/`SystemExit` are no longer swallowed
-            revision = ""
+            revision = ''
+    return f'{public_version}{revision}'
 
-    return f"{public_version}{revision}"
 
 def get_platform():
+    # Return the platform name used in wheel file names: `linux_<machine>` on Linux.
+    # Any other `sys.platform` raises `ValueError`.
-    """
-    Returns the platform name as used in wheel filenames.
-    """
-    if sys.platform.startswith("linux"):
-        return f"linux_{platform.uname().machine}"
-    elif sys.platform == "darwin":
-        mac_version = ".".join(platform.mac_ver()[0].split(".")[:2])
-        return f"macosx_{mac_version}_x86_64"
-    elif sys.platform == "win32":
-        return "win_amd64"
+    if sys.platform.startswith('linux'):
+        return f'linux_{platform.uname().machine}'
     else:
-        raise ValueError("Unsupported platform: {}".format(sys.platform))
+        raise ValueError('Unsupported platform: {}'.format(sys.platform))
+
 
 def get_wheel_url():
+    # Build the download URL and file name of the prebuilt wheel matching this environment.
+    # The file name encodes the package version, the major CUDA version torch was built with,
+    # torch's `major.minor` version, the CXX11 ABI flag, the CPython tag and the platform name.
+    # Returns a `(wheel_url, wheel_filename)` tuple.
-    torch_version_raw = parse(torch.__version__)
-    python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
+    torch_version = parse(torch.__version__)
+    torch_version = f'{torch_version.major}.{torch_version.minor}'
+    python_version = f'cp{sys.version_info.major}{sys.version_info.minor}'
     platform_name = get_platform()
-    grouped_gemm_version = get_package_version()
-    torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"
+    deep_gemm_version = get_package_version()
     cxx11_abi = str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()
 
     # Determine the version numbers that will be used to determine the correct wheel
     # We're using the CUDA version used to build torch, not the one currently installed
-    # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
-    torch_cuda_version = parse(torch.version.cuda)
-    # For CUDA 11, we only compile for CUDA 11.8, and for CUDA 12 we only compile for CUDA 12.3
-    # to save CI time. Minor versions should be compatible.
-    torch_cuda_version = (
-        parse("11.8") if torch_cuda_version.major == 11 else parse("12.3")
-    )
-    # cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
-    cuda_version = f"{torch_cuda_version.major}"
+    # NOTE(review): `torch.version.cuda` is presumably `None` on CPU-only torch builds,
+    # which `parse` would reject - confirm whether that case needs handling
+    cuda_version = parse(torch.version.cuda)
+    cuda_version = f'{cuda_version.major}'
 
     # Determine wheel URL based on CUDA version, torch version, python version and OS
-    wheel_filename = f"{PACKAGE_NAME}-{grouped_gemm_version}+cu{cuda_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl"
+    wheel_filename = f'deep_gemm-{deep_gemm_version}+cu{cuda_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl'
+    # The release tag is the package version prefixed with `v`
+    wheel_url = base_wheel_url.format(tag_name=f'v{deep_gemm_version}', wheel_name=wheel_filename)
+    return wheel_url, wheel_filename
 
-    wheel_url = BASE_WHEEL_URL.format(
-        tag_name=f"v{grouped_gemm_version}", wheel_name=wheel_filename
-    )
 
-    return wheel_url, wheel_filename
+def get_ext_modules():
+    """
+    Return the extension modules to hand to `setuptools.setup`.
+
+    Returns an empty list when the `DG_SKIP_CUDA_BUILD` environment variable parses to a
+    non-zero integer, otherwise a list holding the `deep_gemm_cpp` CUDA extension.
+    """
+    # `os.getenv` returns a string, so parse it before comparing with an integer:
+    # a raw string never equals `0`, which would skip the CUDA build unconditionally
+    if int(os.getenv('DG_SKIP_CUDA_BUILD', '0')) != 0:
+        return []
+
+    # Pass link and compile settings explicitly; `cxx_flags` carries the C++ standard,
+    # the CXX11 ABI define and the optional `DG_JIT_USE_RUNTIME_API` define
+    return [CUDAExtension(name='deep_gemm_cpp',
+                          sources=sources,
+                          include_dirs=build_include_dirs,
+                          libraries=build_libraries,
+                          library_dirs=build_library_dirs,
+                          extra_compile_args=cxx_flags)]
+
 
 class CustomBuildPy(build_py):
     def run(self):
@@ -145,60 +137,31 @@ def prepare_includes(self):
             # Copy the directory
             shutil.copytree(src_dir, dst_dir)
 
-if not SKIP_CUDA_BUILD:
-    ext_modules = [
-        CUDAExtension(
-            name="deep_gemm_cpp",
-            sources=sources,
-            include_dirs=build_include_dirs,
-        )
-    ]
-else:
-    ext_modules = []
 
 class CachedWheelsCommand(_bdist_wheel):
-    """
-    The CachedWheelsCommand plugs into the default bdist wheel, which is ran by pip when it cannot
-    find an existing wheel (which is currently the case for all grouped gemm installs). We use
-    the environment parameters to detect whether there is already a pre-built version of a compatible
-    wheel available and short-circuits the standard full build pipeline.
-    """
-
+    """
+    `bdist_wheel` command that first tries to reuse a prebuilt wheel from the release page.
+
+    The wheel URL is derived by `get_wheel_url()`. If the download fails, or if the
+    `DG_FORCE_BUILD` environment variable parses to a non-zero integer, the standard
+    `bdist_wheel` build is run instead.
+    """
+
     def run(self):
-        if FORCE_BUILD:
+        if int(os.getenv('DG_FORCE_BUILD', '0')) != 0:
             return super().run()
 
         wheel_url, wheel_filename = get_wheel_url()
-        print("Guessing wheel URL: ", wheel_url)
+        print(f'Try to download wheel from URL: {wheel_url}')
         try:
             urllib.request.urlretrieve(wheel_url, wheel_filename)
 
             # Make the archive
-            # Lifted from the root wheel processing command
-            # https://github.com/pypa/wheel/blob/cf71108ff9f6ffc36978069acb28824b44ae028e/src/wheel/bdist_wheel.py#LL381C9-L381C85
-            if not os.path.exists(self.dist_dir):
-                os.makedirs(self.dist_dir)
-
+            os.makedirs(self.dist_dir, exist_ok=True)
             impl_tag, abi_tag, plat_tag = self.get_tag()
-            archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"
-
-            wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
-            print("Raw wheel path", wheel_path)
+            archive_basename = f'{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}'
+            wheel_path = os.path.join(self.dist_dir, archive_basename + '.whl')
-            os.rename(wheel_filename, wheel_path)
+            # `shutil.move` also works when the download directory and `dist_dir` live on
+            # different filesystems, where `os.rename` fails with a cross-device link error
+            shutil.move(wheel_filename, wheel_path)
         except (urllib.error.HTTPError, urllib.error.URLError):
-            print("Precompiled wheel not found. Building from source...")
+            print('Precompiled wheel not found. Building from source...')
             # If the wheel could not be downloaded, build from source
             super().run()
 
 
 if __name__ == '__main__':
-    # noinspection PyBroadException
-    try:
-        cmd = ['git', 'rev-parse', '--short', 'HEAD']
-        revision = '+' + subprocess.check_output(cmd).decode('ascii').rstrip()
-    except:
-        revision = ''
-
     # noinspection PyTypeChecker
     setuptools.setup(
         name='deep_gemm',
@@ -211,14 +174,7 @@ def run(self):
                 'include/cutlass/**/*',
             ]
         },
-        ext_modules=[
-            CUDAExtension(name='deep_gemm_cpp',
-                          sources=sources,
-                          include_dirs=build_include_dirs,
-                          libraries=build_libraries,
-                          library_dirs=build_library_dirs,
-                          extra_compile_args=cxx_flags)
-        ],
+        ext_modules=get_ext_modules(),
         zip_safe=False,
         cmdclass={
             'build_py': CustomBuildPy,

Original file line number	Diff line number	Diff line change
`@@ -73,4 +73,4 @@ def _find_cuda_home() -> str:`
`73`	`73`	`_find_cuda_home() # CUDA home`
`74`	`74`	`)`
`75`	`75`
`76`		`-__version__ = "2.0.0"`
	`76`	`+__version__ = '2.0.0'`