diff --git a/README.rst b/README.rst index 1367cf3c..f1a71349 100644 --- a/README.rst +++ b/README.rst @@ -18,3 +18,12 @@ codecs for use in data storage and communication applications. .. image:: https://codecov.io/gh/zarr-developers/numcodecs/branch/main/graph/badge.svg :target: https://codecov.io/gh/zarr-developers/numcodecs + +--- +If you already have native Blosc, Zstd, and LZ4 installed on your system and want to use these system libraries instead of the vendored sources, you +should set the ``NUMCODECS_USE_SYSTEM_LIBS=1`` environment variable when building the wheel, like this:: + + $ NUMCODECS_USE_SYSTEM_LIBS=1 pip install numcodecs --no-binary numcodecs + +Blosc, Zstd, and LZ4 are found via the ``pkg-config`` utility. Moreover, you must build all three components: ``blosc``, ``libzstd``, +and ``liblz4``. C-Blosc comes with full sources for LZ4, LZ4HC, Snappy, Zlib, and Zstd, and in general you should not worry about not having (or CMake not finding) these libraries on your system, because by default the included sources are automatically compiled and included in the C-Blosc library. This effectively means that you can be confident of having complete support for all the codecs in all Blosc deployments (unless you explicitly exclude support for some of them). To compile Blosc, see these `instructions <https://github.com/Blosc/c-blosc?tab=readme-ov-file#compiling-the-blosc-library>`_. 
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 387603f3..09fe6fe6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,7 @@ requires = [ "Cython", "py-cpuinfo", "numpy>2", + "pkgconfig" ] build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index c27fad35..d5e02a00 100644 --- a/setup.py +++ b/setup.py @@ -6,127 +6,127 @@ from glob import glob import cpuinfo +import pkgconfig from Cython.Distutils.build_ext import new_build_ext as build_ext from setuptools import Extension, setup from setuptools.errors import CCompilerError, ExecError, PlatformError # determine CPU support for SSE2 and AVX2 cpu_info = cpuinfo.get_cpu_info() -flags = cpu_info.get('flags', []) +flags = cpu_info.get("flags", []) machine = cpuinfo.platform.machine() # only check for x86 features on x86_64 arch have_sse2 = False have_avx2 = False -if machine == 'x86_64': - have_sse2 = 'sse2' in flags - have_avx2 = 'avx2' in flags +if machine == "x86_64": + have_sse2 = "sse2" in flags + have_avx2 = "avx2" in flags -disable_sse2 = 'DISABLE_NUMCODECS_SSE2' in os.environ -disable_avx2 = 'DISABLE_NUMCODECS_AVX2' in os.environ +disable_sse2 = "DISABLE_NUMCODECS_SSE2" in os.environ +disable_avx2 = "DISABLE_NUMCODECS_AVX2" in os.environ +use_system_libraries = "NUMCODECS_USE_SYSTEM_LIBS" in os.environ # setup common compile arguments -have_cflags = 'CFLAGS' in os.environ +have_cflags = "CFLAGS" in os.environ base_compile_args = [] if have_cflags: # respect compiler options set by user pass -elif os.name == 'posix' and machine == 'x86_64': +elif os.name == "posix" and machine == "x86_64": if disable_sse2: - base_compile_args.append('-mno-sse2') + base_compile_args.append("-mno-sse2") elif have_sse2: - base_compile_args.append('-msse2') + base_compile_args.append("-msse2") if disable_avx2: - base_compile_args.append('-mno-avx2') + base_compile_args.append("-mno-avx2") elif have_avx2: - base_compile_args.append('-mavx2') + base_compile_args.append("-mavx2") 
# On macOS, force libc++ in case the system tries to use `stdlibc++`. # The latter is often absent from modern macOS systems. -if sys.platform == 'darwin': - base_compile_args.append('-stdlib=libc++') +if sys.platform == "darwin": + base_compile_args.append("-stdlib=libc++") def info(*msg): - kwargs = {'file': sys.stdout} - print('[numcodecs]', *msg, **kwargs) + kwargs = {"file": sys.stdout} + print("[numcodecs]", *msg, **kwargs) def error(*msg): - kwargs = {'file': sys.stderr} - print('[numcodecs]', *msg, **kwargs) + kwargs = {"file": sys.stderr} + print("[numcodecs]", *msg, **kwargs) -def blosc_extension(): - info('setting up Blosc extension') +def _blosc_extension_with_vendored_libs(): + info("setting up Blosc extension from vendored sources") extra_compile_args = base_compile_args.copy() extra_link_args = [] define_macros = [] # ensure pthread is properly linked on POSIX systems - if os.name == 'posix': - extra_compile_args.append('-pthread') - extra_link_args.append('-pthread') + if os.name == "posix": + extra_compile_args.append("-pthread") + extra_link_args.append("-pthread") # setup blosc sources - blosc_sources = [f for f in glob('c-blosc/blosc/*.c') if 'avx2' not in f and 'sse2' not in f] - include_dirs = [os.path.join('c-blosc', 'blosc')] + blosc_sources = [f for f in glob("c-blosc/blosc/*.c") if "avx2" not in f and "sse2" not in f] + include_dirs = [os.path.join("c-blosc", "blosc")] # add internal complibs - blosc_sources += glob('c-blosc/internal-complibs/lz4*/*.c') - blosc_sources += glob('c-blosc/internal-complibs/snappy*/*.cc') - blosc_sources += glob('c-blosc/internal-complibs/zlib*/*.c') - blosc_sources += glob('c-blosc/internal-complibs/zstd*/common/*.c') - blosc_sources += glob('c-blosc/internal-complibs/zstd*/compress/*.c') - blosc_sources += glob('c-blosc/internal-complibs/zstd*/decompress/*.c') - blosc_sources += glob('c-blosc/internal-complibs/zstd*/dictBuilder/*.c') - include_dirs += [d for d in glob('c-blosc/internal-complibs/*') if 
os.path.isdir(d)] - include_dirs += [d for d in glob('c-blosc/internal-complibs/*/*') if os.path.isdir(d)] - include_dirs += [d for d in glob('c-blosc/internal-complibs/*/*/*') if os.path.isdir(d)] + blosc_sources += glob("c-blosc/internal-complibs/lz4*/*.c") + blosc_sources += glob("c-blosc/internal-complibs/snappy*/*.cc") + blosc_sources += glob("c-blosc/internal-complibs/zlib*/*.c") + blosc_sources += glob("c-blosc/internal-complibs/zstd*/common/*.c") + blosc_sources += glob("c-blosc/internal-complibs/zstd*/compress/*.c") + blosc_sources += glob("c-blosc/internal-complibs/zstd*/decompress/*.c") + blosc_sources += glob("c-blosc/internal-complibs/zstd*/dictBuilder/*.c") + include_dirs += [d for d in glob("c-blosc/internal-complibs/*") if os.path.isdir(d)] + include_dirs += [d for d in glob("c-blosc/internal-complibs/*/*") if os.path.isdir(d)] + include_dirs += [d for d in glob("c-blosc/internal-complibs/*/*/*") if os.path.isdir(d)] # remove minizip because Python.h 3.8 tries to include crypt.h - include_dirs = [d for d in include_dirs if 'minizip' not in d] + include_dirs = [d for d in include_dirs if "minizip" not in d] define_macros += [ - ('HAVE_LZ4', 1), + ("HAVE_LZ4", 1), # ('HAVE_SNAPPY', 1), - ('HAVE_ZLIB', 1), - ('HAVE_ZSTD', 1), + ("HAVE_ZLIB", 1), + ("HAVE_ZSTD", 1), ] # define_macros += [('CYTHON_TRACE', '1')] # SSE2 if have_sse2 and not disable_sse2: - info('compiling Blosc extension with SSE2 support') - extra_compile_args.append('-DSHUFFLE_SSE2_ENABLED') - blosc_sources += [f for f in glob('c-blosc/blosc/*.c') if 'sse2' in f] - if os.name == 'nt': - define_macros += [('__SSE2__', 1)] + info("compiling Blosc extension with SSE2 support") + extra_compile_args.append("-DSHUFFLE_SSE2_ENABLED") + blosc_sources += [f for f in glob("c-blosc/blosc/*.c") if "sse2" in f] + if os.name == "nt": + define_macros += [("__SSE2__", 1)] else: - info('compiling Blosc extension without SSE2 support') + info("compiling Blosc extension without SSE2 support") # AVX2 if 
have_avx2 and not disable_avx2: - info('compiling Blosc extension with AVX2 support') - extra_compile_args.append('-DSHUFFLE_AVX2_ENABLED') - blosc_sources += [f for f in glob('c-blosc/blosc/*.c') if 'avx2' in f] - if os.name == 'nt': - define_macros += [('__AVX2__', 1)] + info("compiling Blosc extension with AVX2 support") + extra_compile_args.append("-DSHUFFLE_AVX2_ENABLED") + blosc_sources += [f for f in glob("c-blosc/blosc/*.c") if "avx2" in f] + if os.name == "nt": + define_macros += [("__AVX2__", 1)] else: - info('compiling Blosc extension without AVX2 support') + info("compiling Blosc extension without AVX2 support") # include assembly files - if cpuinfo.platform.machine() == 'x86_64': - extra_objects = [ - S[:-1] + 'o' for S in glob("c-blosc/internal-complibs/zstd*/decompress/*amd64.S") - ] + if cpuinfo.platform.machine() == "x86_64": + extra_objects = [S[:-1] + "o" for S in glob("c-blosc/internal-complibs/zstd*/decompress/*amd64.S")] else: extra_objects = [] - sources = ['numcodecs/blosc.pyx'] + sources = ["numcodecs/blosc.pyx"] # define extension module return [ Extension( - 'numcodecs.blosc', + "numcodecs.blosc", sources=sources + blosc_sources, include_dirs=include_dirs, define_macros=define_macros, @@ -137,8 +137,74 @@ def blosc_extension(): ] -def zstd_extension(): - info('setting up Zstandard extension') +def _blosc_extension_with_system_libs(): + info("setting up Blosc extension with system libraries") + + extra_compile_args = base_compile_args.copy() + extra_link_args = [] + + # ensure pthread is properly linked on POSIX systems + if os.name == "posix": + extra_compile_args.append("-pthread") + extra_link_args.append("-pthread") + + blosc_package_configuration = pkgconfig.parse("blosc") + + define_macros = blosc_package_configuration["define_macros"] + include_dirs = blosc_package_configuration["include_dirs"] + libraries = blosc_package_configuration["libraries"] + library_dirs = blosc_package_configuration["library_dirs"] + + # remove minizip 
because Python.h 3.8 tries to include crypt.h + include_dirs = [d for d in include_dirs if "minizip" not in d] + + # define_macros += [('CYTHON_TRACE', '1')] + + # SSE2 + if have_sse2 and not disable_sse2: + info("compiling Blosc extension with SSE2 support") + extra_compile_args.append("-DSHUFFLE_SSE2_ENABLED") + if os.name == "nt": + define_macros += [("__SSE2__", 1)] + else: + info("compiling Blosc extension without SSE2 support") + + # AVX2 + if have_avx2 and not disable_avx2: + info("compiling Blosc extension with AVX2 support") + extra_compile_args.append("-DSHUFFLE_AVX2_ENABLED") + if os.name == "nt": + define_macros += [("__AVX2__", 1)] + else: + info("compiling Blosc extension without AVX2 support") + + sources = ["numcodecs/blosc.pyx"] + + # define extension module + extensions = [ + Extension( + "numcodecs.blosc", + sources=sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + libraries=libraries, + library_dirs=library_dirs, + ), + ] + + return extensions + + +def blosc_extension(): + if use_system_libraries: + return _blosc_extension_with_system_libs() + else: + return _blosc_extension_with_vendored_libs() + + +def _zstd_extension_with_vendored_sources(): + info("setting up Zstandard extension from vendored sources") zstd_sources = [] extra_compile_args = base_compile_args.copy() @@ -146,28 +212,26 @@ def zstd_extension(): define_macros = [] # setup sources - use zstd bundled in blosc - zstd_sources += glob('c-blosc/internal-complibs/zstd*/common/*.c') - zstd_sources += glob('c-blosc/internal-complibs/zstd*/compress/*.c') - zstd_sources += glob('c-blosc/internal-complibs/zstd*/decompress/*.c') - zstd_sources += glob('c-blosc/internal-complibs/zstd*/dictBuilder/*.c') - include_dirs += [d for d in glob('c-blosc/internal-complibs/zstd*') if os.path.isdir(d)] - include_dirs += [d for d in glob('c-blosc/internal-complibs/zstd*/*') if os.path.isdir(d)] + zstd_sources += 
glob("c-blosc/internal-complibs/zstd*/common/*.c") + zstd_sources += glob("c-blosc/internal-complibs/zstd*/compress/*.c") + zstd_sources += glob("c-blosc/internal-complibs/zstd*/decompress/*.c") + zstd_sources += glob("c-blosc/internal-complibs/zstd*/dictBuilder/*.c") + include_dirs += [d for d in glob("c-blosc/internal-complibs/zstd*") if os.path.isdir(d)] + include_dirs += [d for d in glob("c-blosc/internal-complibs/zstd*/*") if os.path.isdir(d)] # define_macros += [('CYTHON_TRACE', '1')] - sources = ['numcodecs/zstd.pyx'] + sources = ["numcodecs/zstd.pyx"] # include assembly files - if cpuinfo.platform.machine() == 'x86_64': - extra_objects = [ - S[:-1] + 'o' for S in glob("c-blosc/internal-complibs/zstd*/decompress/*amd64.S") - ] + if cpuinfo.platform.machine() == "x86_64": + extra_objects = [S[:-1] + "o" for S in glob("c-blosc/internal-complibs/zstd*/decompress/*amd64.S")] else: extra_objects = [] # define extension module return [ Extension( - 'numcodecs.zstd', + "numcodecs.zstd", sources=sources + zstd_sources, include_dirs=include_dirs, define_macros=define_macros, @@ -177,24 +241,62 @@ def zstd_extension(): ] -def lz4_extension(): - info('setting up LZ4 extension') +def _zstd_extension_with_system_libs(): + info("setting up Zstandard extension with system libraries") + + extra_compile_args = base_compile_args.copy() + + zstd_package_configuration = pkgconfig.parse("libzstd") + include_dirs = zstd_package_configuration["include_dirs"] + define_macros = zstd_package_configuration["define_macros"] + libraries = zstd_package_configuration["libraries"] + library_dirs = zstd_package_configuration["library_dirs"] + + # define_macros += [('CYTHON_TRACE', '1')] + + sources = ["numcodecs/zstd.pyx"] + + # define extension module + extensions = [ + Extension( + "numcodecs.zstd", + sources=sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + libraries=libraries, + library_dirs=library_dirs, + ), + ] + + return 
extensions + + +def zstd_extension(): + if use_system_libraries: + return _zstd_extension_with_system_libs() + else: + return _zstd_extension_with_vendored_sources() + + +def _lz4_extension_with_vendored_sources(): + info("setting up LZ4 extension from vendored sources") extra_compile_args = base_compile_args.copy() define_macros = [] # setup sources - use LZ4 bundled in blosc - lz4_sources = glob('c-blosc/internal-complibs/lz4*/*.c') - include_dirs = [d for d in glob('c-blosc/internal-complibs/lz4*') if os.path.isdir(d)] - include_dirs += ['numcodecs'] + lz4_sources = glob("c-blosc/internal-complibs/lz4*/*.c") + include_dirs = [d for d in glob("c-blosc/internal-complibs/lz4*") if os.path.isdir(d)] + include_dirs += ["numcodecs"] # define_macros += [('CYTHON_TRACE', '1')] - sources = ['numcodecs/lz4.pyx'] + sources = ["numcodecs/lz4.pyx"] # define extension module return [ Extension( - 'numcodecs.lz4', + "numcodecs.lz4", sources=sources + lz4_sources, include_dirs=include_dirs, define_macros=define_macros, @@ -203,23 +305,62 @@ def lz4_extension(): ] +def _lz4_extension_with_system_libs(): + info("setting up LZ4 extension with system libraries") + + extra_compile_args = base_compile_args.copy() + + lz4_package_configuration = pkgconfig.parse("liblz4") + include_dirs = lz4_package_configuration["include_dirs"] + define_macros = lz4_package_configuration["define_macros"] + libraries = lz4_package_configuration["libraries"] + library_dirs = lz4_package_configuration["library_dirs"] + + include_dirs += ["numcodecs"] + # define_macros += [('CYTHON_TRACE', '1')] + + sources = ["numcodecs/lz4.pyx"] + + # define extension module + extensions = [ + Extension( + "numcodecs.lz4", + sources=sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + libraries=libraries, + library_dirs=library_dirs, + ), + ] + + return extensions + + +def lz4_extension(): + if use_system_libraries: + return _lz4_extension_with_system_libs() + 
else: + return _lz4_extension_with_vendored_sources() + + def vlen_extension(): - info('setting up vlen extension') + info("setting up vlen extension") import numpy extra_compile_args = base_compile_args.copy() define_macros = [] # setup sources - include_dirs = ['numcodecs', numpy.get_include()] + include_dirs = ["numcodecs", numpy.get_include()] # define_macros += [('CYTHON_TRACE', '1')] - sources = ['numcodecs/vlen.pyx'] + sources = ["numcodecs/vlen.pyx"] # define extension module return [ Extension( - 'numcodecs.vlen', + "numcodecs.vlen", sources=sources, include_dirs=include_dirs, define_macros=define_macros, @@ -229,21 +370,21 @@ def vlen_extension(): def fletcher_extension(): - info('setting up fletcher32 extension') + info("setting up fletcher32 extension") extra_compile_args = base_compile_args.copy() define_macros = [] # setup sources - include_dirs = ['numcodecs'] + include_dirs = ["numcodecs"] # define_macros += [('CYTHON_TRACE', '1')] - sources = ['numcodecs/fletcher32.pyx'] + sources = ["numcodecs/fletcher32.pyx"] # define extension module return [ Extension( - 'numcodecs.fletcher32', + "numcodecs.fletcher32", sources=sources, include_dirs=include_dirs, define_macros=define_macros, @@ -253,21 +394,21 @@ def fletcher_extension(): def jenkins_extension(): - info('setting up jenkins extension') + info("setting up jenkins extension") extra_compile_args = base_compile_args.copy() define_macros = [] # setup sources - include_dirs = ['numcodecs'] - define_macros += [('CYTHON_TRACE', '1')] + include_dirs = ["numcodecs"] + define_macros += [("CYTHON_TRACE", "1")] - sources = ['numcodecs/jenkins.pyx'] + sources = ["numcodecs/jenkins.pyx"] # define extension module return [ Extension( - 'numcodecs.jenkins', + "numcodecs.jenkins", sources=sources, include_dirs=include_dirs, define_macros=define_macros, @@ -277,16 +418,16 @@ def jenkins_extension(): def compat_extension(): - info('setting up compat extension') + info("setting up compat extension") 
extra_compile_args = base_compile_args.copy() - sources = ['numcodecs/compat_ext.pyx'] + sources = ["numcodecs/compat_ext.pyx"] # define extension module return [ Extension( - 'numcodecs.compat_ext', + "numcodecs.compat_ext", sources=sources, extra_compile_args=extra_compile_args, ), @@ -294,19 +435,19 @@ def compat_extension(): def shuffle_extension(): - info('setting up shuffle extension') + info("setting up shuffle extension") extra_compile_args = base_compile_args.copy() - sources = ['numcodecs/_shuffle.pyx'] + sources = ["numcodecs/_shuffle.pyx"] # define extension module return [ - Extension('numcodecs._shuffle', sources=sources, extra_compile_args=extra_compile_args), + Extension("numcodecs._shuffle", sources=sources, extra_compile_args=extra_compile_args), ] -if sys.platform == 'win32': +if sys.platform == "win32": ext_errors = (CCompilerError, ExecError, PlatformError, IOError, ValueError) else: ext_errors = (CCompilerError, ExecError, PlatformError) @@ -322,33 +463,33 @@ class ve_build_ext(build_ext): def run(self): try: machine = cpuinfo.platform.machine() - if machine in ('x86_64', 'aarch64'): - pattern = '*amd64.S' if machine == 'x86_64' else '*aarch64.S' - S_files = glob(f'c-blosc/internal-complibs/zstd*/decompress/{pattern}') + if machine in ("x86_64", "aarch64"): + pattern = "*amd64.S" if machine == "x86_64" else "*aarch64.S" + S_files = glob(f"c-blosc/internal-complibs/zstd*/decompress/{pattern}") compiler = ccompiler.new_compiler() customize_compiler(compiler) - compiler.src_extensions.append('.S') + compiler.src_extensions.append(".S") compiler.compile(S_files) build_ext.run(self) except PlatformError as e: error(e) - raise BuildFailed from e + raise BuildFailed def build_extension(self, ext): try: build_ext.build_extension(self, ext) except ext_errors as e: error(e) - raise BuildFailed from e + raise BuildFailed class Sclean(clean): # Clean up .o files created by .S files def run(self): - if cpuinfo.platform.machine() == 'x86_64': - o_files = 
glob('c-blosc/internal-complibs/zstd*/decompress/*amd64.o') + if cpuinfo.platform.machine() == "x86_64": + o_files = glob("c-blosc/internal-complibs/zstd*/decompress/*amd64.o") for f in o_files: os.remove(f) @@ -368,7 +509,10 @@ def run_setup(with_extensions): + jenkins_extension() ) - cmdclass = {'build_ext': ve_build_ext, 'clean': Sclean} + if use_system_libraries: + cmdclass = {"build_ext": build_ext, "clean": Sclean} + else: + cmdclass = {"build_ext": ve_build_ext, "clean": Sclean} else: ext_modules = [] cmdclass = {} @@ -379,7 +523,7 @@ def run_setup(with_extensions): ) -if __name__ == '__main__': - is_pypy = hasattr(sys, 'pypy_translation_info') - with_extensions = not is_pypy and 'DISABLE_NUMCODECS_CEXT' not in os.environ +if __name__ == "__main__": + is_pypy = hasattr(sys, "pypy_translation_info") + with_extensions = not is_pypy and "DISABLE_NUMCODECS_CEXT" not in os.environ run_setup(with_extensions)