diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 2d1d4d58..37a80387 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -8,9 +8,11 @@ on: - "quaddtype-v*" paths: - "quaddtype/**" + - ".github/workflows/**" pull_request: paths: - "quaddtype/**" + - ".github/workflows/**" workflow_dispatch: jobs: @@ -19,12 +21,19 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + with: + submodules: recursive - name: Set up Python uses: actions/setup-python@v4 with: python-version: ">=3.10.0" + - name: Verify QuadBLAS submodule + run: | + ls -la quaddtype/numpy_quaddtype/QBLAS/ + ls -la quaddtype/numpy_quaddtype/QBLAS/include/quadblas/ + - name: Install cibuildwheel run: pip install cibuildwheel==2.20.0 @@ -34,16 +43,23 @@ jobs: CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 CIBW_BUILD_VERBOSITY: "3" CIBW_BEFORE_ALL: | + yum update -y + yum install -y cmake gcc gcc-c++ make git pkgconfig + # Install SLEEF in container git clone --branch 3.8 https://github.com/shibatch/sleef.git cd sleef - cmake -S . -B build -DSLEEF_BUILD_QUAD:BOOL=ON -DSLEEF_BUILD_SHARED_LIBS:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON + cmake -S . 
-B build \ + -DSLEEF_BUILD_QUAD:BOOL=ON \ + -DSLEEF_BUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON cmake --build build/ --clean-first -j cmake --install build --prefix /usr/local CIBW_ENVIRONMENT: > - CFLAGS="-I/usr/local/include $CFLAGS" - CXXFLAGS="-I/usr/local/include $CXXFLAGS" - LDFLAGS="-L/usr/local/lib64 $LDFLAGS" - LD_LIBRARY_PATH="/usr/local/lib64:$LD_LIBRARY_PATH" + CFLAGS="-I/usr/local/include -I{project}/numpy_quaddtype/QBLAS/include $CFLAGS" + CXXFLAGS="-I/usr/local/include -I{project}/numpy_quaddtype/QBLAS/include -fext-numeric-literals $CXXFLAGS" + LDFLAGS="-L/usr/local/lib64 -L/usr/local/lib -Wl,-rpath,/usr/local/lib64 -Wl,-rpath,/usr/local/lib -fopenmp $LDFLAGS" + LD_LIBRARY_PATH="/usr/local/lib64:/usr/local/lib:$LD_LIBRARY_PATH" + PKG_CONFIG_PATH="/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH" CIBW_REPAIR_WHEEL_COMMAND: | auditwheel repair -w {dest_dir} --plat manylinux_2_28_x86_64 {wheel} CIBW_TEST_COMMAND: | @@ -68,15 +84,21 @@ jobs: steps: - uses: actions/checkout@v3 + with: + submodules: recursive - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.10" + - name: Install dependencies + run: | + brew install cmake libomp git + - name: Install SLEEF env: - MACOSX_DEPLOYMENT_TARGET: "11.0" + MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os == 'macos-13' && '13.0' || '14.0' }} run: | git clone --branch 3.8 https://github.com/shibatch/sleef.git cd sleef @@ -84,11 +106,17 @@ jobs: -DSLEEF_BUILD_QUAD:BOOL=ON \ -DSLEEF_BUILD_SHARED_LIBS:BOOL=ON \ -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=${{ matrix.os == 'macos-13' && '13.0' || '14.0' }} \ -DCMAKE_INSTALL_RPATH="@loader_path/../lib" \ -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON cmake --build build/ --clean-first -j sudo cmake --install build --prefix /usr/local + + - name: Verify QuadBLAS submodule + run: | + ls -la quaddtype/numpy_quaddtype/QBLAS/ + ls -la 
quaddtype/numpy_quaddtype/QBLAS/include/quadblas/ + - name: Install cibuildwheel run: pip install cibuildwheel==2.20.0 @@ -96,18 +124,19 @@ jobs: env: CIBW_BUILD: "cp310-* cp311-* cp312-*" CIBW_ARCHS_MACOS: ${{ matrix.os == 'macos-13' && 'x86_64' || 'arm64' }} - CIBW_BUILD_VERBOSITY: "1" + CIBW_BUILD_VERBOSITY: "3" CIBW_ENVIRONMENT: > - MACOSX_DEPLOYMENT_TARGET="11.0" + MACOSX_DEPLOYMENT_TARGET="${{ matrix.os == 'macos-13' && '13.0' || '14.0' }}" DYLD_LIBRARY_PATH="/usr/local/lib:$DYLD_LIBRARY_PATH" - CFLAGS="-I/usr/local/include $CFLAGS" - CXXFLAGS="-I/usr/local/include $CXXFLAGS" + CFLAGS="-I/usr/local/include -I{project}/numpy_quaddtype/QBLAS/include $CFLAGS" + CXXFLAGS="-I/usr/local/include -I{project}/numpy_quaddtype/QBLAS/include $CXXFLAGS" LDFLAGS="-L/usr/local/lib $LDFLAGS" + PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH" CIBW_REPAIR_WHEEL_COMMAND: > delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_TEST_COMMAND: | pip install {package}[test] - pytest {project}/tests + pytest -s {project}/tests CIBW_TEST_EXTRAS: "test" run: | python -m cibuildwheel --output-dir wheelhouse @@ -118,6 +147,7 @@ jobs: path: ./quaddtype/wheelhouse/*.whl name: wheels-${{ matrix.os }} + # disabling QBLAS optimization for windows due to incompatibility with MSVC build_wheels_windows: name: Build wheels on Windows runs-on: windows-latest @@ -127,6 +157,8 @@ jobs: steps: - uses: actions/checkout@v3 + with: + submodules: recursive - name: Setup MSVC uses: ilammy/msvc-dev-cmd@v1 @@ -142,6 +174,12 @@ jobs: - name: Install CMake uses: lukka/get-cmake@latest + - name: Verify QuadBLAS submodule + shell: pwsh + run: | + Get-ChildItem quaddtype/numpy_quaddtype/QBLAS/ + Get-ChildItem quaddtype/numpy_quaddtype/QBLAS/include/quadblas/ + - name: Clone and Build SLEEF shell: pwsh run: | @@ -151,16 +189,6 @@ jobs: cmake --build build --config Release cmake --install build --prefix "C:/sleef" --config Release - - name: Setup build environment - shell: pwsh - 
run: | - $env:INCLUDE += ";C:\sleef\include" - $env:LIB += ";C:\sleef\lib" - $env:PATH = "C:\sleef\bin;$env:PATH" - echo "INCLUDE=$env:INCLUDE" >> $env:GITHUB_ENV - echo "LIB=$env:LIB" >> $env:GITHUB_ENV - echo "PATH=$env:PATH" >> $env:GITHUB_ENV - - name: Install build dependencies shell: bash -l {0} run: | @@ -177,10 +205,17 @@ jobs: MSSdk: "1" CIBW_BEFORE_BUILD: | pip install meson meson-python ninja numpy + CIBW_ENVIRONMENT: > + INCLUDE="C:/sleef/include;{project}/numpy_quaddtype/QBLAS/include;$INCLUDE" + LIB="C:/sleef/lib;$LIB" + PATH="C:/sleef/bin;$PATH" + CFLAGS="/IC:/sleef/include /I{project}/numpy_quaddtype/QBLAS/include /DDISABLE_QUADBLAS $CFLAGS" + CXXFLAGS="/IC:/sleef/include /I{project}/numpy_quaddtype/QBLAS/include /DDISABLE_QUADBLAS $CXXFLAGS" + LDFLAGS="C:/sleef/lib/sleef.lib C:/sleef/lib/sleefquad.lib $LDFLAGS" CIBW_REPAIR_WHEEL_COMMAND: 'delvewheel repair -w {dest_dir} {wheel} --add-path C:\sleef\bin' CIBW_TEST_COMMAND: | pip install {package}[test] - python -m pytest -v {project}/test + pytest -s {project}/tests CIBW_TEST_EXTRAS: test CIBW_TEST_FAIL_FAST: 1 shell: pwsh @@ -199,56 +234,21 @@ jobs: needs: [build_wheels_linux, build_wheels_macos, build_wheels_windows] runs-on: ubuntu-latest if: startsWith(github.ref, 'refs/tags/quaddtype-v') - + environment: name: quadtype_release url: https://pypi.org/p/numpy-quaddtype - + permissions: - id-token: write # IMPORTANT: mandatory for trusted publishing - + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: - name: Download all workflow run artifacts uses: actions/download-artifact@v4 with: path: dist - + - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: dist/* - - # With the current setup, we are not creating a release on GitHub. 
- # create_release: - # name: Create Release - # needs: [build_wheels_linux, build_wheels_macos, build_wheels_windows] - # runs-on: ubuntu-latest - # if: startsWith(github.ref, 'refs/tags/quaddtype-v') - - # steps: - # - name: Checkout code - # uses: actions/checkout@v2 - - # - name: Download all workflow run artifacts - # uses: actions/download-artifact@v4 - # with: - # path: artifacts - - # - name: Create Release - # id: create_release - # uses: actions/create-release@v1 - # env: - # GITHUB_TOKEN: ${{ secrets.QUADDTYPE_GITHUB_TOKEN }} - # with: - # tag_name: ${{ github.ref }} - # release_name: Release ${{ github.ref }} - # draft: false - # prerelease: false - - # - name: Upload Release Assets - # uses: softprops/action-gh-release@v1 - # if: startsWith(github.ref, 'refs/tags/') - # with: - # files: ./artifacts/**/*.whl - # env: - # GITHUB_TOKEN: ${{ secrets.QUADDTYPE_GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9267d9f4..542093c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,17 +59,37 @@ jobs: run: | sudo apt-get update sudo apt-get install -y libmpfr-dev libssl-dev libfftw3-dev + - name: Install SLEEF run: | + sudo apt-get update -y + sudo apt-get install -y cmake gcc g++ make git pkg-config git clone --branch 3.8 https://github.com/shibatch/sleef.git cd sleef cmake -S . -B build -DSLEEF_BUILD_QUAD:BOOL=ON -DSLEEF_BUILD_SHARED_LIBS:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON cmake --build build/ --clean-first -j - sudo cmake --install build --prefix /usr + sudo cmake --install build --prefix /usr/local + - name: Install quaddtype working-directory: quaddtype run: | - LDFLAGS="-Wl,-rpath,/usr/lib" python -m pip install . 
-v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug" + # Initialize submodules first + git submodule update --init --recursive + ls -la numpy_quaddtype/QBLAS/ + + # Set environment variables with proper export and correct paths + export CFLAGS="-I/usr/local/include -I$(pwd)/numpy_quaddtype/QBLAS/include" + export CXXFLAGS="-I/usr/local/include -I$(pwd)/numpy_quaddtype/QBLAS/include -fext-numeric-literals" + export LDFLAGS="-L/usr/local/lib64 -L/usr/local/lib -Wl,-rpath,/usr/local/lib64 -Wl,-rpath,/usr/local/lib -fopenmp" + export LD_LIBRARY_PATH="/usr/local/lib64:/usr/local/lib:$LD_LIBRARY_PATH" + + # Install with meson args to ensure the C++ flags are passed through + python -m pip install . -v --no-build-isolation \ + -Cbuilddir=build \ + -C'compile-args=-v' \ + -Csetup-args="-Dbuildtype=debug" \ + -Csetup-args="-Dcpp_args=-fext-numeric-literals" + - name: Run quaddtype tests working-directory: quaddtype run: | diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..523c79c8 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "quaddtype/numpy_quaddtype/QBLAS"] + path = quaddtype/numpy_quaddtype/QBLAS + url = https://github.com/SwayamInSync/QBLAS diff --git a/quaddtype/README.md b/quaddtype/README.md index 614a3c2d..11eef1d2 100644 --- a/quaddtype/README.md +++ b/quaddtype/README.md @@ -27,7 +27,7 @@ np.array([1,2,3], dtype=QuadPrecDType("longdouble")) The code needs the quad precision pieces of the sleef library, which is not available on most systems by default, so we have to generate -that first. The below assumes one has the required pieces to build +that first. The below assumes one has the required pieces to build sleef (cmake and libmpfr-dev), and that one is in the package directory locally. @@ -40,6 +40,7 @@ cd .. 
``` Building the `numpy-quaddtype` package from locally installed sleef: + ```bash export SLEEF_DIR=$PWD/sleef/build export LIBRARY_PATH=$SLEEF_DIR/lib @@ -53,11 +54,17 @@ source temp/bin/activate # Install the package pip install meson-python numpy pytest -export LDFLAGS="-Wl,-rpath,$SLEEF_DIR/lib" +export LDFLAGS="-Wl,-rpath,$SLEEF_DIR/lib -fopenmp -latomic -lpthread" +export CFLAGS="-fPIC" +export CXXFLAGS="-fPIC" + +# To build without QBLAS (default for MSVC) +# export CFLAGS="-fPIC -DDISABLE_QUADBLAS" +# export CXXFLAGS="-fPIC -DDISABLE_QUADBLAS" + python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' # Run the tests cd .. python -m pytest ``` - diff --git a/quaddtype/meson.build b/quaddtype/meson.build index e7e8dd97..c000675a 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -23,9 +23,17 @@ incdir_numpy = run_command(py, check : true ).stdout().strip() +# Add OpenMP dependency (optional, for threading) +openmp_dep = dependency('openmp', required: false) +dependencies = [sleef_dep, py_dep] +if openmp_dep.found() + dependencies += openmp_dep +endif + includes = include_directories( [ incdir_numpy, + 'numpy_quaddtype/QBLAS/include', 'numpy_quaddtype/src', ] ) @@ -42,10 +50,21 @@ srcs = [ 'numpy_quaddtype/src/scalar_ops.h', 'numpy_quaddtype/src/scalar_ops.cpp', 'numpy_quaddtype/src/ops.hpp', - 'numpy_quaddtype/src/umath.h', - 'numpy_quaddtype/src/umath.cpp', 'numpy_quaddtype/src/dragon4.h', - 'numpy_quaddtype/src/dragon4.c' + 'numpy_quaddtype/src/dragon4.c', + 'numpy_quaddtype/src/quadblas_interface.h', + 'numpy_quaddtype/src/quadblas_interface.cpp', + 'numpy_quaddtype/src/umath/umath.h', + 'numpy_quaddtype/src/umath/umath.cpp', + 'numpy_quaddtype/src/umath/binary_ops.h', + 'numpy_quaddtype/src/umath/binary_ops.cpp', + 'numpy_quaddtype/src/umath/unary_ops.h', + 'numpy_quaddtype/src/umath/unary_ops.cpp', + 'numpy_quaddtype/src/umath/comparison_ops.h', + 'numpy_quaddtype/src/umath/comparison_ops.cpp', + 
'numpy_quaddtype/src/umath/promoters.hpp', + 'numpy_quaddtype/src/umath/matmul.h', + 'numpy_quaddtype/src/umath/matmul.cpp', ] py.install_sources( @@ -60,7 +79,7 @@ py.extension_module('_quaddtype_main', srcs, link_args: is_windows ? ['/DEFAULTLIB:sleef', '/DEFAULTLIB:sleefquad'] : ['-lsleef', '-lsleefquad'], link_language: 'cpp', - dependencies: [sleef_dep, py_dep], + dependencies: dependencies, install: true, subdir: 'numpy_quaddtype', include_directories: includes diff --git a/quaddtype/numpy_quaddtype/QBLAS b/quaddtype/numpy_quaddtype/QBLAS new file mode 160000 index 00000000..9468e24a --- /dev/null +++ b/quaddtype/numpy_quaddtype/QBLAS @@ -0,0 +1 @@ +Subproject commit 9468e24a02b731563eba2aee0350e9219b36c102 diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py index e469a4c1..878180bc 100644 --- a/quaddtype/numpy_quaddtype/__init__.py +++ b/quaddtype/numpy_quaddtype/__init__.py @@ -2,13 +2,21 @@ QuadPrecision, QuadPrecDType, is_longdouble_128, - get_sleef_constant + get_sleef_constant, + set_num_threads, + get_num_threads, + get_quadblas_version ) +import multiprocessing + __all__ = [ 'QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision', - 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', 'pi', 'e', - 'log2e', 'log10e', 'ln2', 'ln10', 'max_value', 'min_value', 'epsilon' + 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', + # Constants + 'pi', 'e', 'log2e', 'log10e', 'ln2', 'ln10', 'max_value', 'min_value', 'epsilon', + # QuadBLAS related functions + 'set_num_threads', 'get_num_threads', 'get_quadblas_version' ] def SleefQuadPrecision(value): diff --git a/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp b/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp new file mode 100644 index 00000000..65feb604 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp @@ -0,0 +1,233 @@ +#include "quadblas_interface.h" +#include +#include + 
+#ifndef DISABLE_QUADBLAS +#include "../QBLAS/include/quadblas/quadblas.hpp" +#endif // DISABLE_QUADBLAS + +extern "C" { + + +#ifndef DISABLE_QUADBLAS + +int +qblas_dot(size_t n, Sleef_quad *x, size_t incx, Sleef_quad *y, size_t incy, Sleef_quad *result) +{ + if (!x || !y || !result || n == 0) { + return -1; + } + + try { + *result = QuadBLAS::dot(n, x, incx, y, incy); + return 0; + } + catch (...) { + return -1; + } +} + +int +qblas_gemv(char layout, char trans, size_t m, size_t n, Sleef_quad *alpha, Sleef_quad *A, + size_t lda, Sleef_quad *x, size_t incx, Sleef_quad *beta, Sleef_quad *y, size_t incy) +{ + if (!alpha || !A || !x || !beta || !y || m == 0 || n == 0) { + return -1; + } + + try { + // Convert layout + QuadBLAS::Layout qblas_layout; + if (layout == 'R' || layout == 'r') { + qblas_layout = QuadBLAS::Layout::RowMajor; + } + else if (layout == 'C' || layout == 'c') { + qblas_layout = QuadBLAS::Layout::ColMajor; + } + else { + return -1; // Invalid layout + } + + // Handle transpose (swap dimensions for transpose) + size_t actual_m = m, actual_n = n; + if (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') { + std::swap(actual_m, actual_n); + // For transpose, we need to adjust the layout + if (qblas_layout == QuadBLAS::Layout::RowMajor) { + qblas_layout = QuadBLAS::Layout::ColMajor; + } + else { + qblas_layout = QuadBLAS::Layout::RowMajor; + } + } + + // Call QBLAS GEMV + QuadBLAS::gemv(qblas_layout, actual_m, actual_n, *alpha, A, lda, x, incx, *beta, y, incy); + + return 0; + } + catch (...) 
{ + return -1; + } +} + +int +qblas_gemm(char layout, char transa, char transb, size_t m, size_t n, size_t k, Sleef_quad *alpha, + Sleef_quad *A, size_t lda, Sleef_quad *B, size_t ldb, Sleef_quad *beta, Sleef_quad *C, + size_t ldc) +{ + if (!alpha || !A || !B || !beta || !C || m == 0 || n == 0 || k == 0) { + return -1; + } + + try { + QuadBLAS::Layout qblas_layout; + if (layout == 'R' || layout == 'r') { + qblas_layout = QuadBLAS::Layout::RowMajor; + } + else if (layout == 'C' || layout == 'c') { + qblas_layout = QuadBLAS::Layout::ColMajor; + } + else { + return -1; // Invalid layout + } + + // For now, we only support no transpose + // TODO: Implement transpose support if needed + if ((transa != 'N' && transa != 'n') || (transb != 'N' && transb != 'n')) { + return -1; // Transpose not implemented yet + } + + QuadBLAS::gemm(qblas_layout, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); + + return 0; + } + catch (...) { + return -1; + } +} + +int +qblas_supports_backend(QuadBackendType backend) +{ + // QBLAS only supports SLEEF backend + return (backend == BACKEND_SLEEF) ? 
1 : 0; +} + +PyObject * +py_quadblas_set_num_threads(PyObject *self, PyObject *args) +{ + int num_threads; + if (!PyArg_ParseTuple(args, "i", &num_threads)) { + return NULL; + } + + if (num_threads <= 0) { + PyErr_SetString(PyExc_ValueError, "Number of threads must be positive"); + return NULL; + } + + QuadBLAS::set_num_threads(num_threads); + Py_RETURN_NONE; +} + +PyObject * +py_quadblas_get_num_threads(PyObject *self, PyObject *args) +{ + int num_threads = QuadBLAS::get_num_threads(); + return PyLong_FromLong(num_threads); +} + +PyObject * +py_quadblas_get_version(PyObject *self, PyObject *args) +{ + return PyUnicode_FromString("QuadBLAS 1.0.0 - High Performance Quad Precision BLAS"); +} + +int +_quadblas_set_num_threads(int num_threads) +{ + QuadBLAS::set_num_threads(num_threads); + return 0; +} + +int +_quadblas_get_num_threads(void) +{ + int num_threads = QuadBLAS::get_num_threads(); + return num_threads; +} + +#else // DISABLE_QUADBLAS + + +int +qblas_dot(size_t n, Sleef_quad *x, size_t incx, Sleef_quad *y, size_t incy, Sleef_quad *result) +{ + return -1; // QBLAS is disabled, dot product not available +} + +int +qblas_gemv(char layout, char trans, size_t m, size_t n, Sleef_quad *alpha, Sleef_quad *A, + size_t lda, Sleef_quad *x, size_t incx, Sleef_quad *beta, Sleef_quad *y, size_t incy) +{ + return -1; // QBLAS is disabled, GEMV not available +} + +int +qblas_gemm(char layout, char transa, char transb, size_t m, size_t n, size_t k, Sleef_quad *alpha, + Sleef_quad *A, size_t lda, Sleef_quad *B, size_t ldb, Sleef_quad *beta, Sleef_quad *C, + size_t ldc) +{ + return -1; // QBLAS is disabled, GEMM not available +} + +int +qblas_supports_backend(QuadBackendType backend) +{ + return -1; // QBLAS is disabled, backend support not available +} + +PyObject * +py_quadblas_set_num_threads(PyObject *self, PyObject *args) +{ + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return NULL; +} + +PyObject * 
+py_quadblas_get_num_threads(PyObject *self, PyObject *args) +{ + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return NULL; +} + +PyObject * +py_quadblas_get_version(PyObject *self, PyObject *args) +{ + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return NULL; +} + +int +_quadblas_set_num_threads(int num_threads) +{ + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return -1; +} + +int +_quadblas_get_num_threads(void) +{ + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return -1; +} + +#endif // DISABLE_QUADBLAS + +} // extern "C" \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quadblas_interface.h b/quaddtype/numpy_quaddtype/src/quadblas_interface.h new file mode 100644 index 00000000..76033ebc --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/quadblas_interface.h @@ -0,0 +1,44 @@ +#ifndef QUADBLAS_INTERFACE_H +#define QUADBLAS_INTERFACE_H + +#include +#include +#include "quad_common.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int +qblas_dot(size_t n, Sleef_quad *x, size_t incx, Sleef_quad *y, size_t incy, Sleef_quad *result); + +int +qblas_gemv(char layout, char trans, size_t m, size_t n, Sleef_quad *alpha, Sleef_quad *A, + size_t lda, Sleef_quad *x, size_t incx, Sleef_quad *beta, Sleef_quad *y, size_t incy); + +int +qblas_gemm(char layout, char transa, char transb, size_t m, size_t n, size_t k, Sleef_quad *alpha, + Sleef_quad *A, size_t lda, Sleef_quad *B, size_t ldb, Sleef_quad *beta, Sleef_quad *C, + size_t ldc); + +int +qblas_supports_backend(QuadBackendType backend); + +PyObject * +py_quadblas_set_num_threads(PyObject *self, PyObject *args); +PyObject * +py_quadblas_get_num_threads(PyObject *self, PyObject *args); +PyObject * +py_quadblas_get_version(PyObject *self, PyObject *args); + +int +_quadblas_set_num_threads(int num_threads); +int 
+_quadblas_get_num_threads(void); + +#ifdef __cplusplus +} +#endif + +#endif // QUADBLAS_INTERFACE_H \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c index f2935299..1e8fd535 100644 --- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -14,49 +14,60 @@ #include "scalar.h" #include "dtype.h" -#include "umath.h" +#include "umath/umath.h" #include "quad_common.h" +#include "quadblas_interface.h" #include "float.h" - -static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) { - if(sizeof(long double) == 16 && - LDBL_MANT_DIG == 113 && - LDBL_MAX_EXP == 16384) { +static PyObject * +py_is_longdouble_128(PyObject *self, PyObject *args) +{ + if (sizeof(long double) == 16 && LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384) { Py_RETURN_TRUE; - } else { + } + else { Py_RETURN_FALSE; } } -static PyObject* get_sleef_constant(PyObject* self, PyObject* args) { - const char* constant_name; +static PyObject * +get_sleef_constant(PyObject *self, PyObject *args) +{ + const char *constant_name; if (!PyArg_ParseTuple(args, "s", &constant_name)) { return NULL; } - QuadPrecisionObject* result = QuadPrecision_raw_new(BACKEND_SLEEF); + QuadPrecisionObject *result = QuadPrecision_raw_new(BACKEND_SLEEF); if (result == NULL) { return NULL; } if (strcmp(constant_name, "pi") == 0) { result->value.sleef_value = SLEEF_M_PIq; - } else if (strcmp(constant_name, "e") == 0) { + } + else if (strcmp(constant_name, "e") == 0) { result->value.sleef_value = SLEEF_M_Eq; - } else if (strcmp(constant_name, "log2e") == 0) { + } + else if (strcmp(constant_name, "log2e") == 0) { result->value.sleef_value = SLEEF_M_LOG2Eq; - } else if (strcmp(constant_name, "log10e") == 0) { + } + else if (strcmp(constant_name, "log10e") == 0) { result->value.sleef_value = SLEEF_M_LOG10Eq; - } else if (strcmp(constant_name, "ln2") == 0) { + } + else if (strcmp(constant_name, 
"ln2") == 0) { result->value.sleef_value = SLEEF_M_LN2q; - } else if (strcmp(constant_name, "ln10") == 0) { + } + else if (strcmp(constant_name, "ln10") == 0) { result->value.sleef_value = SLEEF_M_LN10q; - } else if (strcmp(constant_name, "quad_max") == 0) { + } + else if (strcmp(constant_name, "quad_max") == 0) { result->value.sleef_value = SLEEF_QUAD_MAX; - } else if (strcmp(constant_name, "quad_min") == 0) { + } + else if (strcmp(constant_name, "quad_min") == 0) { result->value.sleef_value = SLEEF_QUAD_MIN; - } else if (strcmp(constant_name, "epsilon") == 0) { + } + else if (strcmp(constant_name, "epsilon") == 0) { result->value.sleef_value = SLEEF_QUAD_EPSILON; } else { @@ -65,22 +76,23 @@ static PyObject* get_sleef_constant(PyObject* self, PyObject* args) { return NULL; } - return (PyObject*)result; + return (PyObject *)result; } static PyMethodDef module_methods[] = { - {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"}, - {"get_sleef_constant", get_sleef_constant, METH_VARARGS, "Get Sleef constant by name"}, - {NULL, NULL, 0, NULL} -}; + {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"}, + {"get_sleef_constant", get_sleef_constant, METH_VARARGS, "Get Sleef constant by name"}, + {"set_num_threads", py_quadblas_set_num_threads, METH_VARARGS, + "Set number of threads for QuadBLAS"}, + {"get_num_threads", py_quadblas_get_num_threads, METH_NOARGS, + "Get number of threads for QuadBLAS"}, + {"get_quadblas_version", py_quadblas_get_version, METH_NOARGS, "Get QuadBLAS version"}, + {NULL, NULL, 0, NULL}}; static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - .m_name = "_quaddtype_main", + PyModuleDef_HEAD_INIT, .m_name = "_quaddtype_main", .m_doc = "Quad (128-bit) floating point Data Type for NumPy with multiple backends", - .m_size = -1, - .m_methods = module_methods -}; + .m_size = -1, .m_methods = module_methods}; PyMODINIT_FUNC PyInit__quaddtype_main(void) diff --git 
a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp deleted file mode 100644 index 0058236a..00000000 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ /dev/null @@ -1,806 +0,0 @@ -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC - -extern "C" { -#include -#include - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/ufuncobject.h" - -#include "numpy/dtype_api.h" -} -#include "quad_common.h" -#include "scalar.h" -#include "dtype.h" -#include "umath.h" -#include "ops.hpp" - -// helper debugging function -static const char * -get_dtype_name(PyArray_DTypeMeta *dtype) -{ - if (dtype == &QuadPrecDType) { - return "QuadPrecDType"; - } - else if (dtype == &PyArray_BoolDType) { - return "BoolDType"; - } - else if (dtype == &PyArray_ByteDType) { - return "ByteDType"; - } - else if (dtype == &PyArray_UByteDType) { - return "UByteDType"; - } - else if (dtype == &PyArray_ShortDType) { - return "ShortDType"; - } - else if (dtype == &PyArray_UShortDType) { - return "UShortDType"; - } - else if (dtype == &PyArray_IntDType) { - return "IntDType"; - } - else if (dtype == &PyArray_UIntDType) { - return "UIntDType"; - } - else if (dtype == &PyArray_LongDType) { - return "LongDType"; - } - else if (dtype == &PyArray_ULongDType) { - return "ULongDType"; - } - else if (dtype == &PyArray_LongLongDType) { - return "LongLongDType"; - } - else if (dtype == &PyArray_ULongLongDType) { - return "ULongLongDType"; - } - else if (dtype == &PyArray_FloatDType) { - return "FloatDType"; - } - else if (dtype == &PyArray_DoubleDType) { - return "DoubleDType"; - } - else if (dtype == &PyArray_LongDoubleDType) { - return "LongDoubleDType"; - } - else { - return "UnknownDType"; - } -} - -static NPY_CASTING 
-quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], - npy_intp *NPY_UNUSED(view_offset)) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - if (given_descrs[1] == NULL) { - Py_INCREF(given_descrs[0]); - loop_descrs[1] = given_descrs[0]; - } - else { - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - } - - QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)given_descrs[0]; - QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[1]; - - if (descr_in->backend != descr_out->backend) { - return NPY_UNSAFE_CASTING; - } - - return NPY_NO_CASTING; -} - -template -int -quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); - - quad_value in, out; - while (N--) { - memcpy(&in, in_ptr, elem_size); - if (backend == BACKEND_SLEEF) { - out.sleef_value = sleef_op(&in.sleef_value); - } - else { - out.longdouble_value = longdouble_op(&in.longdouble_value); - } - memcpy(out_ptr, &out, elem_size); - - in_ptr += in_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - - while (N--) { - if (backend == BACKEND_SLEEF) { - *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in_ptr); - } - else { - *(long double *)out_ptr = longdouble_op((long double *)in_ptr); - } - in_ptr += in_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) -{ - PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); - if (ufunc == NULL) { - return -1; - } - - PyArray_DTypeMeta *dtypes[2] = {&QuadPrecDType, &QuadPrecDType}; - - PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, - {NPY_METH_strided_loop, - (void *)&quad_generic_unary_op_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, - (void *)&quad_generic_unary_op_strided_loop_unaligned}, - {0, NULL}}; - - PyArrayMethod_Spec Spec = { - .name = "quad_unary_op", - .nin = 1, - .nout = 1, - .casting = NPY_NO_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = dtypes, - .slots = slots, - }; - - if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { - return -1; - } - - return 0; -} - -int -init_quad_unary_ops(PyObject *numpy) 
-{ - if (create_quad_unary_ufunc(numpy, "negative") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "positive") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "absolute") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "rint") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "trunc") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "floor") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "ceil") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "square") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log2") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log10") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log1p") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "exp") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "exp2") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "sin") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "cos") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "tan") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "arcsin") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "arccos") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "arctan") < 0) { - return -1; - } - return 0; -} - -// Binary ufuncs - -static NPY_CASTING -quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], - PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) -{ - QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; - QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; - QuadBackendType target_backend; - - // Determine target backend and if casting is needed - NPY_CASTING casting = 
NPY_NO_CASTING; - if (descr_in1->backend != descr_in2->backend) { - target_backend = BACKEND_LONGDOUBLE; - casting = NPY_SAFE_CASTING; - } - else { - target_backend = descr_in1->backend; - } - - // Set up input descriptors, casting if necessary - for (int i = 0; i < 2; i++) { - if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { - loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[i]) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[i]); - loop_descrs[i] = given_descrs[i]; - } - } - - // Set up output descriptor - if (given_descrs[2] == NULL) { - loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[2]) { - return (NPY_CASTING)-1; - } - } - else { - QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; - if (descr_out->backend != target_backend) { - loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[2]) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[2]); - loop_descrs[2] = given_descrs[2]; - } - } - return casting; -} - -template -int -quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); - - quad_value in1, in2, out; - while (N--) { - memcpy(&in1, in1_ptr, elem_size); - memcpy(&in2, in2_ptr, elem_size); - if (backend == BACKEND_SLEEF) { - out.sleef_value = sleef_op(&in1.sleef_value, &in2.sleef_value); - } - else { - out.longdouble_value = longdouble_op(&in1.longdouble_value, &in2.longdouble_value); - } - memcpy(out_ptr, &out, elem_size); - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - - while (N--) { - if (backend == BACKEND_SLEEF) { - *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); - } - else { - *(long double *)out_ptr = longdouble_op((long double *)in1_ptr, (long double *)in2_ptr); - } - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -static int -quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], - PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) -{ - int nin = ufunc->nin; - int nargs = ufunc->nargs; - PyArray_DTypeMeta *common = NULL; - bool has_quad = false; - - // Handle the special case for reductions - if (op_dtypes[0] == NULL) { - assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ - for (int i = 0; i < 3; i++) { - Py_INCREF(op_dtypes[1]); - new_op_dtypes[i] = op_dtypes[1]; - } - return 0; - } - - // Check if any input or signature is QuadPrecision - for (int i = 0; i < nin; i++) { - if 
(op_dtypes[i] == &QuadPrecDType) { - has_quad = true; - } - } - - if (has_quad) { - common = &QuadPrecDType; - } - else { - for (int i = nin; i < nargs; i++) { - if (signature[i] != NULL) { - if (common == NULL) { - Py_INCREF(signature[i]); - common = signature[i]; - } - else if (common != signature[i]) { - Py_CLEAR(common); // Not homogeneous, unset common - break; - } - } - } - } - // If no common output dtype, use standard promotion for inputs - if (common == NULL) { - common = PyArray_PromoteDTypeSequence(nin, op_dtypes); - if (common == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Clear(); // Do not propagate normal promotion errors - } - - return -1; - } - } - - // Set all new_op_dtypes to the common dtype - for (int i = 0; i < nargs; i++) { - if (signature[i]) { - // If signature is specified for this argument, use it - Py_INCREF(signature[i]); - new_op_dtypes[i] = signature[i]; - } - else { - // Otherwise, use the common dtype - Py_INCREF(common); - - new_op_dtypes[i] = common; - } - } - - Py_XDECREF(common); - - return 0; -} - -template -int -create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) -{ - PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); - if (ufunc == NULL) { - return -1; - } - - PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType}; - - PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, - {NPY_METH_strided_loop, - (void *)&quad_generic_binop_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, - (void *)&quad_generic_binop_strided_loop_unaligned}, - {0, NULL}}; - - PyArrayMethod_Spec Spec = { - .name = "quad_binop", - .nin = 2, - .nout = 1, - .casting = NPY_NO_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE), - .dtypes = dtypes, - .slots = slots, - }; - - if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { - return -1; - } - - PyObject *promoter_capsule = - 
PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); - if (promoter_capsule == NULL) { - return -1; - } - - PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type); - if (DTypes == 0) { - Py_DECREF(promoter_capsule); - return -1; - } - - if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - return -1; - } - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - return 0; -} - -int -init_quad_binary_ops(PyObject *numpy) -{ - if (create_quad_binary_ufunc(numpy, "add") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "subtract") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "multiply") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "divide") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "power") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "mod") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "minimum") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "maximum") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "arctan2") < 0) { - return -1; - } - return 0; -} - -// comparison functions - -static NPY_CASTING -quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], - PyArray_Descr *loop_descrs[], - npy_intp *NPY_UNUSED(view_offset)) -{ - QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; - QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; - QuadBackendType target_backend; - - // As dealing with different backends then cast to boolean - NPY_CASTING casting = NPY_NO_CASTING; - if (descr_in1->backend != descr_in2->backend) { - target_backend = BACKEND_LONGDOUBLE; - casting = NPY_SAFE_CASTING; - } - else { - target_backend = descr_in1->backend; - } - - // Set up input descriptors, casting if necessary - for (int i = 0; i 
< 2; i++) { - if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { - loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[i]) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[i]); - loop_descrs[i] = given_descrs[i]; - } - } - - // Set up output descriptor - loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL); - if (!loop_descrs[2]) { - return (NPY_CASTING)-1; - } - return casting; -} - -template -int -quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); - - quad_value in1, in2; - while (N--) { - memcpy(&in1, in1_ptr, elem_size); - memcpy(&in2, in2_ptr, elem_size); - npy_bool result; - - if (backend == BACKEND_SLEEF) { - result = sleef_comp(&in1.sleef_value, &in2.sleef_value); - } - else { - result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); - } - - memcpy(out_ptr, &result, sizeof(npy_bool)); - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - while (N--) { - quad_value in1 = *(quad_value *)in1_ptr; - quad_value in2 = *(quad_value *)in2_ptr; - - npy_bool result; - - if (backend == BACKEND_SLEEF) { - result = sleef_comp(&in1.sleef_value, &in2.sleef_value); - } - else { - result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); - } - - *(npy_bool *)out_ptr = result; - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -NPY_NO_EXPORT int -comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], - PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) -{ - PyArray_DTypeMeta *new_signature[NPY_MAXARGS]; - memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *)); - new_signature[2] = NULL; - int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes); - if (res < 0) { - return -1; - } - Py_XSETREF(new_op_dtypes[2], &PyArray_BoolDType); - return 0; -} - -template -int 
-create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) -{ - PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); - if (ufunc == NULL) { - return -1; - } - - PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; - - PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, - {NPY_METH_strided_loop, - (void *)&quad_generic_comp_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, - (void *)&quad_generic_comp_strided_loop}, - {0, NULL}}; - - PyArrayMethod_Spec Spec = { - .name = "quad_comp", - .nin = 2, - .nout = 1, - .casting = NPY_SAFE_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = dtypes, - .slots = slots, - }; - - if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { - return -1; - } - - PyObject *promoter_capsule = - PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); - if (promoter_capsule == NULL) { - return -1; - } - - PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType); - if (DTypes == 0) { - Py_DECREF(promoter_capsule); - return -1; - } - - if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - return -1; - } - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - - return 0; -} - -int -init_quad_comps(PyObject *numpy) -{ - if (create_quad_comparison_ufunc(numpy, "equal") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "not_equal") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "less") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "less_equal") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "greater") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "greater_equal") < - 0) { - return -1; - } - - return 0; -} - -int -init_quad_umath(void) -{ - PyObject *numpy = PyImport_ImportModule("numpy"); - if (!numpy) { - 
PyErr_SetString(PyExc_ImportError, "Failed to import numpy module"); - return -1; - } - - if (init_quad_unary_ops(numpy) < 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad unary operations"); - goto err; - } - - if (init_quad_binary_ops(numpy) < 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad binary operations"); - goto err; - } - - if (init_quad_comps(numpy) < 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad comparison operations"); - goto err; - } - - Py_DECREF(numpy); - return 0; - -err: - Py_DECREF(numpy); - return -1; -} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp b/quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp new file mode 100644 index 00000000..aa6d19ce --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp @@ -0,0 +1,235 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + + +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +#include "numpy/ndarraytypes.h" + +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "../ops.hpp" +#include "promoters.hpp" +#include "binary_ops.h" + +static NPY_CASTING +quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], + PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + QuadBackendType target_backend; + + // Determine target backend and if casting is needed + NPY_CASTING casting = NPY_NO_CASTING; + if (descr_in1->backend != descr_in2->backend) { + 
target_backend = BACKEND_LONGDOUBLE; + casting = NPY_SAFE_CASTING; + } + else { + target_backend = descr_in1->backend; + } + + // Set up input descriptors, casting if necessary + for (int i = 0; i < 2; i++) { + if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { + loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[i]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + } + + // Set up output descriptor + if (given_descrs[2] == NULL) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; + if (descr_out->backend != target_backend) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[2]); + loop_descrs[2] = given_descrs[2]; + } + } + return casting; +} + +template +int +quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2, out; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + out.sleef_value = sleef_op(&in1.sleef_value, &in2.sleef_value); + } + else { + out.longdouble_value = longdouble_op(&in1.longdouble_value, &in2.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); + } + else { + *(long double *)out_ptr = longdouble_op((long double *)in1_ptr, (long double *)in2_ptr); + } + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + + +template +int +create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_binop_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_binop_strided_loop_unaligned}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_binop", + .nin = 2, 
+ .nout = 1, + .casting = NPY_NO_CASTING, + .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE), + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + PyObject *promoter_capsule = + PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); + if (promoter_capsule == NULL) { + return -1; + } + + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type); + if (DTypes == 0) { + Py_DECREF(promoter_capsule); + return -1; + } + + if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + return -1; + } + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + return 0; +} + + +int +init_quad_binary_ops(PyObject *numpy) +{ + if (create_quad_binary_ufunc(numpy, "add") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "subtract") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "multiply") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "divide") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "power") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "mod") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "minimum") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "maximum") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "arctan2") < 0) { + return -1; + } + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/binary_ops.h b/quaddtype/numpy_quaddtype/src/umath/binary_ops.h new file mode 100644 index 00000000..c45ec453 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/binary_ops.h @@ -0,0 +1,9 @@ +#ifndef _QUADDTYPE_BINARY_OPS_H +#define _QUADDTYPE_BINARY_OPS_H + +#include + +int +init_quad_binary_ops(PyObject *numpy); + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/comparison_ops.cpp 
b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.cpp new file mode 100644 index 00000000..095b6d39 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.cpp @@ -0,0 +1,240 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + + +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +#include "numpy/ndarraytypes.h" + +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "umath.h" +#include "../ops.hpp" +#include "promoters.hpp" +#include "binary_ops.h" +#include "comparison_ops.h" + + +static NPY_CASTING +quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], + PyArray_Descr *loop_descrs[], + npy_intp *NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + QuadBackendType target_backend; + + // As dealing with different backends then cast to boolean + NPY_CASTING casting = NPY_NO_CASTING; + if (descr_in1->backend != descr_in2->backend) { + target_backend = BACKEND_LONGDOUBLE; + casting = NPY_SAFE_CASTING; + } + else { + target_backend = descr_in1->backend; + } + + // Set up input descriptors, casting if necessary + for (int i = 0; i < 2; i++) { + if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { + loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[i]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + } + + // Set up output descriptor + loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL); + if (!loop_descrs[2]) { + return 
(NPY_CASTING)-1; + } + return casting; +} + +template +int +quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + npy_bool result; + + if (backend == BACKEND_SLEEF) { + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else { + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + memcpy(out_ptr, &result, sizeof(npy_bool)); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + while (N--) { + quad_value in1 = *(quad_value *)in1_ptr; + quad_value in2 = *(quad_value *)in2_ptr; + + npy_bool result; + + if (backend == BACKEND_SLEEF) { + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else { + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + *(npy_bool *)out_ptr = result; + + in1_ptr += 
in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +NPY_NO_EXPORT int +comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], + PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) +{ + PyArray_DTypeMeta *new_signature[NPY_MAXARGS]; + memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *)); + new_signature[2] = NULL; + int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes); + if (res < 0) { + return -1; + } + Py_XSETREF(new_op_dtypes[2], &PyArray_BoolDType); + return 0; +} + +template +int +create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_comp_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_comp_strided_loop}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_comp", + .nin = 2, + .nout = 1, + .casting = NPY_SAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + PyObject *promoter_capsule = + PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); + if (promoter_capsule == NULL) { + return -1; + } + + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType); + if (DTypes == 0) { + Py_DECREF(promoter_capsule); + return -1; + } + + if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + return -1; + } + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + + return 0; +} + +int +init_quad_comps(PyObject *numpy) +{ 
+ if (create_quad_comparison_ufunc(numpy, "equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "not_equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "less") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "less_equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "greater") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "greater_equal") < + 0) { + return -1; + } + + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/comparison_ops.h b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.h new file mode 100644 index 00000000..e3b8cc0e --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.h @@ -0,0 +1,9 @@ +#ifndef _QUADDTYPE_COMPARISON_OPS_H +#define _QUADDTYPE_COMPARISON_OPS_H + +#include + +int +init_quad_comps(PyObject *numpy); + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/matmul.cpp b/quaddtype/numpy_quaddtype/src/umath/matmul.cpp new file mode 100644 index 00000000..354a342d --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/matmul.cpp @@ -0,0 +1,450 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +} + +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "../ops.hpp" +#include "matmul.h" +#include "promoters.hpp" +#include "../quadblas_interface.h" + +static NPY_CASTING +quad_matmul_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], + npy_intp 
*NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + + // QBLAS only supports SLEEF backend + if (descr_in1->backend != BACKEND_SLEEF || descr_in2->backend != BACKEND_SLEEF) { + PyErr_SetString(PyExc_NotImplementedError, + "QBLAS-accelerated matmul only supports SLEEF backend. " + "Other backends are not supported with QBLAS."); + return (NPY_CASTING)-1; + } + + // Both inputs must use SLEEF backend + QuadBackendType target_backend = BACKEND_SLEEF; + NPY_CASTING casting = NPY_NO_CASTING; + + // Set up input descriptors + for (int i = 0; i < 2; i++) { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + + // Set up output descriptor + if (given_descrs[2] == NULL) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; + if (descr_out->backend != target_backend) { + PyErr_SetString(PyExc_NotImplementedError, + "QBLAS-accelerated matmul only supports SLEEF backend for output."); + return (NPY_CASTING)-1; + } + else { + Py_INCREF(given_descrs[2]); + loop_descrs[2] = given_descrs[2]; + } + } + return casting; +} + +enum MatmulOperationType { + MATMUL_DOT, + MATMUL_GEMV, + MATMUL_GEMM +}; + +static MatmulOperationType +determine_operation_type(npy_intp m, npy_intp n, npy_intp p) +{ + if (m == 1 && p == 1) { + return MATMUL_DOT; + } + else if (p == 1) { + return MATMUL_GEMV; + } + else { + return MATMUL_GEMM; + } +} + +static int +quad_matmul_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + // Extract dimensions + npy_intp N = dimensions[0]; // Batch size, this remains always 1 for matmul afaik + npy_intp m = dimensions[1]; // Rows of first matrix + npy_intp 
n = dimensions[2]; // Cols of first matrix / rows of second matrix + npy_intp p = dimensions[3]; // Cols of second matrix + + // batch strides + npy_intp A_stride = strides[0]; + npy_intp B_stride = strides[1]; + npy_intp C_stride = strides[2]; + + // core strides for matrix dimensions + npy_intp A_row_stride = strides[3]; + npy_intp A_col_stride = strides[4]; + npy_intp B_row_stride = strides[5]; + npy_intp B_col_stride = strides[6]; + npy_intp C_row_stride = strides[7]; + npy_intp C_col_stride = strides[8]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + if (descr->backend != BACKEND_SLEEF) { + PyErr_SetString(PyExc_RuntimeError, "Internal error: non-SLEEF backend in QBLAS matmul"); + return -1; + } + + MatmulOperationType op_type = determine_operation_type(m, n, p); + Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0); + Sleef_quad beta = Sleef_cast_from_doubleq1(0.0); + + char *A = data[0]; + char *B = data[1]; + char *C = data[2]; + + Sleef_quad *A_ptr = (Sleef_quad *)A; + Sleef_quad *B_ptr = (Sleef_quad *)B; + Sleef_quad *C_ptr = (Sleef_quad *)C; + + int result = -1; + + switch (op_type) { + case MATMUL_DOT: { + size_t incx = A_col_stride / sizeof(Sleef_quad); + size_t incy = B_row_stride / sizeof(Sleef_quad); + + result = qblas_dot(n, A_ptr, incx, B_ptr, incy, C_ptr); + break; + } + + case MATMUL_GEMV: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t incx = B_row_stride / sizeof(Sleef_quad); + size_t incy = C_row_stride / sizeof(Sleef_quad); + + memset(C_ptr, 0, m * p * sizeof(Sleef_quad)); + + result = + qblas_gemv('R', 'N', m, n, &alpha, A_ptr, lda, B_ptr, incx, &beta, C_ptr, incy); + break; + } + + case MATMUL_GEMM: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t ldb = B_row_stride / sizeof(Sleef_quad); + size_t ldc_numpy = C_row_stride / sizeof(Sleef_quad); + + memset(C_ptr, 0, m * p * sizeof(Sleef_quad)); + + size_t ldc_temp = p; + + result = qblas_gemm('R', 'N', 'N', m, p, n, &alpha, A_ptr, lda, 
B_ptr, ldb, &beta, + C_ptr, ldc_numpy); + break; + } + } + + if (result != 0) { + PyErr_SetString(PyExc_RuntimeError, "QBLAS operation failed"); + return -1; + } + + return 0; +} + +static int +quad_matmul_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + // Extract dimensions + npy_intp N = dimensions[0]; // Batch size, this remains always 1 for matmul afaik + npy_intp m = dimensions[1]; // Rows of first matrix + npy_intp n = dimensions[2]; // Cols of first matrix / rows of second matrix + npy_intp p = dimensions[3]; // Cols of second matrix + + // batch strides + npy_intp A_stride = strides[0]; + npy_intp B_stride = strides[1]; + npy_intp C_stride = strides[2]; + + // core strides for matrix dimensions + npy_intp A_row_stride = strides[3]; + npy_intp A_col_stride = strides[4]; + npy_intp B_row_stride = strides[5]; + npy_intp B_col_stride = strides[6]; + npy_intp C_row_stride = strides[7]; + npy_intp C_col_stride = strides[8]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + if (descr->backend != BACKEND_SLEEF) { + PyErr_SetString(PyExc_RuntimeError, "Internal error: non-SLEEF backend in QBLAS matmul"); + return -1; + } + + MatmulOperationType op_type = determine_operation_type(m, n, p); + Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0); + Sleef_quad beta = Sleef_cast_from_doubleq1(0.0); + + char *A = data[0]; + char *B = data[1]; + char *C = data[2]; + + Sleef_quad *A_ptr = (Sleef_quad *)A; + Sleef_quad *B_ptr = (Sleef_quad *)B; + Sleef_quad *C_ptr = (Sleef_quad *)C; + + int result = -1; + + switch (op_type) { + case MATMUL_DOT: { + Sleef_quad *temp_A_buffer = new Sleef_quad[n]; + Sleef_quad *temp_B_buffer = new Sleef_quad[n]; + + memcpy(temp_A_buffer, A_ptr, n * sizeof(Sleef_quad)); + memcpy(temp_B_buffer, B_ptr, n * sizeof(Sleef_quad)); + + size_t incx = 1; + size_t incy = 1; + + result = qblas_dot(n, temp_A_buffer, 
incx, temp_B_buffer, incy, C_ptr); + + delete[] temp_A_buffer; + delete[] temp_B_buffer; + break; + } + + case MATMUL_GEMV: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t incx = B_row_stride / sizeof(Sleef_quad); + size_t incy = C_row_stride / sizeof(Sleef_quad); + + Sleef_quad *temp_A_buffer = new Sleef_quad[m * n]; + Sleef_quad *temp_B_buffer = new Sleef_quad[n * p]; + memcpy(temp_A_buffer, A_ptr, m * n * sizeof(Sleef_quad)); + memcpy(temp_B_buffer, B_ptr, n * p * sizeof(Sleef_quad)); + A_ptr = temp_A_buffer; + B_ptr = temp_B_buffer; + + // Use temp_C_buffer to avoid unaligned writes + Sleef_quad *temp_C_buffer = new Sleef_quad[m * p]; + + lda = n; + incx = 1; + incy = 1; + + memset(temp_C_buffer, 0, m * p * sizeof(Sleef_quad)); + + result = qblas_gemv('R', 'N', m, n, &alpha, A_ptr, lda, B_ptr, incx, &beta, + temp_C_buffer, incy); + break; + } + + case MATMUL_GEMM: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t ldb = B_row_stride / sizeof(Sleef_quad); + size_t ldc_numpy = C_row_stride / sizeof(Sleef_quad); + + Sleef_quad *temp_A_buffer = new Sleef_quad[m * n]; + Sleef_quad *temp_B_buffer = new Sleef_quad[n * p]; + memcpy(temp_A_buffer, A_ptr, m * n * sizeof(Sleef_quad)); + memcpy(temp_B_buffer, B_ptr, n * p * sizeof(Sleef_quad)); + A_ptr = temp_A_buffer; + B_ptr = temp_B_buffer; + + // since these are now contiguous so, + lda = n; + ldb = p; + size_t ldc_temp = p; + + Sleef_quad *temp_C_buffer = new Sleef_quad[m * p]; + memset(temp_C_buffer, 0, m * p * sizeof(Sleef_quad)); + + result = qblas_gemm('R', 'N', 'N', m, p, n, &alpha, A_ptr, lda, B_ptr, ldb, &beta, + temp_C_buffer, ldc_temp); + + if (result == 0) { + memcpy(C_ptr, temp_C_buffer, m * p * sizeof(Sleef_quad)); + } + + delete[] temp_C_buffer; + delete[] temp_A_buffer; + delete[] temp_B_buffer; + break; + } + } + + if (result != 0) { + PyErr_SetString(PyExc_RuntimeError, "QBLAS operation failed"); + return -1; + } + + return 0; +} + +static int 
+naive_matmul_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + npy_intp m = dimensions[1]; + npy_intp n = dimensions[2]; + npy_intp p = dimensions[3]; + + npy_intp A_batch_stride = strides[0]; + npy_intp B_stride = strides[1]; + npy_intp C_stride = strides[2]; + + npy_intp A_row_stride = strides[3]; + npy_intp A_col_stride = strides[4]; + npy_intp B_row_stride = strides[5]; + npy_intp B_col_stride = strides[6]; + npy_intp C_row_stride = strides[7]; + npy_intp C_col_stride = strides[8]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + char *A = data[0]; + char *B = data[1]; + char *C = data[2]; + + for (npy_intp i = 0; i < m; i++) { + for (npy_intp j = 0; j < p; j++) { + char *C_ij = C + i * C_row_stride + j * C_col_stride; + + if (backend == BACKEND_SLEEF) { + Sleef_quad sum = Sleef_cast_from_doubleq1(0.0); + + for (npy_intp k = 0; k < n; k++) { + char *A_ik = A + i * A_row_stride + k * A_col_stride; + char *B_kj = B + k * B_row_stride + j * B_col_stride; + + Sleef_quad a_val, b_val; + memcpy(&a_val, A_ik, sizeof(Sleef_quad)); + memcpy(&b_val, B_kj, sizeof(Sleef_quad)); + sum = Sleef_fmaq1_u05(a_val, b_val, sum); + } + + memcpy(C_ij, &sum, sizeof(Sleef_quad)); + } + else { + long double sum = 0.0L; + + for (npy_intp k = 0; k < n; k++) { + char *A_ik = A + i * A_row_stride + k * A_col_stride; + char *B_kj = B + k * B_row_stride + j * B_col_stride; + + long double a_val, b_val; + memcpy(&a_val, A_ik, sizeof(long double)); + memcpy(&b_val, B_kj, sizeof(long double)); + + sum += a_val * b_val; + } + + memcpy(C_ij, &sum, sizeof(long double)); + } + } + } + + return 0; +} + +int +init_matmul_ops(PyObject *numpy) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, 
"matmul"); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType}; + +#ifndef DISABLE_QUADBLAS + // set threading to max + int num_threads = _quadblas_get_num_threads(); + _quadblas_set_num_threads(num_threads); + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_matmul_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_matmul_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_matmul_strided_loop_unaligned}, + {0, NULL}}; +#else + PyType_Slot slots[] = {{NPY_METH_resolve_descriptors, (void *)&quad_matmul_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&naive_matmul_strided_loop}, + {NPY_METH_unaligned_strided_loop, (void *)&naive_matmul_strided_loop}, + {0, NULL}}; +#endif // DISABLE_QUADBLAS + + PyArrayMethod_Spec Spec = { + .name = "quad_matmul_qblas", + .nin = 2, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + Py_DECREF(ufunc); + return -1; + } + + PyObject *promoter_capsule = + PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); + if (promoter_capsule == NULL) { + Py_DECREF(ufunc); + return -1; + } + + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type); + if (DTypes == NULL) { + Py_DECREF(promoter_capsule); + Py_DECREF(ufunc); + return -1; + } + + if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { + PyErr_Clear(); + } + else { + } + + Py_DECREF(DTypes); + Py_DECREF(promoter_capsule); + Py_DECREF(ufunc); + + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/matmul.h b/quaddtype/numpy_quaddtype/src/umath/matmul.h new file mode 100644 index 00000000..12858497 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/matmul.h @@ -0,0 +1,17 @@ +#ifndef _QUADDTYPE_MATMUL_H +#define 
_QUADDTYPE_MATMUL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int +init_matmul_ops(PyObject *numpy); + +#ifdef __cplusplus +} +#endif + +#endif // _QUADDTYPE_MATMUL_H \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/promoters.hpp b/quaddtype/numpy_quaddtype/src/umath/promoters.hpp new file mode 100644 index 00000000..3b3c1ef3 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/promoters.hpp @@ -0,0 +1,90 @@ +#ifndef _QUADDTYPE_PROMOTERS +#define _QUADDTYPE_PROMOTERS + +#include +#include +#include +#include "numpy/arrayobject.h" +#include "numpy/ndarrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" + +#include "../dtype.h" + +inline int +quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], + PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) +{ + int nin = ufunc->nin; + int nargs = ufunc->nargs; + PyArray_DTypeMeta *common = NULL; + bool has_quad = false; + + // Handle the special case for reductions + if (op_dtypes[0] == NULL) { + assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ + for (int i = 0; i < 3; i++) { + Py_INCREF(op_dtypes[1]); + new_op_dtypes[i] = op_dtypes[1]; + } + return 0; + } + + // Check if any input or signature is QuadPrecision + for (int i = 0; i < nin; i++) { + if (op_dtypes[i] == &QuadPrecDType) { + has_quad = true; + } + } + + if (has_quad) { + common = &QuadPrecDType; + } + else { + for (int i = nin; i < nargs; i++) { + if (signature[i] != NULL) { + if (common == NULL) { + Py_INCREF(signature[i]); + common = signature[i]; + } + else if (common != signature[i]) { + Py_CLEAR(common); // Not homogeneous, unset common + break; + } + } + } + } + // If no common output dtype, use standard promotion for inputs + if (common == NULL) { + common = PyArray_PromoteDTypeSequence(nin, op_dtypes); + if (common == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); // Do not propagate normal promotion errors + } 
+ + return -1; + } + } + + // Set all new_op_dtypes to the common dtype + for (int i = 0; i < nargs; i++) { + if (signature[i]) { + // If signature is specified for this argument, use it + Py_INCREF(signature[i]); + new_op_dtypes[i] = signature[i]; + } + else { + // Otherwise, use the common dtype + Py_INCREF(common); + + new_op_dtypes[i] = common; + } + } + + Py_XDECREF(common); + + return 0; +} + + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/umath.cpp b/quaddtype/numpy_quaddtype/src/umath/umath.cpp new file mode 100644 index 00000000..50f95623 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/umath.cpp @@ -0,0 +1,116 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +} +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "umath.h" +#include "../ops.hpp" +#include "unary_ops.h" +#include "binary_ops.h" +#include "comparison_ops.h" +#include "matmul.h" + +// helper debugging function +static const char * +get_dtype_name(PyArray_DTypeMeta *dtype) +{ + if (dtype == &QuadPrecDType) { + return "QuadPrecDType"; + } + else if (dtype == &PyArray_BoolDType) { + return "BoolDType"; + } + else if (dtype == &PyArray_ByteDType) { + return "ByteDType"; + } + else if (dtype == &PyArray_UByteDType) { + return "UByteDType"; + } + else if (dtype == &PyArray_ShortDType) { + return "ShortDType"; + } + else if (dtype == &PyArray_UShortDType) { + return "UShortDType"; + } + else if (dtype == &PyArray_IntDType) { + return "IntDType"; + } + else if (dtype == &PyArray_UIntDType) { + return "UIntDType"; + } + else if (dtype == 
&PyArray_LongDType) { + return "LongDType"; + } + else if (dtype == &PyArray_ULongDType) { + return "ULongDType"; + } + else if (dtype == &PyArray_LongLongDType) { + return "LongLongDType"; + } + else if (dtype == &PyArray_ULongLongDType) { + return "ULongLongDType"; + } + else if (dtype == &PyArray_FloatDType) { + return "FloatDType"; + } + else if (dtype == &PyArray_DoubleDType) { + return "DoubleDType"; + } + else if (dtype == &PyArray_LongDoubleDType) { + return "LongDoubleDType"; + } + else { + return "UnknownDType"; + } +} + +int +init_quad_umath(void) +{ + PyObject *numpy = PyImport_ImportModule("numpy"); + if (!numpy) { + PyErr_SetString(PyExc_ImportError, "Failed to import numpy module"); + return -1; + } + + if (init_quad_unary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad unary operations"); + goto err; + } + + if (init_quad_binary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad binary operations"); + goto err; + } + + if (init_quad_comps(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad comparison operations"); + goto err; + } + + if (init_matmul_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad matrix multiplication operations"); + goto err; + } + + Py_DECREF(numpy); + return 0; + +err: + Py_DECREF(numpy); + return -1; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath.h b/quaddtype/numpy_quaddtype/src/umath/umath.h similarity index 95% rename from quaddtype/numpy_quaddtype/src/umath.h rename to quaddtype/numpy_quaddtype/src/umath/umath.h index d64f26be..c9253ef7 100644 --- a/quaddtype/numpy_quaddtype/src/umath.h +++ b/quaddtype/numpy_quaddtype/src/umath/umath.h @@ -12,4 +12,4 @@ init_quad_umath(void); } #endif -#endif +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/unary_ops.cpp b/quaddtype/numpy_quaddtype/src/umath/unary_ops.cpp new file mode 100644 
index 00000000..4c8f31f0 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/unary_ops.cpp @@ -0,0 +1,214 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" + +#include "numpy/dtype_api.h" +} +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "../ops.hpp" + +static NPY_CASTING +quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], + npy_intp *NPY_UNUSED(view_offset)) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + Py_INCREF(given_descrs[0]); + loop_descrs[1] = given_descrs[0]; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[1]; + + if (descr_in->backend != descr_out->backend) { + return NPY_UNSAFE_CASTING; + } + + return NPY_NO_CASTING; +} + +template +int +quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); + + quad_value in, out; + while (N--) { + memcpy(&in, in_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + out.sleef_value = sleef_op(&in.sleef_value); + } + else { + out.longdouble_value = longdouble_op(&in.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); + + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in_ptr); + } + else { + *(long double *)out_ptr = longdouble_op((long double *)in_ptr); + } + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[2] = {&QuadPrecDType, &QuadPrecDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_unary_op_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_unary_op_strided_loop_unaligned}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_unary_op", + .nin = 1, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + return 0; +} + +int +init_quad_unary_ops(PyObject *numpy) 
+{ + if (create_quad_unary_ufunc(numpy, "negative") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "positive") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "absolute") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "rint") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "trunc") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "floor") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "ceil") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "square") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log2") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log10") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log1p") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "exp") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "exp2") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "sin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "cos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "tan") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arcsin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arccos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arctan") < 0) { + return -1; + } + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/unary_ops.h b/quaddtype/numpy_quaddtype/src/umath/unary_ops.h new file mode 100644 index 00000000..6c3f17a4 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/unary_ops.h @@ -0,0 +1,9 @@ +#ifndef _QUADDTYPE_UNARY_OPS_H +#define _QUADDTYPE_UNARY_OPS_H + +#include + +int +init_quad_unary_ops(PyObject *numpy); + +#endif diff --git a/quaddtype/release_tracker.md b/quaddtype/release_tracker.md new file mode 100644 index 
00000000..fe398e65 --- /dev/null +++ b/quaddtype/release_tracker.md @@ -0,0 +1,107 @@ +# Plan for `numpy-quaddtype` v1.0.0 + +| ufunc name | Added | Edge Cases Tested\* | +| ------------- | ----- | ----------------------------------------------------------------------- | +| add | ✅ | ✅ | +| subtract | ✅ | ✅ | +| multiply | ✅ | ✅ | +| matmul | #116 | ✅ | +| divide | ✅ | ✅ | +| logaddexp | | | +| logaddexp2 | | | +| true_divide | | | +| floor_divide | | | +| negative | ✅ | ✅ | +| positive | ✅ | ✅ | +| power | ✅ | ✅ | +| float_power | | | +| remainder | | | +| mod | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/-0.0/large values)_ | +| fmod | | | +| divmod | | | +| absolute | ✅ | ✅ | +| fabs | | | +| rint | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/±0.0/halfway cases)_ | +| sign | | | +| heaviside | | | +| conj | | | +| conjugate | | | +| exp | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/large +/- values/overflow)_ | +| exp2 | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/large +/- values/overflow)_ | +| log | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/-values/1)_ | +| log2 | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/-values/1)_ | +| log10 | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/-values/1)_ | +| expm1 | | | +| log1p | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/-1/small values)_ | +| sqrt | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/-values)_ | +| square | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/large values)_ | +| cbrt | | | +| reciprocal | | | +| gcd | | | +| lcm | | | +| sin | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/π multiples/2π range)_ | +| cos | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/π multiples/2π range)_ | +| tan | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/π/2 asymptotes)_ | +| arcsin | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/±1/out-of-domain)_ | +| arccos | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/±1/out-of-domain)_ | +| arctan | ✅ | ❌ _Need: basic tests + edge cases 
(NaN/inf/0/asymptotes)_ | +| arctan2 | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/0/quadrant coverage)_ | +| hypot | | | +| sinh | | | +| cosh | | | +| tanh | | | +| arcsinh | | | +| arccosh | | | +| arctanh | | | +| degrees | | | +| radians | | | +| deg2rad | | | +| rad2deg | | | +| bitwise_and | | | +| bitwise_or | | | +| bitwise_xor | | | +| invert | | | +| left_shift | | | +| right_shift | | | +| greater | ✅ | ✅ | +| greater_equal | ✅ | ✅ | +| less | ✅ | ✅ | +| less_equal | ✅ | ✅ | +| not_equal | ✅ | ✅ | +| equal | ✅ | ✅ | +| logical_and | | | +| logical_or | | | +| logical_xor | | | +| logical_not | | | +| maximum | ✅ | ✅ | +| minimum | ✅ | ✅ | +| fmax | | | +| fmin | | | +| isfinite | | | +| isinf | | | +| isnan | | | +| isnat | | | +| signbit | | | +| copysign | | | +| nextafter | | | +| spacing | | | +| modf | | | +| ldexp | | | +| frexp | | | +| floor | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/±0.0/halfway values)_ | +| ceil | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/±0.0/halfway values)_ | +| trunc | ✅ | ❌ _Need: basic tests + edge cases (NaN/inf/±0.0/fractional values)_ | + +\* **Edge Cases Tested**: Indicates whether the ufunc has parametrized tests that compare QuadPrecision results against `float` and `np.float64` for edge cases including: + +- Special values: `0.0`, `-0.0`, `inf`, `-inf`, `nan`, `-nan` +- For trigonometric functions: Critical points like `0`, `π/2`, `π`, `3π/2`, `2π`, values in `[0, 2π]` +- For logarithmic functions: Values near `0`, `1`, large values +- For exponential functions: Large positive/negative values, values near `0` + +**Testing Status:** + +- ✅ = Comprehensive edge case tests exist in `test_quaddtype.py` with parametrized tests against float64 +- 🟡 = Good basic testing exists but missing some edge cases (specific missing tests noted in italics) +- ❌ = Ufunc is implemented but lacks systematic testing (required tests noted in italics) +- (blank) = Ufunc not yet implemented (implementation needed first) 
diff --git a/quaddtype/tests/test_dot.py b/quaddtype/tests/test_dot.py new file mode 100644 index 00000000..31d64ce7 --- /dev/null +++ b/quaddtype/tests/test_dot.py @@ -0,0 +1,796 @@ +import pytest +import numpy as np +from numpy_quaddtype import QuadPrecision, QuadPrecDType + + +# ================================================================================ +# UTILITIES +# ================================================================================ + +def assert_quad_equal(a, b, rtol=1e-15, atol=1e-15): + """Assert two quad precision values are equal within tolerance""" + # Ensure both operands are QuadPrecision objects for the comparison + if not isinstance(a, QuadPrecision): + a = QuadPrecision(str(a), backend='sleef') + if not isinstance(b, QuadPrecision): + b = QuadPrecision(str(b), backend='sleef') + + # Use quad-precision arithmetic to calculate the difference + diff = abs(a - b) + tolerance = QuadPrecision(str(atol), backend='sleef') + QuadPrecision(str(rtol), backend='sleef') * max(abs(a), abs(b)) + + # Assert using quad-precision objects + assert diff <= tolerance, f"Values not equal: {a} != {b} (diff: {diff}, tol: {tolerance})" + + +def assert_quad_array_equal(a, b, rtol=1e-25, atol=1e-25): + """Assert two quad precision arrays are equal within tolerance""" + assert a.shape == b.shape, f"Shapes don't match: {a.shape} vs {b.shape}" + + flat_a = a.flatten() + flat_b = b.flatten() + + for i, (val_a, val_b) in enumerate(zip(flat_a, flat_b)): + try: + assert_quad_equal(val_a, val_b, rtol, atol) + except AssertionError as e: + raise AssertionError(f"Arrays differ at index {i}: {e}") + + +def create_quad_array(values, shape=None): + """Create a QuadPrecision array from values using Sleef backend""" + dtype = QuadPrecDType(backend='sleef') + + if isinstance(values, (list, tuple)): + if shape is None: + # 1D array + quad_values = [QuadPrecision(str(float(v)), backend='sleef') for v in values] + return np.array(quad_values, dtype=dtype) + else: + # Reshape 
to specified shape + if len(shape) == 1: + quad_values = [QuadPrecision(str(float(v)), backend='sleef') for v in values] + return np.array(quad_values, dtype=dtype) + elif len(shape) == 2: + m, n = shape + assert len(values) == m * n, f"Values length {len(values)} doesn't match shape {shape}" + quad_matrix = [] + for i in range(m): + row = [QuadPrecision(str(float(values[i * n + j])), backend='sleef') for j in range(n)] + quad_matrix.append(row) + return np.array(quad_matrix, dtype=dtype) + + raise ValueError("Unsupported values or shape") + + +def is_special_value(val): + """Check if a value is NaN or infinite""" + try: + float_val = float(val) + return np.isnan(float_val) or np.isinf(float_val) + except: + return False + + +def arrays_equal_with_nan(a, b, rtol=1e-15, atol=1e-15): + """Compare arrays that may contain NaN values""" + if a.shape != b.shape: + return False + + flat_a = a.flatten() + flat_b = b.flatten() + + for i, (val_a, val_b) in enumerate(zip(flat_a, flat_b)): + # Handle NaN cases + if is_special_value(val_a) and is_special_value(val_b): + float_a = float(val_a) + float_b = float(val_b) + # Both NaN + if np.isnan(float_a) and np.isnan(float_b): + continue + # Both infinite with same sign + elif np.isinf(float_a) and np.isinf(float_b) and np.sign(float_a) == np.sign(float_b): + continue + else: + return False + elif is_special_value(val_a) or is_special_value(val_b): + return False + else: + try: + assert_quad_equal(val_a, val_b, rtol, atol) + except AssertionError: + return False + + return True + + +# ================================================================================ +# VECTOR-VECTOR DOT PRODUCT TESTS +# ================================================================================ + +class TestVectorVectorDot: + """Test vector-vector np.matmul products""" + + def test_simple_dot_product(self): + """Test basic vector np.matmul product""" + x = create_quad_array([1, 2, 3]) + y = create_quad_array([4, 5, 6]) + + result = 
np.matmul(x, y) + expected = 1*4 + 2*5 + 3*6 # = 32 + + assert isinstance(result, QuadPrecision) + assert_quad_equal(result, expected) + + def test_orthogonal_vectors(self): + """Test orthogonal vectors (should give zero)""" + x = create_quad_array([1, 0, 0]) + y = create_quad_array([0, 1, 0]) + + result = np.matmul(x, y) + assert_quad_equal(result, 0.0) + + def test_same_vector(self): + """Test np.matmul product of vector with itself""" + x = create_quad_array([2, 3, 4]) + + result = np.matmul(x, x) + expected = 2*2 + 3*3 + 4*4 # = 29 + + assert_quad_equal(result, expected) + + @pytest.mark.parametrize("size", [1, 2, 5, 10, 50, 100]) + def test_various_vector_sizes(self, size): + """Test different vector sizes from small to large""" + # Create vectors with known pattern + x_vals = [i + 1 for i in range(size)] # [1, 2, 3, ...] + y_vals = [2 * (i + 1) for i in range(size)] # [2, 4, 6, ...] + + x = create_quad_array(x_vals) + y = create_quad_array(y_vals) + + result = np.matmul(x, y) + expected = sum(x_vals[i] * y_vals[i] for i in range(size)) + + assert_quad_equal(result, expected) + + def test_negative_and_fractional_values(self): + """Test vectors with negative and fractional values""" + x = create_quad_array([1.5, -2.5, 3.25]) + y = create_quad_array([-1.25, 2.75, -3.5]) + + result = np.matmul(x, y) + expected = 1.5*(-1.25) + (-2.5)*2.75 + 3.25*(-3.5) + + assert_quad_equal(result, expected) + + +# ================================================================================ +# MATRIX-VECTOR MULTIPLICATION TESTS +# ================================================================================ + +class TestMatrixVectorDot: + """Test matrix-vector multiplication""" + + def test_simple_matrix_vector(self): + """Test basic matrix-vector multiplication""" + # 2x3 matrix + A = create_quad_array([1, 2, 3, 4, 5, 6], shape=(2, 3)) + # 3x1 vector + x = create_quad_array([1, 1, 1]) + + result = np.matmul(A, x) + expected = [1+2+3, 4+5+6] # [6, 15] + + assert 
result.shape == (2,) + for i in range(2): + assert_quad_equal(result[i], expected[i]) + + def test_identity_matrix_vector(self): + """Test multiplication with identity matrix""" + # 3x3 identity matrix + I = create_quad_array([1, 0, 0, 0, 1, 0, 0, 0, 1], shape=(3, 3)) + x = create_quad_array([2, 3, 4]) + + result = np.matmul(I, x) + + assert result.shape == (3,) + for i in range(3): + assert_quad_equal(result[i], float(x[i])) + + @pytest.mark.parametrize("m,n", [(2,3), (3,2), (5,4), (10,8), (20,15)]) + def test_various_matrix_vector_sizes(self, m, n): + """Test various matrix-vector sizes from small to large""" + # Create m×n matrix with sequential values + A_vals = [(i*n + j + 1) for i in range(m) for j in range(n)] + A = create_quad_array(A_vals, shape=(m, n)) + + # Create n×1 vector with simple values + x_vals = [i + 1 for i in range(n)] + x = create_quad_array(x_vals) + + result = np.matmul(A, x) + + assert result.shape == (m,) + + # Verify manually for small matrices + if m <= 5 and n <= 5: + for i in range(m): + expected = sum(A_vals[i*n + j] * x_vals[j] for j in range(n)) + assert_quad_equal(result[i], expected) + + +# ================================================================================ +# MATRIX-MATRIX MULTIPLICATION TESTS +# ================================================================================ + +class TestMatrixMatrixDot: + """Test matrix-matrix multiplication""" + + def test_simple_matrix_matrix(self): + """Test basic matrix-matrix multiplication""" + # 2x2 matrices + A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) + B = create_quad_array([5, 6, 7, 8], shape=(2, 2)) + + result = np.matmul(A, B) + + # Expected: [[1*5+2*7, 1*6+2*8], [3*5+4*7, 3*6+4*8]] = [[19, 22], [43, 50]] + expected = [[19, 22], [43, 50]] + + assert result.shape == (2, 2) + for i in range(2): + for j in range(2): + assert_quad_equal(result[i, j], expected[i][j]) + + def test_identity_matrix_multiplication(self): + """Test multiplication with identity matrix""" + 
A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) + I = create_quad_array([1, 0, 0, 1], shape=(2, 2)) + + # A * I should equal A + result1 = np.matmul(A, I) + assert_quad_array_equal(result1, A) + + # I * A should equal A + result2 = np.matmul(I, A) + assert_quad_array_equal(result2, A) + + @pytest.mark.parametrize("m,n,k", [(2,2,2), (2,3,4), (3,2,5), (4,4,4), (5,6,7)]) + def test_various_matrix_sizes(self, m, n, k): + """Test various matrix sizes: (m×k) × (k×n) = (m×n)""" + # Create A: m×k matrix + A_vals = [(i*k + j + 1) for i in range(m) for j in range(k)] + A = create_quad_array(A_vals, shape=(m, k)) + + # Create B: k×n matrix + B_vals = [(i*n + j + 1) for i in range(k) for j in range(n)] + B = create_quad_array(B_vals, shape=(k, n)) + + result = np.matmul(A, B) + + assert result.shape == (m, n) + + # Verify manually for small matrices + if m <= 3 and n <= 3 and k <= 3: + for i in range(m): + for j in range(n): + expected = sum(A_vals[i*k + l] * B_vals[l*n + j] for l in range(k)) + assert_quad_equal(result[i, j], expected) + + def test_associativity(self): + """Test matrix multiplication associativity: (A*B)*C = A*(B*C)""" + # Use small 2x2 matrices for simplicity + A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) + B = create_quad_array([2, 1, 1, 2], shape=(2, 2)) + C = create_quad_array([1, 1, 2, 1], shape=(2, 2)) + + # Compute (A*B)*C + AB = np.matmul(A, B) + result1 = np.matmul(AB, C) + + # Compute A*(B*C) + BC = np.matmul(B, C) + result2 = np.matmul(A, BC) + + assert_quad_array_equal(result1, result2, rtol=1e-25) + + +# ================================================================================ +# SPECIAL VALUES EDGE CASE TESTS +# ================================================================================ + +class TestSpecialValueEdgeCases: + """Test matmul with special IEEE 754 values (NaN, inf, -0.0)""" + + @pytest.mark.parametrize("special_val", ["0.0", "-0.0", "inf", "-inf", "nan", "-nan"]) + def test_vector_with_special_values(self, 
special_val): + """Test vectors containing special values""" + # Create vectors with special values + x = create_quad_array([1.0, float(special_val), 2.0]) + y = create_quad_array([3.0, 4.0, 5.0]) + + result = np.matmul(x, y) + + # Compare with float64 reference + x_float = np.array([1.0, float(special_val), 2.0], dtype=np.float64) + y_float = np.array([3.0, 4.0, 5.0], dtype=np.float64) + expected = np.matmul(x_float, y_float) + + # Handle special value comparisons + if np.isnan(expected): + assert np.isnan(float(result)) + elif np.isinf(expected): + assert np.isinf(float(result)) + assert np.sign(float(result)) == np.sign(expected) + else: + assert_quad_equal(result, expected) + + @pytest.mark.parametrize("special_val", ["0.0", "-0.0", "inf", "-inf", "nan"]) + def test_matrix_vector_with_special_values(self, special_val): + """Test matrix-vector multiplication with special values""" + # Matrix with special value + A = create_quad_array([1.0, float(special_val), 3.0, 4.0], shape=(2, 2)) + x = create_quad_array([2.0, 1.0]) + + result = np.matmul(A, x) + + # Compare with float64 reference + A_float = np.array([[1.0, float(special_val)], [3.0, 4.0]], dtype=np.float64) + x_float = np.array([2.0, 1.0], dtype=np.float64) + expected = np.matmul(A_float, x_float) + + assert result.shape == expected.shape + for i in range(len(expected)): + if np.isnan(expected[i]): + assert np.isnan(float(result[i])) + elif np.isinf(expected[i]): + assert np.isinf(float(result[i])) + assert np.sign(float(result[i])) == np.sign(expected[i]) + else: + assert_quad_equal(result[i], expected[i]) + + @pytest.mark.parametrize("special_val", ["0.0", "-0.0", "inf", "-inf", "nan"]) + def test_matrix_matrix_with_special_values(self, special_val): + """Test matrix-matrix multiplication with special values""" + A = create_quad_array([1.0, 2.0, float(special_val), 4.0], shape=(2, 2)) + B = create_quad_array([5.0, 6.0, 7.0, 8.0], shape=(2, 2)) + + result = np.matmul(A, B) + + # Compare with float64 
reference + A_float = np.array([[1.0, 2.0], [float(special_val), 4.0]], dtype=np.float64) + B_float = np.array([[5.0, 6.0], [7.0, 8.0]], dtype=np.float64) + expected = np.matmul(A_float, B_float) + + assert result.shape == expected.shape + assert arrays_equal_with_nan(result, expected) + + def test_all_nan_matrix(self): + """Test matrices filled with NaN""" + A = create_quad_array([float('nan')] * 4, shape=(2, 2)) + B = create_quad_array([1, 2, 3, 4], shape=(2, 2)) + + result = np.matmul(A, B) + + # Result should be all NaN (NaN * anything = NaN) + for i in range(2): + for j in range(2): + assert np.isnan(float(result[i, j])) + + def test_inf_times_zero_produces_nan(self): + """Test that Inf * 0 correctly produces NaN per IEEE 754""" + # Create a scenario where Inf * 0 occurs in matrix multiplication + A = create_quad_array([float('inf'), 1.0], shape=(1, 2)) + B = create_quad_array([0.0, 1.0], shape=(2, 1)) + + result = np.matmul(A, B) + + # Result should be inf*0 + 1*1 = NaN + 1 = NaN + assert np.isnan(float(result[0, 0])), "Inf * 0 should produce NaN per IEEE 754" + + def test_nan_propagation(self): + """Test that NaN properly propagates through matrix operations""" + A = create_quad_array([1.0, float('nan'), 3.0, 4.0], shape=(2, 2)) + B = create_quad_array([1.0, 0.0, 0.0, 1.0], shape=(2, 2)) # Identity + + result = np.matmul(A, B) + + # C[0,0] = 1*1 + nan*0 = 1 + nan = nan (nan*0 = nan, not like inf*0) + # C[0,1] = 1*0 + nan*1 = 0 + nan = nan + # C[1,0] = 3*1 + 4*0 = 3 + 0 = 3 + # C[1,1] = 3*0 + 4*1 = 0 + 4 = 4 + assert np.isnan(float(result[0, 0])) + assert np.isnan(float(result[0, 1])) + assert_quad_equal(result[1, 0], 3.0) + assert_quad_equal(result[1, 1], 4.0) + + def test_zero_division_and_indeterminate_forms(self): + """Test handling of indeterminate forms in matrix operations""" + # Test various indeterminate forms that should produce NaN + + # Case: Inf - Inf form + A = create_quad_array([float('inf'), float('inf')], shape=(1, 2)) + B = 
create_quad_array([1.0, -1.0], shape=(2, 1)) + + result = np.matmul(A, B) + + # Result should be inf*1 + inf*(-1) = inf - inf = NaN + assert np.isnan(float(result[0, 0])), "Inf - Inf should produce NaN per IEEE 754" + + def test_mixed_inf_values(self): + """Test matrices with mixed infinite values""" + # Use all-ones matrix to avoid Inf * 0 = NaN issues + A = create_quad_array([float('inf'), 2, float('-inf'), 3], shape=(2, 2)) + B = create_quad_array([1, 1, 1, 1], shape=(2, 2)) # All ones to avoid Inf*0 + + result = np.matmul(A, B) + + # C[0,0] = inf*1 + 2*1 = inf + 2 = inf + # C[0,1] = inf*1 + 2*1 = inf + 2 = inf + # C[1,0] = -inf*1 + 3*1 = -inf + 3 = -inf + # C[1,1] = -inf*1 + 3*1 = -inf + 3 = -inf + assert np.isinf(float(result[0, 0])) and float(result[0, 0]) > 0 + assert np.isinf(float(result[0, 1])) and float(result[0, 1]) > 0 + assert np.isinf(float(result[1, 0])) and float(result[1, 0]) < 0 + assert np.isinf(float(result[1, 1])) and float(result[1, 1]) < 0 + + +# ================================================================================ +# DEGENERATE AND EMPTY CASE TESTS +# ================================================================================ + +class TestDegenerateCases: + """Test edge cases with degenerate dimensions""" + + def test_single_element_matrices(self): + """Test 1x1 matrix operations""" + A = create_quad_array([3.0], shape=(1, 1)) + B = create_quad_array([4.0], shape=(1, 1)) + + result = np.matmul(A, B) + + assert result.shape == (1, 1) + assert_quad_equal(result[0, 0], 12.0) + + def test_single_element_vector(self): + """Test operations with single-element vectors""" + x = create_quad_array([5.0]) + y = create_quad_array([7.0]) + + result = np.matmul(x, y) + + assert isinstance(result, QuadPrecision) + assert_quad_equal(result, 35.0) + + def test_very_tall_matrix(self): + """Test very tall matrices (1000x1)""" + size = 1000 + A = create_quad_array([1.0] * size, shape=(size, 1)) + B = create_quad_array([2.0], shape=(1, 1)) + + 
result = np.matmul(A, B) + + assert result.shape == (size, 1) + for i in range(min(10, size)): # Check first 10 elements + assert_quad_equal(result[i, 0], 2.0) + + def test_very_wide_matrix(self): + """Test very wide matrices (1x1000)""" + size = 1000 + A = create_quad_array([1.0], shape=(1, 1)) + B = create_quad_array([3.0] * size, shape=(1, size)) + + result = np.matmul(A, B) + + assert result.shape == (1, size) + for i in range(min(10, size)): # Check first 10 elements + assert_quad_equal(result[0, i], 3.0) + + def test_zero_matrices(self): + """Test matrices filled with zeros""" + A = create_quad_array([0.0] * 9, shape=(3, 3)) + B = create_quad_array([1, 2, 3, 4, 5, 6, 7, 8, 9], shape=(3, 3)) + + result = np.matmul(A, B) + + assert result.shape == (3, 3) + for i in range(3): + for j in range(3): + assert_quad_equal(result[i, j], 0.0) + + def test_repeated_row_matrix(self): + """Test matrices with repeated rows""" + # Matrix with all rows the same + A = create_quad_array([1, 2, 3] * 3, shape=(3, 3)) # Each row is [1, 2, 3] + B = create_quad_array([1, 0, 0, 0, 1, 0, 0, 0, 1], shape=(3, 3)) # Identity + + result = np.matmul(A, B) + + # Result should have all rows equal to [1, 2, 3] + for i in range(3): + assert_quad_equal(result[i, 0], 1.0) + assert_quad_equal(result[i, 1], 2.0) + assert_quad_equal(result[i, 2], 3.0) + + def test_repeated_column_matrix(self): + """Test matrices with repeated columns""" + A = create_quad_array([1, 0, 0, 0, 1, 0, 0, 0, 1], shape=(3, 3)) # Identity + B = create_quad_array([2, 2, 2, 3, 3, 3, 4, 4, 4], shape=(3, 3)) # Each column repeated + + result = np.matmul(A, B) + + # Result should be same as B (identity multiplication) + assert_quad_array_equal(result, B) + + +# ================================================================================ +# NUMERICAL STABILITY AND PRECISION TESTS +# ================================================================================ + +class TestNumericalStability: + """Test numerical stability 
with extreme values""" + + def test_very_large_values(self): + """Test matrices with very large values""" + large_val = 1e100 + A = create_quad_array([large_val, 1, 1, large_val], shape=(2, 2)) + B = create_quad_array([1, 0, 0, 1], shape=(2, 2)) # Identity + + result = np.matmul(A, B) + + # Should preserve large values without overflow + assert_quad_equal(result[0, 0], large_val) + assert_quad_equal(result[1, 1], large_val) + assert not np.isinf(float(result[0, 0])) + assert not np.isinf(float(result[1, 1])) + + def test_very_small_values(self): + """Test matrices with very small values""" + small_val = 1e-100 + A = create_quad_array([small_val, 0, 0, small_val], shape=(2, 2)) + B = create_quad_array([1, 0, 0, 1], shape=(2, 2)) # Identity + + result = np.matmul(A, B) + + # Should preserve small values without underflow + assert_quad_equal(result[0, 0], small_val) + assert_quad_equal(result[1, 1], small_val) + assert float(result[0, 0]) != 0.0 + assert float(result[1, 1]) != 0.0 + + def test_mixed_scale_values(self): + """Test matrices with mixed magnitude values""" + A = create_quad_array([1e100, 1e-100, 1e50, 1e-50], shape=(2, 2)) + B = create_quad_array([1, 0, 0, 1], shape=(2, 2)) # Identity + + result = np.matmul(A, B) + + # All values should be preserved accurately + assert_quad_equal(result[0, 0], 1e100) + assert_quad_equal(result[0, 1], 1e-100) + assert_quad_equal(result[1, 0], 1e50) + assert_quad_equal(result[1, 1], 1e-50) + + def test_precision_critical_case(self): + """Test case that would lose precision in double""" + # Create a case where large values cancel in the dot product + # Vector: [1e20, 1.0, -1e20] dot [1, 0, 1] cancels exactly to 0.0 + x = create_quad_array([1e20, 1.0, -1e20]) + y = create_quad_array([1.0, 0.0, 1.0]) + + result = np.matmul(x, y) + + # result = 1e20*1 + 1.0*0 + (-1e20)*1 = 0; quad precision keeps the cancellation exact + # Now isolate the middle element: [1e20, 1.0, -1e20] dot [0, 1, 0] = 1.0 + x = create_quad_array([1e20, 1.0, -1e20]) + y =
create_quad_array([0.0, 1.0, 0.0]) + + result = np.matmul(x, y) + + # This would likely fail in double precision due to representation issues + assert_quad_equal(result, 1.0, atol=1e-25) + + def test_condition_number_extreme(self): + """Test matrices with extreme condition numbers""" + # Nearly singular matrix (very small determinant) + eps = 1e-50 + A = create_quad_array([1, 1, 1, 1+eps], shape=(2, 2)) + B = create_quad_array([1, 0, 0, 1], shape=(2, 2)) + + result = np.matmul(A, B) + + # Result should be computed accurately + assert_quad_equal(result[0, 0], 1.0) + assert_quad_equal(result[0, 1], 1.0) + assert_quad_equal(result[1, 0], 1.0) + assert_quad_equal(result[1, 1], 1.0 + eps) + + def test_accumulation_precision(self): + """Test precision in accumulation of many terms""" + size = 100 + # Create vectors where each term contributes equally + x_vals = [1.0 / size] * size + y_vals = [1.0] * size + + x = create_quad_array(x_vals) + y = create_quad_array(y_vals) + + result = np.matmul(x, y) + + # Result should be exactly 1.0 + assert_quad_equal(result, 1.0, atol=1e-25) + + +# ================================================================================ +# CROSS-VALIDATION TESTS +# ================================================================================ + +class TestCrossValidation: + """Test consistency with float64 reference implementations""" + + @pytest.mark.parametrize("size", [2, 3, 5, 10]) + def test_consistency_with_float64_vectors(self, size): + """Test vector operations consistency with float64""" + # Use values well within float64 range + x_vals = [i + 0.5 for i in range(size)] + y_vals = [2 * i + 1.5 for i in range(size)] + + # QuadPrecision computation + x_quad = create_quad_array(x_vals) + y_quad = create_quad_array(y_vals) + result_quad = np.matmul(x_quad, y_quad) + + # float64 reference + x_float = np.array(x_vals, dtype=np.float64) + y_float = np.array(y_vals, dtype=np.float64) + result_float = np.matmul(x_float, y_float) + + # Results 
should match within float64 precision + assert_quad_equal(result_quad, result_float, rtol=1e-14) + + @pytest.mark.parametrize("m,n,k", [(2,2,2), (3,3,3), (4,5,6)]) + def test_consistency_with_float64_matrices(self, m, n, k): + """Test matrix operations consistency with float64""" + # Create test matrices with float64-representable values + A_vals = [(i + j + 1) * 0.25 for i in range(m) for j in range(k)] + B_vals = [(i * 2 + j) * 0.125 for i in range(k) for j in range(n)] + + # QuadPrecision computation + A_quad = create_quad_array(A_vals, shape=(m, k)) + B_quad = create_quad_array(B_vals, shape=(k, n)) + result_quad = np.matmul(A_quad, B_quad) + + # float64 reference + A_float = np.array(A_vals, dtype=np.float64).reshape(m, k) + B_float = np.array(B_vals, dtype=np.float64).reshape(k, n) + result_float = np.matmul(A_float, B_float) + + # Results should match within float64 precision + for i in range(m): + for j in range(n): + assert_quad_equal(result_quad[i, j], result_float[i, j], rtol=1e-14) + + def test_quad_precision_advantage(self): + """Test cases where quad precision shows advantage over float64""" + A = create_quad_array([1.0, 1e-30], shape=(1, 2)) + B = create_quad_array([1.0, 1.0], shape=(2, 1)) + + result_quad = np.matmul(A, B) + + # The result should be 1.0 + 1e-30 = 1.0000000000000000000000000000001 + expected = 1.0 + 1e-30 + assert_quad_equal(result_quad[0, 0], expected, rtol=1e-25) + + # Verify that this value is actually different from 1.0 in quad precision + diff = result_quad[0, 0] - 1.0 + assert abs(diff) > 0 # Should be non-zero in quad precision + + +# ================================================================================ +# LARGE MATRIX TESTS +# ================================================================================ + +class TestLargeMatrices: + """Test performance and correctness with larger matrices""" + + @pytest.mark.parametrize("size", [50, 100, 200]) + def test_large_square_matrices(self, size): + """Test large square 
matrix multiplication""" + # Create matrices with simple pattern for verification + A_vals = [1.0 if i == j else 0.1 for i in range(size) for j in range(size)] # Near-diagonal + B_vals = [1.0] * (size * size) # All ones + + A = create_quad_array(A_vals, shape=(size, size)) + B = create_quad_array(B_vals, shape=(size, size)) + + result = np.matmul(A, B) + + assert result.shape == (size, size) + + # Each element = sum of a row in A = 1.0 + 0.1*(size-1) + expected_value = 1.0 + 0.1 * (size - 1) + + # Check diagonal and off-diagonal elements + assert_quad_equal(result[0, 0], expected_value, rtol=1e-15, atol=1e-15) + if size > 1: + assert_quad_equal(result[0, 1], expected_value, rtol=1e-15, atol=1e-15) + + # Additional verification: check a few more elements + if size > 2: + assert_quad_equal(result[1, 0], expected_value, rtol=1e-15, atol=1e-15) + assert_quad_equal(result[size//2, size//2], expected_value, rtol=1e-15, atol=1e-15) + + def test_large_vector_operations(self): + """Test large vector np.matmul products""" + size = 1000 + + # Create vectors with known sum + x_vals = [1.0] * size + y_vals = [2.0] * size + + x = create_quad_array(x_vals) + y = create_quad_array(y_vals) + + result = np.matmul(x, y) + expected = size * 1.0 * 2.0 # = 2000.0 + + assert_quad_equal(result, expected) + + def test_rectangular_large_matrices(self): + """Test large rectangular matrix operations""" + m, n, k = 100, 80, 120 + + # Create simple patterns + A_vals = [(i + j + 1) % 10 for i in range(m) for j in range(k)] + B_vals = [(i + j + 1) % 10 for i in range(k) for j in range(n)] + + A = create_quad_array(A_vals, shape=(m, k)) + B = create_quad_array(B_vals, shape=(k, n)) + + result = np.matmul(A, B) + + assert result.shape == (m, n) + + # Verify that result doesn't contain NaN or inf + result_flat = result.flatten() + for i in range(min(10, len(result_flat))): # Check first few elements + val = float(result_flat[i]) + assert not np.isnan(val), f"NaN found at position {i}" + assert not 
np.isinf(val), f"Inf found at position {i}" + + +# ================================================================================ +# BASIC ERROR HANDLING +# ================================================================================ + +class TestBasicErrorHandling: + """Test basic error conditions""" + + def test_dimension_mismatch_vectors(self): + """Test dimension mismatch in vectors""" + x = create_quad_array([1, 2]) + y = create_quad_array([1, 2, 3]) + + with pytest.raises(ValueError, match=r"matmul: Input operand 1 has a mismatch in its core dimension 0"): + np.matmul(x, y) + + def test_dimension_mismatch_matrix_vector(self): + """Test dimension mismatch in matrix-vector""" + A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) + x = create_quad_array([1, 2, 3]) # Wrong size + + with pytest.raises(ValueError, match=r"matmul: Input operand 1 has a mismatch in its core dimension 0"): + np.matmul(A, x) + + def test_dimension_mismatch_matrices(self): + """Test dimension mismatch in matrix-matrix""" + A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) + B = create_quad_array([1, 2, 3, 4, 5, 6], shape=(3, 2)) # Wrong size + + with pytest.raises(ValueError, match=r"matmul: Input operand 1 has a mismatch in its core dimension 0"): + np.matmul(A, B) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file