diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9267d9f4..542093c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,17 +59,37 @@ jobs: run: | sudo apt-get update sudo apt-get install -y libmpfr-dev libssl-dev libfftw3-dev + - name: Install SLEEF run: | + sudo apt-get update -y + sudo apt-get install -y cmake gcc g++ make git pkg-config git clone --branch 3.8 https://github.com/shibatch/sleef.git cd sleef cmake -S . -B build -DSLEEF_BUILD_QUAD:BOOL=ON -DSLEEF_BUILD_SHARED_LIBS:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON cmake --build build/ --clean-first -j - sudo cmake --install build --prefix /usr + sudo cmake --install build --prefix /usr/local + - name: Install quaddtype working-directory: quaddtype run: | - LDFLAGS="-Wl,-rpath,/usr/lib" python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug" + # Initialize submodules first + git submodule update --init --recursive + ls -la numpy_quaddtype/QBLAS/ + + # Set environment variables with proper export and correct paths + export CFLAGS="-I/usr/local/include -I$(pwd)/numpy_quaddtype/QBLAS/include" + export CXXFLAGS="-I/usr/local/include -I$(pwd)/numpy_quaddtype/QBLAS/include -fext-numeric-literals" + export LDFLAGS="-L/usr/local/lib64 -L/usr/local/lib -Wl,-rpath,/usr/local/lib64 -Wl,-rpath,/usr/local/lib -fopenmp" + export LD_LIBRARY_PATH="/usr/local/lib64:/usr/local/lib:$LD_LIBRARY_PATH" + + # Install with meson args to ensure the C++ flags are passed through + python -m pip install . 
-v --no-build-isolation \ + -Cbuilddir=build \ + -C'compile-args=-v' \ + -Csetup-args="-Dbuildtype=debug" \ + -Csetup-args="-Dcpp_args=-fext-numeric-literals" + - name: Run quaddtype tests working-directory: quaddtype run: | diff --git a/quaddtype/README.md b/quaddtype/README.md index af4ddef7..11eef1d2 100644 --- a/quaddtype/README.md +++ b/quaddtype/README.md @@ -27,7 +27,7 @@ np.array([1,2,3], dtype=QuadPrecDType("longdouble")) The code needs the quad precision pieces of the sleef library, which is not available on most systems by default, so we have to generate -that first. The below assumes one has the required pieces to build +that first. The below assumes one has the required pieces to build sleef (cmake and libmpfr-dev), and that one is in the package directory locally. @@ -40,6 +40,7 @@ cd .. ``` Building the `numpy-quaddtype` package from locally installed sleef: + ```bash export SLEEF_DIR=$PWD/sleef/build export LIBRARY_PATH=$SLEEF_DIR/lib @@ -57,10 +58,13 @@ export LDFLAGS="-Wl,-rpath,$SLEEF_DIR/lib -fopenmp -latomic -lpthread" export CFLAGS="-fPIC" export CXXFLAGS="-fPIC" +# To build without QBLAS (default for MSVC) +# export CFLAGS="-fPIC -DDISABLE_QUADBLAS" +# export CXXFLAGS="-fPIC -DDISABLE_QUADBLAS" + python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' # Run the tests cd .. 
python -m pytest ``` - diff --git a/quaddtype/meson.build b/quaddtype/meson.build index d1c67990..c000675a 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -50,12 +50,21 @@ srcs = [ 'numpy_quaddtype/src/scalar_ops.h', 'numpy_quaddtype/src/scalar_ops.cpp', 'numpy_quaddtype/src/ops.hpp', - 'numpy_quaddtype/src/umath.h', - 'numpy_quaddtype/src/umath.cpp', 'numpy_quaddtype/src/dragon4.h', 'numpy_quaddtype/src/dragon4.c', 'numpy_quaddtype/src/quadblas_interface.h', - 'numpy_quaddtype/src/quadblas_interface.cpp' + 'numpy_quaddtype/src/quadblas_interface.cpp', + 'numpy_quaddtype/src/umath/umath.h', + 'numpy_quaddtype/src/umath/umath.cpp', + 'numpy_quaddtype/src/umath/binary_ops.h', + 'numpy_quaddtype/src/umath/binary_ops.cpp', + 'numpy_quaddtype/src/umath/unary_ops.h', + 'numpy_quaddtype/src/umath/unary_ops.cpp', + 'numpy_quaddtype/src/umath/comparison_ops.h', + 'numpy_quaddtype/src/umath/comparison_ops.cpp', + 'numpy_quaddtype/src/umath/promoters.hpp', + 'numpy_quaddtype/src/umath/matmul.h', + 'numpy_quaddtype/src/umath/matmul.cpp', ] py.install_sources( diff --git a/quaddtype/numpy_quaddtype/QBLAS b/quaddtype/numpy_quaddtype/QBLAS index 0eabb677..9468e24a 160000 --- a/quaddtype/numpy_quaddtype/QBLAS +++ b/quaddtype/numpy_quaddtype/QBLAS @@ -1 +1 @@ -Subproject commit 0eabb677431c6148434c50deba7abd6902d74b16 +Subproject commit 9468e24a02b731563eba2aee0350e9219b36c102 diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py index b0a9f3be..878180bc 100644 --- a/quaddtype/numpy_quaddtype/__init__.py +++ b/quaddtype/numpy_quaddtype/__init__.py @@ -3,7 +3,6 @@ QuadPrecDType, is_longdouble_128, get_sleef_constant, - qblas_dot as dot, set_num_threads, get_num_threads, get_quadblas_version @@ -17,7 +16,7 @@ # Constants 'pi', 'e', 'log2e', 'log10e', 'ln2', 'ln10', 'max_value', 'min_value', 'epsilon', # QuadBLAS related functions - 'dot', 'set_num_threads', 'get_num_threads', 'get_quadblas_version' + 'set_num_threads', 
'get_num_threads', 'get_quadblas_version' ] def SleefQuadPrecision(value): @@ -40,8 +39,4 @@ def LongDoubleQuadPrecDType(): ln10 = get_sleef_constant("ln10") max_value = get_sleef_constant("quad_max") min_value = get_sleef_constant("quad_min") -epsilon = get_sleef_constant("epsilon") - -num_cores = multiprocessing.cpu_count() -# set default number of threads for QuadBLAS -set_num_threads(num_cores) \ No newline at end of file +epsilon = get_sleef_constant("epsilon") \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp b/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp index 6bc3fb0d..65feb604 100644 --- a/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp +++ b/quaddtype/numpy_quaddtype/src/quadblas_interface.cpp @@ -1,789 +1,233 @@ -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC - -extern "C" { -#include -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/dtype_api.h" -} - -#include "scalar.h" -#include "dtype.h" -#include "quad_common.h" #include "quadblas_interface.h" - -extern "C" { -#include -#include -} +#include +#include #ifndef DISABLE_QUADBLAS #include "../QBLAS/include/quadblas/quadblas.hpp" -#endif - -#ifdef DISABLE_QUADBLAS +#endif // DISABLE_QUADBLAS -static bool -extract_quad_array_info_simple(PyArrayObject *arr, Sleef_quad **data, QuadBackendType *backend) -{ - if (!PyArray_Check(arr)) { - PyErr_SetString(PyExc_TypeError, "Expected numpy array"); - return false; - } - - PyArray_Descr *descr = PyArray_DESCR(arr); - if (!PyObject_TypeCheck(descr, (PyTypeObject *)&QuadPrecDType)) { - PyErr_SetString(PyExc_TypeError, "Array must have QuadPrecDType dtype"); - return false; - } +extern "C" { - QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)descr; - *backend = 
quad_descr->backend; - *data = (Sleef_quad *)PyArray_DATA(arr); - return true; -} +#ifndef DISABLE_QUADBLAS -static Sleef_quad * -ensure_sleef_backend_simple(PyArrayObject *arr, QuadBackendType original_backend, - Sleef_quad **temp_storage) +int +qblas_dot(size_t n, Sleef_quad *x, size_t incx, Sleef_quad *y, size_t incy, Sleef_quad *result) { - if (original_backend == BACKEND_SLEEF) { - *temp_storage = nullptr; - return (Sleef_quad *)PyArray_DATA(arr); + if (!x || !y || !result || n == 0) { + return -1; } - npy_intp size = PyArray_SIZE(arr); - *temp_storage = (Sleef_quad *)malloc(size * sizeof(Sleef_quad)); - if (!*temp_storage) { - PyErr_NoMemory(); - return nullptr; + try { + *result = QuadBLAS::dot(n, x, incx, y, incy); + return 0; } - - long double *ld_data = (long double *)PyArray_DATA(arr); - for (npy_intp i = 0; i < size; i++) { - (*temp_storage)[i] = Sleef_cast_from_doubleq1((double)ld_data[i]); + catch (...) { + return -1; } - - return *temp_storage; } -// =============================================================================== -// FALLBACK IMPLEMENTATIONS (No QuadBLAS) -// =============================================================================== - -static PyObject * -dot_vector_vector_fallback(PyArrayObject *a, PyArrayObject *b) +int +qblas_gemv(char layout, char trans, size_t m, size_t n, Sleef_quad *alpha, Sleef_quad *A, + size_t lda, Sleef_quad *x, size_t incx, Sleef_quad *beta, Sleef_quad *y, size_t incy) { - if (PyArray_NDIM(a) != 1 || PyArray_NDIM(b) != 1) { - PyErr_SetString(PyExc_ValueError, "Both inputs must be 1-dimensional arrays"); - return nullptr; - } - - npy_intp n_a = PyArray_DIM(a, 0); - npy_intp n_b = PyArray_DIM(b, 0); - - if (n_a != n_b) { - PyErr_SetString(PyExc_ValueError, "Arrays must have the same length"); - return nullptr; + if (!alpha || !A || !x || !beta || !y || m == 0 || n == 0) { + return -1; } - Sleef_quad *data_a, *data_b; - QuadBackendType backend_a, backend_b; - - if (!extract_quad_array_info_simple(a, 
&data_a, &backend_a) || - !extract_quad_array_info_simple(b, &data_b, &backend_b)) { - return nullptr; - } - - Sleef_quad *temp_a = nullptr, *temp_b = nullptr; - Sleef_quad *sleef_a = ensure_sleef_backend_simple(a, backend_a, &temp_a); - Sleef_quad *sleef_b = ensure_sleef_backend_simple(b, backend_b, &temp_b); - - if (!sleef_a || !sleef_b) { - free(temp_a); - free(temp_b); - return nullptr; - } - - // Simple dot product implementation - Sleef_quad result = Sleef_cast_from_doubleq1(0.0); - for (npy_intp i = 0; i < n_a; i++) { - result = Sleef_fmaq1_u05(sleef_a[i], sleef_b[i], result); - } - - free(temp_a); - free(temp_b); + try { + // Convert layout + QuadBLAS::Layout qblas_layout; + if (layout == 'R' || layout == 'r') { + qblas_layout = QuadBLAS::Layout::RowMajor; + } + else if (layout == 'C' || layout == 'c') { + qblas_layout = QuadBLAS::Layout::ColMajor; + } + else { + return -1; // Invalid layout + } - QuadBackendType result_backend = BACKEND_SLEEF; - if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) { - result_backend = BACKEND_LONGDOUBLE; - } + // Handle transpose (swap dimensions for transpose) + size_t actual_m = m, actual_n = n; + if (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') { + std::swap(actual_m, actual_n); + // For transpose, we need to adjust the layout + if (qblas_layout == QuadBLAS::Layout::RowMajor) { + qblas_layout = QuadBLAS::Layout::ColMajor; + } + else { + qblas_layout = QuadBLAS::Layout::RowMajor; + } + } - QuadPrecisionObject *result_obj = QuadPrecision_raw_new(result_backend); - if (!result_obj) { - return nullptr; - } + // Call QBLAS GEMV + QuadBLAS::gemv(qblas_layout, actual_m, actual_n, *alpha, A, lda, x, incx, *beta, y, incy); - if (result_backend == BACKEND_SLEEF) { - result_obj->value.sleef_value = result; + return 0; } - else { - result_obj->value.longdouble_value = (long double)Sleef_cast_to_doubleq1(result); + catch (...) 
{ + return -1; } - - return (PyObject *)result_obj; } -static PyObject * -dot_matrix_vector_fallback(PyArrayObject *a, PyArrayObject *b) +int +qblas_gemm(char layout, char transa, char transb, size_t m, size_t n, size_t k, Sleef_quad *alpha, + Sleef_quad *A, size_t lda, Sleef_quad *B, size_t ldb, Sleef_quad *beta, Sleef_quad *C, + size_t ldc) { - if (PyArray_NDIM(a) != 2 || PyArray_NDIM(b) != 1) { - PyErr_SetString(PyExc_ValueError, "First input must be 2D, second input must be 1D"); - return nullptr; - } - - npy_intp m = PyArray_DIM(a, 0); - npy_intp n = PyArray_DIM(a, 1); - npy_intp n_b = PyArray_DIM(b, 0); - - if (n != n_b) { - PyErr_SetString(PyExc_ValueError, "Matrix columns must match vector length"); - return nullptr; - } - - Sleef_quad *data_a, *data_b; - QuadBackendType backend_a, backend_b; - - if (!extract_quad_array_info_simple(a, &data_a, &backend_a) || - !extract_quad_array_info_simple(b, &data_b, &backend_b)) { - return nullptr; - } - - Sleef_quad *temp_a = nullptr, *temp_b = nullptr; - Sleef_quad *sleef_a = ensure_sleef_backend_simple(a, backend_a, &temp_a); - Sleef_quad *sleef_b = ensure_sleef_backend_simple(b, backend_b, &temp_b); - - if (!sleef_a || !sleef_b) { - free(temp_a); - free(temp_b); - return nullptr; + if (!alpha || !A || !B || !beta || !C || m == 0 || n == 0 || k == 0) { + return -1; } - QuadBackendType result_backend = BACKEND_SLEEF; - if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) { - result_backend = BACKEND_LONGDOUBLE; - } - - npy_intp result_dims[1] = {m}; - QuadPrecDTypeObject *result_dtype = new_quaddtype_instance(result_backend); - if (!result_dtype) { - free(temp_a); - free(temp_b); - return nullptr; - } - - PyArrayObject *result = - (PyArrayObject *)PyArray_Empty(1, result_dims, (PyArray_Descr *)result_dtype, 0); - if (!result) { - free(temp_a); - free(temp_b); - Py_DECREF(result_dtype); - return nullptr; - } - - Sleef_quad *result_data = (Sleef_quad *)PyArray_DATA(result); - - // Initialize result to 
zero - for (npy_intp i = 0; i < m; i++) { - result_data[i] = Sleef_cast_from_doubleq1(0.0); - } - - // Simple matrix-vector multiplication: result[i] = sum(A[i,j] * b[j]) - for (npy_intp i = 0; i < m; i++) { - Sleef_quad sum = Sleef_cast_from_doubleq1(0.0); - for (npy_intp j = 0; j < n; j++) { - // Assume row-major layout: A[i,j] = sleef_a[i*n + j] - sum = Sleef_fmaq1_u05(sleef_a[i * n + j], sleef_b[j], sum); + try { + QuadBLAS::Layout qblas_layout; + if (layout == 'R' || layout == 'r') { + qblas_layout = QuadBLAS::Layout::RowMajor; } - result_data[i] = sum; - } - - // Convert to longdouble if needed - if (result_backend == BACKEND_LONGDOUBLE) { - long double *ld_result = (long double *)PyArray_DATA(result); - for (npy_intp i = 0; i < m; i++) { - ld_result[i] = (long double)Sleef_cast_to_doubleq1(result_data[i]); + else if (layout == 'C' || layout == 'c') { + qblas_layout = QuadBLAS::Layout::ColMajor; + } + else { + return -1; // Invalid layout } - } - - free(temp_a); - free(temp_b); - - return (PyObject *)result; -} - -static PyObject * -dot_matrix_matrix_fallback(PyArrayObject *a, PyArrayObject *b) -{ - if (PyArray_NDIM(a) != 2 || PyArray_NDIM(b) != 2) { - PyErr_SetString(PyExc_ValueError, "Both inputs must be 2-dimensional arrays"); - return nullptr; - } - - npy_intp m = PyArray_DIM(a, 0); - npy_intp k = PyArray_DIM(a, 1); - npy_intp k_b = PyArray_DIM(b, 0); - npy_intp n = PyArray_DIM(b, 1); - - if (k != k_b) { - PyErr_SetString(PyExc_ValueError, "Matrix inner dimensions must match"); - return nullptr; - } - - Sleef_quad *data_a, *data_b; - QuadBackendType backend_a, backend_b; - - if (!extract_quad_array_info_simple(a, &data_a, &backend_a) || - !extract_quad_array_info_simple(b, &data_b, &backend_b)) { - return nullptr; - } - - Sleef_quad *temp_a = nullptr, *temp_b = nullptr; - Sleef_quad *sleef_a = ensure_sleef_backend_simple(a, backend_a, &temp_a); - Sleef_quad *sleef_b = ensure_sleef_backend_simple(b, backend_b, &temp_b); - - if (!sleef_a || !sleef_b) { - 
free(temp_a); - free(temp_b); - return nullptr; - } - - QuadBackendType result_backend = BACKEND_SLEEF; - if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) { - result_backend = BACKEND_LONGDOUBLE; - } - - npy_intp result_dims[2] = {m, n}; - QuadPrecDTypeObject *result_dtype = new_quaddtype_instance(result_backend); - if (!result_dtype) { - free(temp_a); - free(temp_b); - return nullptr; - } - PyArrayObject *result = - (PyArrayObject *)PyArray_Empty(2, result_dims, (PyArray_Descr *)result_dtype, 0); - if (!result) { - free(temp_a); - free(temp_b); - Py_DECREF(result_dtype); - return nullptr; - } + // For now, we only support no transpose + // TODO: Implement transpose support if needed + if ((transa != 'N' && transa != 'n') || (transb != 'N' && transb != 'n')) { + return -1; // Transpose not implemented yet + } - Sleef_quad *result_data = (Sleef_quad *)PyArray_DATA(result); + QuadBLAS::gemm(qblas_layout, m, n, k, *alpha, A, lda, B, ldb, *beta, C, ldc); - // Initialize result matrix to zero - for (npy_intp i = 0; i < m * n; i++) { - result_data[i] = Sleef_cast_from_doubleq1(0.0); + return 0; } - - // Simple matrix-matrix multiplication: C[i,j] = sum(A[i,l] * B[l,j]) - for (npy_intp i = 0; i < m; i++) { - for (npy_intp j = 0; j < n; j++) { - Sleef_quad sum = Sleef_cast_from_doubleq1(0.0); - for (npy_intp l = 0; l < k; l++) { - // Row-major: A[i,l] = sleef_a[i*k + l], B[l,j] = sleef_b[l*n + j] - sum = Sleef_fmaq1_u05(sleef_a[i * k + l], sleef_b[l * n + j], sum); - } - result_data[i * n + j] = sum; - } + catch (...) 
{ + return -1; } +} - // Convert to longdouble if needed - if (result_backend == BACKEND_LONGDOUBLE) { - long double *ld_result = (long double *)PyArray_DATA(result); - for (npy_intp i = 0; i < m * n; i++) { - ld_result[i] = (long double)Sleef_cast_to_doubleq1(result_data[i]); - } - } - - free(temp_a); - free(temp_b); - - return (PyObject *)result; +int +qblas_supports_backend(QuadBackendType backend) +{ + // QBLAS only supports SLEEF backend + return (backend == BACKEND_SLEEF) ? 1 : 0; } PyObject * -py_quadblas_dot(PyObject *self, PyObject *args) +py_quadblas_set_num_threads(PyObject *self, PyObject *args) { - PyObject *a_obj, *b_obj; - - if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { - return nullptr; - } - - PyArrayObject *a = (PyArrayObject *)PyArray_FROM_OF(a_obj, NPY_ARRAY_ALIGNED); - PyArrayObject *b = (PyArrayObject *)PyArray_FROM_OF(b_obj, NPY_ARRAY_ALIGNED); - - if (!a || !b) { - Py_XDECREF(a); - Py_XDECREF(b); - PyErr_SetString(PyExc_TypeError, "Inputs must be convertible to arrays"); - return nullptr; + int num_threads; + if (!PyArg_ParseTuple(args, "i", &num_threads)) { + return NULL; } - PyObject *result = nullptr; - - int ndim_a = PyArray_NDIM(a); - int ndim_b = PyArray_NDIM(b); - - if (ndim_a == 1 && ndim_b == 1) { - result = dot_vector_vector_fallback(a, b); - } - else if (ndim_a == 2 && ndim_b == 1) { - result = dot_matrix_vector_fallback(a, b); - } - else if (ndim_a == 2 && ndim_b == 2) { - result = dot_matrix_matrix_fallback(a, b); - } - else if (ndim_a == 1 && ndim_b == 2) { - PyErr_SetString(PyExc_ValueError, - "Vector-Matrix multiplication not supported (use Matrix-Vector instead)"); - } - else { - PyErr_SetString(PyExc_ValueError, - "Unsupported array dimensions. 
Supported: (1D,1D), (2D,1D), (2D,2D)"); + if (num_threads <= 0) { + PyErr_SetString(PyExc_ValueError, "Number of threads must be positive"); + return NULL; } - Py_DECREF(a); - Py_DECREF(b); - - return result; -} - -// Dummy implementations for other QuadBLAS functions -PyObject * -py_quadblas_set_num_threads(PyObject *self, PyObject *args) -{ - // On Windows fallback, just ignore thread setting + QuadBLAS::set_num_threads(num_threads); Py_RETURN_NONE; } PyObject * py_quadblas_get_num_threads(PyObject *self, PyObject *args) { - // Return 1 for fallback implementation - return PyLong_FromLong(1); + int num_threads = QuadBLAS::get_num_threads(); + return PyLong_FromLong(num_threads); } PyObject * py_quadblas_get_version(PyObject *self, PyObject *args) { - return PyUnicode_FromString("QuadBLAS is disabled for MSVC"); + return PyUnicode_FromString("QuadBLAS 1.0.0 - High Performance Quad Precision BLAS"); } -#else - -static QuadBLAS::Layout -get_quadblas_layout(PyArrayObject *arr) +int +_quadblas_set_num_threads(int num_threads) { - if (PyArray_IS_C_CONTIGUOUS(arr)) { - return QuadBLAS::Layout::RowMajor; - } - else { - return QuadBLAS::Layout::ColMajor; - } + QuadBLAS::set_num_threads(num_threads); + return 0; } -static bool -extract_quad_array_info(PyArrayObject *arr, Sleef_quad **data, QuadBackendType *backend, - QuadBLAS::Layout *layout) +int +_quadblas_get_num_threads(void) { - if (!PyArray_Check(arr)) { - PyErr_SetString(PyExc_TypeError, "Expected numpy array"); - return false; - } - - PyArray_Descr *descr = PyArray_DESCR(arr); - if (!PyObject_TypeCheck(descr, (PyTypeObject *)&QuadPrecDType)) { - PyErr_SetString(PyExc_TypeError, "Array must have QuadPrecDType dtype"); - return false; - } - - QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)descr; - *backend = quad_descr->backend; - *data = (Sleef_quad *)PyArray_DATA(arr); - *layout = get_quadblas_layout(arr); - - return true; + int num_threads = QuadBLAS::get_num_threads(); + return num_threads; } -static 
Sleef_quad * -ensure_sleef_backend(PyArrayObject *arr, QuadBackendType original_backend, - Sleef_quad **temp_storage) -{ - if (original_backend == BACKEND_SLEEF) { - *temp_storage = nullptr; - return (Sleef_quad *)PyArray_DATA(arr); - } +#else // DISABLE_QUADBLAS - npy_intp size = PyArray_SIZE(arr); - *temp_storage = QuadBLAS::aligned_alloc(size); - if (!*temp_storage) { - PyErr_NoMemory(); - return nullptr; - } - long double *ld_data = (long double *)PyArray_DATA(arr); - for (npy_intp i = 0; i < size; i++) { - (*temp_storage)[i] = Sleef_cast_from_doubleq1((double)ld_data[i]); - } - - return *temp_storage; -} - -static PyObject * -dot_vector_vector(PyArrayObject *a, PyArrayObject *b) +int +qblas_dot(size_t n, Sleef_quad *x, size_t incx, Sleef_quad *y, size_t incy, Sleef_quad *result) { - if (PyArray_NDIM(a) != 1 || PyArray_NDIM(b) != 1) { - PyErr_SetString(PyExc_ValueError, "Both inputs must be 1-dimensional arrays"); - return nullptr; - } - - npy_intp n_a = PyArray_DIM(a, 0); - npy_intp n_b = PyArray_DIM(b, 0); - - if (n_a != n_b) { - PyErr_SetString(PyExc_ValueError, "Arrays must have the same length"); - return nullptr; - } - - Sleef_quad *data_a, *data_b; - QuadBackendType backend_a, backend_b; - QuadBLAS::Layout layout_a, layout_b; - - if (!extract_quad_array_info(a, &data_a, &backend_a, &layout_a) || - !extract_quad_array_info(b, &data_b, &backend_b, &layout_b)) { - return nullptr; - } - - Sleef_quad *temp_a = nullptr, *temp_b = nullptr; - Sleef_quad *sleef_a = ensure_sleef_backend(a, backend_a, &temp_a); - Sleef_quad *sleef_b = ensure_sleef_backend(b, backend_b, &temp_b); - - if (!sleef_a || !sleef_b) { - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - return nullptr; - } - - npy_intp stride_a = PyArray_STRIDE(a, 0) / PyArray_ITEMSIZE(a); - npy_intp stride_b = PyArray_STRIDE(b, 0) / PyArray_ITEMSIZE(b); - - Sleef_quad result = QuadBLAS::dot(n_a, sleef_a, stride_a, sleef_b, stride_b); - - QuadBLAS::aligned_free(temp_a); - 
QuadBLAS::aligned_free(temp_b); - - QuadBackendType result_backend = BACKEND_SLEEF; - if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) { - result_backend = BACKEND_LONGDOUBLE; - } - - QuadPrecisionObject *result_obj = QuadPrecision_raw_new(result_backend); - if (!result_obj) { - return nullptr; - } - - if (result_backend == BACKEND_SLEEF) { - result_obj->value.sleef_value = result; - } - else { - result_obj->value.longdouble_value = (long double)Sleef_cast_to_doubleq1(result); - } - - return (PyObject *)result_obj; + return -1; // QBLAS is disabled, dot product not available } -static PyObject * -dot_matrix_vector(PyArrayObject *a, PyArrayObject *b) +int +qblas_gemv(char layout, char trans, size_t m, size_t n, Sleef_quad *alpha, Sleef_quad *A, + size_t lda, Sleef_quad *x, size_t incx, Sleef_quad *beta, Sleef_quad *y, size_t incy) { - if (PyArray_NDIM(a) != 2 || PyArray_NDIM(b) != 1) { - PyErr_SetString(PyExc_ValueError, "First input must be 2D, second input must be 1D"); - return nullptr; - } - - npy_intp m = PyArray_DIM(a, 0); - npy_intp n = PyArray_DIM(a, 1); - npy_intp n_b = PyArray_DIM(b, 0); - - if (n != n_b) { - PyErr_SetString(PyExc_ValueError, "Matrix columns must match vector length"); - return nullptr; - } - - Sleef_quad *data_a, *data_b; - QuadBackendType backend_a, backend_b; - QuadBLAS::Layout layout_a, layout_b; - - if (!extract_quad_array_info(a, &data_a, &backend_a, &layout_a) || - !extract_quad_array_info(b, &data_b, &backend_b, &layout_b)) { - return nullptr; - } - - Sleef_quad *temp_a = nullptr, *temp_b = nullptr; - Sleef_quad *sleef_a = ensure_sleef_backend(a, backend_a, &temp_a); - Sleef_quad *sleef_b = ensure_sleef_backend(b, backend_b, &temp_b); - - if (!sleef_a || !sleef_b) { - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - return nullptr; - } - - QuadBackendType result_backend = BACKEND_SLEEF; - if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) { - result_backend = 
BACKEND_LONGDOUBLE; - } - - npy_intp result_dims[1] = {m}; - QuadPrecDTypeObject *result_dtype = new_quaddtype_instance(result_backend); - if (!result_dtype) { - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - return nullptr; - } - - PyArrayObject *result = - (PyArrayObject *)PyArray_Empty(1, result_dims, (PyArray_Descr *)result_dtype, 0); - if (!result) { - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - Py_DECREF(result_dtype); - return nullptr; - } - - Sleef_quad *result_data = (Sleef_quad *)PyArray_DATA(result); - - npy_intp lda; - if (layout_a == QuadBLAS::Layout::RowMajor) { - lda = n; - } - else { - lda = m; - } - - npy_intp stride_b = PyArray_STRIDE(b, 0) / PyArray_ITEMSIZE(b); - npy_intp stride_result = PyArray_STRIDE(result, 0) / PyArray_ITEMSIZE(result); - - Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0); - Sleef_quad beta = Sleef_cast_from_doubleq1(0.0); - - QuadBLAS::gemv(layout_a, m, n, alpha, sleef_a, lda, sleef_b, stride_b, beta, result_data, - stride_result); - - if (result_backend == BACKEND_LONGDOUBLE) { - long double *ld_result = (long double *)PyArray_DATA(result); - for (npy_intp i = 0; i < m; i++) { - ld_result[i] = (long double)Sleef_cast_to_doubleq1(result_data[i]); - } - } - - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - - return (PyObject *)result; + return -1; // QBLAS is disabled, GEMV not available } -static PyObject * -dot_matrix_matrix(PyArrayObject *a, PyArrayObject *b) +int +qblas_gemm(char layout, char transa, char transb, size_t m, size_t n, size_t k, Sleef_quad *alpha, + Sleef_quad *A, size_t lda, Sleef_quad *B, size_t ldb, Sleef_quad *beta, Sleef_quad *C, + size_t ldc) { - if (PyArray_NDIM(a) != 2 || PyArray_NDIM(b) != 2) { - PyErr_SetString(PyExc_ValueError, "Both inputs must be 2-dimensional arrays"); - return nullptr; - } - - npy_intp m = PyArray_DIM(a, 0); - npy_intp k = PyArray_DIM(a, 1); - npy_intp k_b = PyArray_DIM(b, 0); - npy_intp n = PyArray_DIM(b, 1); - 
- if (k != k_b) { - PyErr_SetString(PyExc_ValueError, "Matrix inner dimensions must match"); - return nullptr; - } - - Sleef_quad *data_a, *data_b; - QuadBackendType backend_a, backend_b; - QuadBLAS::Layout layout_a, layout_b; - - if (!extract_quad_array_info(a, &data_a, &backend_a, &layout_a) || - !extract_quad_array_info(b, &data_b, &backend_b, &layout_b)) { - return nullptr; - } - - Sleef_quad *temp_a = nullptr, *temp_b = nullptr; - Sleef_quad *sleef_a = ensure_sleef_backend(a, backend_a, &temp_a); - Sleef_quad *sleef_b = ensure_sleef_backend(b, backend_b, &temp_b); - - if (!sleef_a || !sleef_b) { - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - return nullptr; - } - - QuadBackendType result_backend = BACKEND_SLEEF; - if (backend_a == BACKEND_LONGDOUBLE && backend_b == BACKEND_LONGDOUBLE) { - result_backend = BACKEND_LONGDOUBLE; - } - - npy_intp result_dims[2] = {m, n}; - QuadPrecDTypeObject *result_dtype = new_quaddtype_instance(result_backend); - if (!result_dtype) { - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - return nullptr; - } - - PyArrayObject *result = - (PyArrayObject *)PyArray_Empty(2, result_dims, (PyArray_Descr *)result_dtype, 0); - if (!result) { - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - Py_DECREF(result_dtype); - return nullptr; - } - - Sleef_quad *result_data = (Sleef_quad *)PyArray_DATA(result); - for (npy_intp i = 0; i < m * n; i++) { - result_data[i] = Sleef_cast_from_doubleq1(0.0); - } - - npy_intp lda, ldb, ldc; - - if (layout_a == QuadBLAS::Layout::RowMajor) { - lda = k; - } - else { - lda = m; - } - - if (layout_b == QuadBLAS::Layout::RowMajor) { - ldb = n; - } - else { - ldb = k; - } - - QuadBLAS::Layout result_layout = layout_a; - if (result_layout == QuadBLAS::Layout::RowMajor) { - ldc = n; - } - else { - ldc = m; - } - - Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0); - Sleef_quad beta = Sleef_cast_from_doubleq1(0.0); - - QuadBLAS::gemm(result_layout, m, n, k, 
alpha, sleef_a, lda, sleef_b, ldb, beta, result_data, - ldc); - - if (result_backend == BACKEND_LONGDOUBLE) { - long double *ld_result = (long double *)PyArray_DATA(result); - for (npy_intp i = 0; i < m * n; i++) { - ld_result[i] = (long double)Sleef_cast_to_doubleq1(result_data[i]); - } - } - - QuadBLAS::aligned_free(temp_a); - QuadBLAS::aligned_free(temp_b); - - return (PyObject *)result; + return -1; // QBLAS is disabled, GEMM not available } -PyObject * -py_quadblas_dot(PyObject *self, PyObject *args) +int +qblas_supports_backend(QuadBackendType backend) { - PyObject *a_obj, *b_obj; - - if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { - return nullptr; - } - - PyArrayObject *a = (PyArrayObject *)PyArray_FROM_OF(a_obj, NPY_ARRAY_ALIGNED); - PyArrayObject *b = (PyArrayObject *)PyArray_FROM_OF(b_obj, NPY_ARRAY_ALIGNED); - - if (!a || !b) { - Py_XDECREF(a); - Py_XDECREF(b); - PyErr_SetString(PyExc_TypeError, "Inputs must be convertible to arrays"); - return nullptr; - } - - PyObject *result = nullptr; - - int ndim_a = PyArray_NDIM(a); - int ndim_b = PyArray_NDIM(b); - - if (ndim_a == 1 && ndim_b == 1) { - result = dot_vector_vector(a, b); - } - else if (ndim_a == 2 && ndim_b == 1) { - result = dot_matrix_vector(a, b); - } - else if (ndim_a == 2 && ndim_b == 2) { - result = dot_matrix_matrix(a, b); - } - else if (ndim_a == 1 && ndim_b == 2) { - PyErr_SetString(PyExc_ValueError, - "Vector-Matrix multiplication not supported (use Matrix-Vector instead)"); - } - else { - PyErr_SetString(PyExc_ValueError, - "Unsupported array dimensions. 
Supported: (1D,1D), (2D,1D), (2D,2D)"); - } - - Py_DECREF(a); - Py_DECREF(b); - - return result; + return -1; // QBLAS is disabled, backend support not available } PyObject * py_quadblas_set_num_threads(PyObject *self, PyObject *args) { - int num_threads; - - if (!PyArg_ParseTuple(args, "i", &num_threads)) { - return nullptr; - } - - if (num_threads < 1) { - PyErr_SetString(PyExc_ValueError, "Number of threads must be positive"); - return nullptr; - } - - QuadBLAS::set_num_threads(num_threads); - Py_RETURN_NONE; + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return NULL; } PyObject * py_quadblas_get_num_threads(PyObject *self, PyObject *args) { - return PyLong_FromLong(QuadBLAS::get_num_threads()); + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return NULL; } PyObject * py_quadblas_get_version(PyObject *self, PyObject *args) { - return PyUnicode_FromString(QuadBLAS::VERSION); + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return NULL; } -#endif // DISABLE_QUADBLAS \ No newline at end of file +int +_quadblas_set_num_threads(int num_threads) +{ + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return -1; +} + +int +_quadblas_get_num_threads(void) +{ + // raise error + PyErr_SetString(PyExc_NotImplementedError, "QuadBLAS is disabled"); + return -1; +} + +#endif // DISABLE_QUADBLAS + +} // extern "C" \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quadblas_interface.h b/quaddtype/numpy_quaddtype/src/quadblas_interface.h index da8f0a85..76033ebc 100644 --- a/quaddtype/numpy_quaddtype/src/quadblas_interface.h +++ b/quaddtype/numpy_quaddtype/src/quadblas_interface.h @@ -1,23 +1,44 @@ -#ifndef _QUADDTYPE_QUADBLAS_INTERFACE_H -#define _QUADDTYPE_QUADBLAS_INTERFACE_H +#ifndef QUADBLAS_INTERFACE_H +#define QUADBLAS_INTERFACE_H + +#include +#include +#include "quad_common.h" +#include #ifdef 
__cplusplus extern "C" { #endif -#include +int +qblas_dot(size_t n, Sleef_quad *x, size_t incx, Sleef_quad *y, size_t incy, Sleef_quad *result); +int +qblas_gemv(char layout, char trans, size_t m, size_t n, Sleef_quad *alpha, Sleef_quad *A, + size_t lda, Sleef_quad *x, size_t incx, Sleef_quad *beta, Sleef_quad *y, size_t incy); -PyObject* py_quadblas_dot(PyObject* self, PyObject* args); +int +qblas_gemm(char layout, char transa, char transb, size_t m, size_t n, size_t k, Sleef_quad *alpha, + Sleef_quad *A, size_t lda, Sleef_quad *B, size_t ldb, Sleef_quad *beta, Sleef_quad *C, + size_t ldc); +int +qblas_supports_backend(QuadBackendType backend); -PyObject* py_quadblas_set_num_threads(PyObject* self, PyObject* args); -PyObject* py_quadblas_get_num_threads(PyObject* self, PyObject* args); +PyObject * +py_quadblas_set_num_threads(PyObject *self, PyObject *args); +PyObject * +py_quadblas_get_num_threads(PyObject *self, PyObject *args); +PyObject * +py_quadblas_get_version(PyObject *self, PyObject *args); -PyObject* py_quadblas_get_version(PyObject* self, PyObject* args); +int +_quadblas_set_num_threads(int num_threads); +int +_quadblas_get_num_threads(void); #ifdef __cplusplus } #endif -#endif \ No newline at end of file +#endif // QUADBLAS_INTERFACE_H \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c index 9e2b8430..1e8fd535 100644 --- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -14,50 +14,60 @@ #include "scalar.h" #include "dtype.h" -#include "umath.h" +#include "umath/umath.h" #include "quad_common.h" #include "quadblas_interface.h" #include "float.h" - -static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) { - if(sizeof(long double) == 16 && - LDBL_MANT_DIG == 113 && - LDBL_MAX_EXP == 16384) { +static PyObject * +py_is_longdouble_128(PyObject *self, PyObject *args) +{ + if (sizeof(long double) == 16 && 
LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384) { Py_RETURN_TRUE; - } else { + } + else { Py_RETURN_FALSE; } } -static PyObject* get_sleef_constant(PyObject* self, PyObject* args) { - const char* constant_name; +static PyObject * +get_sleef_constant(PyObject *self, PyObject *args) +{ + const char *constant_name; if (!PyArg_ParseTuple(args, "s", &constant_name)) { return NULL; } - QuadPrecisionObject* result = QuadPrecision_raw_new(BACKEND_SLEEF); + QuadPrecisionObject *result = QuadPrecision_raw_new(BACKEND_SLEEF); if (result == NULL) { return NULL; } if (strcmp(constant_name, "pi") == 0) { result->value.sleef_value = SLEEF_M_PIq; - } else if (strcmp(constant_name, "e") == 0) { + } + else if (strcmp(constant_name, "e") == 0) { result->value.sleef_value = SLEEF_M_Eq; - } else if (strcmp(constant_name, "log2e") == 0) { + } + else if (strcmp(constant_name, "log2e") == 0) { result->value.sleef_value = SLEEF_M_LOG2Eq; - } else if (strcmp(constant_name, "log10e") == 0) { + } + else if (strcmp(constant_name, "log10e") == 0) { result->value.sleef_value = SLEEF_M_LOG10Eq; - } else if (strcmp(constant_name, "ln2") == 0) { + } + else if (strcmp(constant_name, "ln2") == 0) { result->value.sleef_value = SLEEF_M_LN2q; - } else if (strcmp(constant_name, "ln10") == 0) { + } + else if (strcmp(constant_name, "ln10") == 0) { result->value.sleef_value = SLEEF_M_LN10q; - } else if (strcmp(constant_name, "quad_max") == 0) { + } + else if (strcmp(constant_name, "quad_max") == 0) { result->value.sleef_value = SLEEF_QUAD_MAX; - } else if (strcmp(constant_name, "quad_min") == 0) { + } + else if (strcmp(constant_name, "quad_min") == 0) { result->value.sleef_value = SLEEF_QUAD_MIN; - } else if (strcmp(constant_name, "epsilon") == 0) { + } + else if (strcmp(constant_name, "epsilon") == 0) { result->value.sleef_value = SLEEF_QUAD_EPSILON; } else { @@ -66,26 +76,23 @@ static PyObject* get_sleef_constant(PyObject* self, PyObject* args) { return NULL; } - return (PyObject*)result; + return 
(PyObject *)result; } static PyMethodDef module_methods[] = { - {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"}, - {"get_sleef_constant", get_sleef_constant, METH_VARARGS, "Get Sleef constant by name"}, - {"qblas_dot", py_quadblas_dot, METH_VARARGS, "Optimized dot product using QuadBLAS"}, - {"set_num_threads", py_quadblas_set_num_threads, METH_VARARGS, "Set number of threads for QuadBLAS"}, - {"get_num_threads", py_quadblas_get_num_threads, METH_NOARGS, "Get number of threads for QuadBLAS"}, - {"get_quadblas_version", py_quadblas_get_version, METH_NOARGS, "Get QuadBLAS version"}, - {NULL, NULL, 0, NULL} -}; + {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"}, + {"get_sleef_constant", get_sleef_constant, METH_VARARGS, "Get Sleef constant by name"}, + {"set_num_threads", py_quadblas_set_num_threads, METH_VARARGS, + "Set number of threads for QuadBLAS"}, + {"get_num_threads", py_quadblas_get_num_threads, METH_NOARGS, + "Get number of threads for QuadBLAS"}, + {"get_quadblas_version", py_quadblas_get_version, METH_NOARGS, "Get QuadBLAS version"}, + {NULL, NULL, 0, NULL}}; static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - .m_name = "_quaddtype_main", + PyModuleDef_HEAD_INIT, .m_name = "_quaddtype_main", .m_doc = "Quad (128-bit) floating point Data Type for NumPy with multiple backends", - .m_size = -1, - .m_methods = module_methods -}; + .m_size = -1, .m_methods = module_methods}; PyMODINIT_FUNC PyInit__quaddtype_main(void) diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp deleted file mode 100644 index 0058236a..00000000 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ /dev/null @@ -1,806 +0,0 @@ -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define 
NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC - -extern "C" { -#include -#include - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/ufuncobject.h" - -#include "numpy/dtype_api.h" -} -#include "quad_common.h" -#include "scalar.h" -#include "dtype.h" -#include "umath.h" -#include "ops.hpp" - -// helper debugging function -static const char * -get_dtype_name(PyArray_DTypeMeta *dtype) -{ - if (dtype == &QuadPrecDType) { - return "QuadPrecDType"; - } - else if (dtype == &PyArray_BoolDType) { - return "BoolDType"; - } - else if (dtype == &PyArray_ByteDType) { - return "ByteDType"; - } - else if (dtype == &PyArray_UByteDType) { - return "UByteDType"; - } - else if (dtype == &PyArray_ShortDType) { - return "ShortDType"; - } - else if (dtype == &PyArray_UShortDType) { - return "UShortDType"; - } - else if (dtype == &PyArray_IntDType) { - return "IntDType"; - } - else if (dtype == &PyArray_UIntDType) { - return "UIntDType"; - } - else if (dtype == &PyArray_LongDType) { - return "LongDType"; - } - else if (dtype == &PyArray_ULongDType) { - return "ULongDType"; - } - else if (dtype == &PyArray_LongLongDType) { - return "LongLongDType"; - } - else if (dtype == &PyArray_ULongLongDType) { - return "ULongLongDType"; - } - else if (dtype == &PyArray_FloatDType) { - return "FloatDType"; - } - else if (dtype == &PyArray_DoubleDType) { - return "DoubleDType"; - } - else if (dtype == &PyArray_LongDoubleDType) { - return "LongDoubleDType"; - } - else { - return "UnknownDType"; - } -} - -static NPY_CASTING -quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], - npy_intp *NPY_UNUSED(view_offset)) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - if (given_descrs[1] == NULL) { - Py_INCREF(given_descrs[0]); - loop_descrs[1] = given_descrs[0]; - } - else { - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - } - - 
QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)given_descrs[0]; - QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[1]; - - if (descr_in->backend != descr_out->backend) { - return NPY_UNSAFE_CASTING; - } - - return NPY_NO_CASTING; -} - -template -int -quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); - - quad_value in, out; - while (N--) { - memcpy(&in, in_ptr, elem_size); - if (backend == BACKEND_SLEEF) { - out.sleef_value = sleef_op(&in.sleef_value); - } - else { - out.longdouble_value = longdouble_op(&in.longdouble_value); - } - memcpy(out_ptr, &out, elem_size); - - in_ptr += in_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - - while (N--) { - if (backend == BACKEND_SLEEF) { - *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in_ptr); - } - else { - *(long double *)out_ptr = longdouble_op((long double *)in_ptr); - } - in_ptr += in_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) -{ - 
PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); - if (ufunc == NULL) { - return -1; - } - - PyArray_DTypeMeta *dtypes[2] = {&QuadPrecDType, &QuadPrecDType}; - - PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, - {NPY_METH_strided_loop, - (void *)&quad_generic_unary_op_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, - (void *)&quad_generic_unary_op_strided_loop_unaligned}, - {0, NULL}}; - - PyArrayMethod_Spec Spec = { - .name = "quad_unary_op", - .nin = 1, - .nout = 1, - .casting = NPY_NO_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = dtypes, - .slots = slots, - }; - - if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { - return -1; - } - - return 0; -} - -int -init_quad_unary_ops(PyObject *numpy) -{ - if (create_quad_unary_ufunc(numpy, "negative") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "positive") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "absolute") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "rint") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "trunc") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "floor") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "ceil") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "square") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log2") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log10") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "log1p") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "exp") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "exp2") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "sin") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "cos") < 0) { - return -1; - } - if 
(create_quad_unary_ufunc(numpy, "tan") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "arcsin") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "arccos") < 0) { - return -1; - } - if (create_quad_unary_ufunc(numpy, "arctan") < 0) { - return -1; - } - return 0; -} - -// Binary ufuncs - -static NPY_CASTING -quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], - PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) -{ - QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; - QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; - QuadBackendType target_backend; - - // Determine target backend and if casting is needed - NPY_CASTING casting = NPY_NO_CASTING; - if (descr_in1->backend != descr_in2->backend) { - target_backend = BACKEND_LONGDOUBLE; - casting = NPY_SAFE_CASTING; - } - else { - target_backend = descr_in1->backend; - } - - // Set up input descriptors, casting if necessary - for (int i = 0; i < 2; i++) { - if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { - loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[i]) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[i]); - loop_descrs[i] = given_descrs[i]; - } - } - - // Set up output descriptor - if (given_descrs[2] == NULL) { - loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[2]) { - return (NPY_CASTING)-1; - } - } - else { - QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; - if (descr_out->backend != target_backend) { - loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[2]) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[2]); - loop_descrs[2] = given_descrs[2]; - } - } - return casting; -} - -template -int 
-quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); - - quad_value in1, in2, out; - while (N--) { - memcpy(&in1, in1_ptr, elem_size); - memcpy(&in2, in2_ptr, elem_size); - if (backend == BACKEND_SLEEF) { - out.sleef_value = sleef_op(&in1.sleef_value, &in2.sleef_value); - } - else { - out.longdouble_value = longdouble_op(&in1.longdouble_value, &in2.longdouble_value); - } - memcpy(out_ptr, &out, elem_size); - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - - while (N--) { - if (backend == BACKEND_SLEEF) { - *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); - } - else { - *(long double *)out_ptr = longdouble_op((long double *)in1_ptr, (long double *)in2_ptr); - } - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -static int -quad_ufunc_promoter(PyUFuncObject *ufunc, 
PyArray_DTypeMeta *op_dtypes[], - PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) -{ - int nin = ufunc->nin; - int nargs = ufunc->nargs; - PyArray_DTypeMeta *common = NULL; - bool has_quad = false; - - // Handle the special case for reductions - if (op_dtypes[0] == NULL) { - assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ - for (int i = 0; i < 3; i++) { - Py_INCREF(op_dtypes[1]); - new_op_dtypes[i] = op_dtypes[1]; - } - return 0; - } - - // Check if any input or signature is QuadPrecision - for (int i = 0; i < nin; i++) { - if (op_dtypes[i] == &QuadPrecDType) { - has_quad = true; - } - } - - if (has_quad) { - common = &QuadPrecDType; - } - else { - for (int i = nin; i < nargs; i++) { - if (signature[i] != NULL) { - if (common == NULL) { - Py_INCREF(signature[i]); - common = signature[i]; - } - else if (common != signature[i]) { - Py_CLEAR(common); // Not homogeneous, unset common - break; - } - } - } - } - // If no common output dtype, use standard promotion for inputs - if (common == NULL) { - common = PyArray_PromoteDTypeSequence(nin, op_dtypes); - if (common == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Clear(); // Do not propagate normal promotion errors - } - - return -1; - } - } - - // Set all new_op_dtypes to the common dtype - for (int i = 0; i < nargs; i++) { - if (signature[i]) { - // If signature is specified for this argument, use it - Py_INCREF(signature[i]); - new_op_dtypes[i] = signature[i]; - } - else { - // Otherwise, use the common dtype - Py_INCREF(common); - - new_op_dtypes[i] = common; - } - } - - Py_XDECREF(common); - - return 0; -} - -template -int -create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) -{ - PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); - if (ufunc == NULL) { - return -1; - } - - PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType}; - - PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void 
*)&quad_binary_op_resolve_descriptors}, - {NPY_METH_strided_loop, - (void *)&quad_generic_binop_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, - (void *)&quad_generic_binop_strided_loop_unaligned}, - {0, NULL}}; - - PyArrayMethod_Spec Spec = { - .name = "quad_binop", - .nin = 2, - .nout = 1, - .casting = NPY_NO_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE), - .dtypes = dtypes, - .slots = slots, - }; - - if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { - return -1; - } - - PyObject *promoter_capsule = - PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); - if (promoter_capsule == NULL) { - return -1; - } - - PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type); - if (DTypes == 0) { - Py_DECREF(promoter_capsule); - return -1; - } - - if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - return -1; - } - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - return 0; -} - -int -init_quad_binary_ops(PyObject *numpy) -{ - if (create_quad_binary_ufunc(numpy, "add") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "subtract") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "multiply") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "divide") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "power") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "mod") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "minimum") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "maximum") < 0) { - return -1; - } - if (create_quad_binary_ufunc(numpy, "arctan2") < 0) { - return -1; - } - return 0; -} - -// comparison functions - -static NPY_CASTING -quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], - PyArray_Descr *loop_descrs[], - 
npy_intp *NPY_UNUSED(view_offset)) -{ - QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; - QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; - QuadBackendType target_backend; - - // As dealing with different backends then cast to boolean - NPY_CASTING casting = NPY_NO_CASTING; - if (descr_in1->backend != descr_in2->backend) { - target_backend = BACKEND_LONGDOUBLE; - casting = NPY_SAFE_CASTING; - } - else { - target_backend = descr_in1->backend; - } - - // Set up input descriptors, casting if necessary - for (int i = 0; i < 2; i++) { - if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { - loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); - if (!loop_descrs[i]) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[i]); - loop_descrs[i] = given_descrs[i]; - } - } - - // Set up output descriptor - loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL); - if (!loop_descrs[2]) { - return (NPY_CASTING)-1; - } - return casting; -} - -template -int -quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); - - quad_value in1, in2; - while (N--) { - memcpy(&in1, in1_ptr, elem_size); - memcpy(&in2, in2_ptr, elem_size); - npy_bool result; - - if (backend == BACKEND_SLEEF) { - result = sleef_comp(&in1.sleef_value, &in2.sleef_value); - } - else { - result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); - } - - memcpy(out_ptr, &result, sizeof(npy_bool)); - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - while (N--) { - quad_value in1 = *(quad_value *)in1_ptr; - quad_value in2 = *(quad_value *)in2_ptr; - - npy_bool result; - - if (backend == BACKEND_SLEEF) { - result = sleef_comp(&in1.sleef_value, &in2.sleef_value); - } - else { - result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); - } - - *(npy_bool *)out_ptr = result; - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -NPY_NO_EXPORT int -comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], - PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) -{ - PyArray_DTypeMeta *new_signature[NPY_MAXARGS]; - memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *)); - new_signature[2] = NULL; - int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes); - if (res < 0) { - return -1; - } - Py_XSETREF(new_op_dtypes[2], &PyArray_BoolDType); - return 0; -} - -template -int 
-create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) -{ - PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); - if (ufunc == NULL) { - return -1; - } - - PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; - - PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, - {NPY_METH_strided_loop, - (void *)&quad_generic_comp_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, - (void *)&quad_generic_comp_strided_loop}, - {0, NULL}}; - - PyArrayMethod_Spec Spec = { - .name = "quad_comp", - .nin = 2, - .nout = 1, - .casting = NPY_SAFE_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = dtypes, - .slots = slots, - }; - - if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { - return -1; - } - - PyObject *promoter_capsule = - PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); - if (promoter_capsule == NULL) { - return -1; - } - - PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType); - if (DTypes == 0) { - Py_DECREF(promoter_capsule); - return -1; - } - - if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - return -1; - } - Py_DECREF(promoter_capsule); - Py_DECREF(DTypes); - - return 0; -} - -int -init_quad_comps(PyObject *numpy) -{ - if (create_quad_comparison_ufunc(numpy, "equal") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "not_equal") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "less") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "less_equal") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "greater") < 0) { - return -1; - } - if (create_quad_comparison_ufunc(numpy, "greater_equal") < - 0) { - return -1; - } - - return 0; -} - -int -init_quad_umath(void) -{ - PyObject *numpy = PyImport_ImportModule("numpy"); - if (!numpy) { - 
PyErr_SetString(PyExc_ImportError, "Failed to import numpy module"); - return -1; - } - - if (init_quad_unary_ops(numpy) < 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad unary operations"); - goto err; - } - - if (init_quad_binary_ops(numpy) < 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad binary operations"); - goto err; - } - - if (init_quad_comps(numpy) < 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad comparison operations"); - goto err; - } - - Py_DECREF(numpy); - return 0; - -err: - Py_DECREF(numpy); - return -1; -} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp b/quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp new file mode 100644 index 00000000..aa6d19ce --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/binary_ops.cpp @@ -0,0 +1,235 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + + +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +#include "numpy/ndarraytypes.h" + +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "../ops.hpp" +#include "promoters.hpp" +#include "binary_ops.h" + +static NPY_CASTING +quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], + PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + QuadBackendType target_backend; + + // Determine target backend and if casting is needed + NPY_CASTING casting = NPY_NO_CASTING; + if (descr_in1->backend != descr_in2->backend) { + 
target_backend = BACKEND_LONGDOUBLE; + casting = NPY_SAFE_CASTING; + } + else { + target_backend = descr_in1->backend; + } + + // Set up input descriptors, casting if necessary + for (int i = 0; i < 2; i++) { + if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { + loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[i]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + } + + // Set up output descriptor + if (given_descrs[2] == NULL) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; + if (descr_out->backend != target_backend) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[2]); + loop_descrs[2] = given_descrs[2]; + } + } + return casting; +} + +template +int +quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2, out; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + out.sleef_value = sleef_op(&in1.sleef_value, &in2.sleef_value); + } + else { + out.longdouble_value = longdouble_op(&in1.longdouble_value, &in2.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); + } + else { + *(long double *)out_ptr = longdouble_op((long double *)in1_ptr, (long double *)in2_ptr); + } + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + + +template +int +create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_binop_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_binop_strided_loop_unaligned}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_binop", + .nin = 2, 
+ .nout = 1, + .casting = NPY_NO_CASTING, + .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE), + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + PyObject *promoter_capsule = + PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); + if (promoter_capsule == NULL) { + return -1; + } + + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type); + if (DTypes == 0) { + Py_DECREF(promoter_capsule); + return -1; + } + + if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + return -1; + } + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + return 0; +} + + +int +init_quad_binary_ops(PyObject *numpy) +{ + if (create_quad_binary_ufunc(numpy, "add") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "subtract") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "multiply") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "divide") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "power") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "mod") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "minimum") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "maximum") < 0) { + return -1; + } + if (create_quad_binary_ufunc(numpy, "arctan2") < 0) { + return -1; + } + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/binary_ops.h b/quaddtype/numpy_quaddtype/src/umath/binary_ops.h new file mode 100644 index 00000000..c45ec453 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/binary_ops.h @@ -0,0 +1,9 @@ +#ifndef _QUADDTYPE_BINARY_OPS_H +#define _QUADDTYPE_BINARY_OPS_H + +#include + +int +init_quad_binary_ops(PyObject *numpy); + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/comparison_ops.cpp 
b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.cpp new file mode 100644 index 00000000..095b6d39 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.cpp @@ -0,0 +1,240 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + + +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +#include "numpy/ndarraytypes.h" + +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "umath.h" +#include "../ops.hpp" +#include "promoters.hpp" +#include "binary_ops.h" +#include "comparison_ops.h" + + +static NPY_CASTING +quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], + PyArray_Descr *loop_descrs[], + npy_intp *NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + QuadBackendType target_backend; + + // As dealing with different backends then cast to boolean + NPY_CASTING casting = NPY_NO_CASTING; + if (descr_in1->backend != descr_in2->backend) { + target_backend = BACKEND_LONGDOUBLE; + casting = NPY_SAFE_CASTING; + } + else { + target_backend = descr_in1->backend; + } + + // Set up input descriptors, casting if necessary + for (int i = 0; i < 2; i++) { + if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { + loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[i]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + } + + // Set up output descriptor + loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL); + if (!loop_descrs[2]) { + return 
(NPY_CASTING)-1; + } + return casting; +} + +template +int +quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + npy_bool result; + + if (backend == BACKEND_SLEEF) { + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else { + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + memcpy(out_ptr, &result, sizeof(npy_bool)); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + while (N--) { + quad_value in1 = *(quad_value *)in1_ptr; + quad_value in2 = *(quad_value *)in2_ptr; + + npy_bool result; + + if (backend == BACKEND_SLEEF) { + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else { + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + *(npy_bool *)out_ptr = result; + + in1_ptr += 
in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + +NPY_NO_EXPORT int +comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], + PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) +{ + PyArray_DTypeMeta *new_signature[NPY_MAXARGS]; + memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *)); + new_signature[2] = NULL; + int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes); + if (res < 0) { + return -1; + } + Py_XSETREF(new_op_dtypes[2], &PyArray_BoolDType); + return 0; +} + +template +int +create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_comp_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_comp_strided_loop}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_comp", + .nin = 2, + .nout = 1, + .casting = NPY_SAFE_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + PyObject *promoter_capsule = + PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); + if (promoter_capsule == NULL) { + return -1; + } + + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType); + if (DTypes == 0) { + Py_DECREF(promoter_capsule); + return -1; + } + + if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + return -1; + } + Py_DECREF(promoter_capsule); + Py_DECREF(DTypes); + + return 0; +} + +int +init_quad_comps(PyObject *numpy) +{ 
+ if (create_quad_comparison_ufunc(numpy, "equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "not_equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "less") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "less_equal") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "greater") < 0) { + return -1; + } + if (create_quad_comparison_ufunc(numpy, "greater_equal") < + 0) { + return -1; + } + + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/comparison_ops.h b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.h new file mode 100644 index 00000000..e3b8cc0e --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/comparison_ops.h @@ -0,0 +1,9 @@ +#ifndef _QUADDTYPE_COMPARISON_OPS_H +#define _QUADDTYPE_COMPARISON_OPS_H + +#include + +int +init_quad_comps(PyObject *numpy); + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/matmul.cpp b/quaddtype/numpy_quaddtype/src/umath/matmul.cpp new file mode 100644 index 00000000..354a342d --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/matmul.cpp @@ -0,0 +1,450 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +} + +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "../ops.hpp" +#include "matmul.h" +#include "promoters.hpp" +#include "../quadblas_interface.h" + +static NPY_CASTING +quad_matmul_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], + npy_intp 
*NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + + // QBLAS only supports SLEEF backend + if (descr_in1->backend != BACKEND_SLEEF || descr_in2->backend != BACKEND_SLEEF) { + PyErr_SetString(PyExc_NotImplementedError, + "QBLAS-accelerated matmul only supports SLEEF backend. " + "Other backends are not supported with QBLAS."); + return (NPY_CASTING)-1; + } + + // Both inputs must use SLEEF backend + QuadBackendType target_backend = BACKEND_SLEEF; + NPY_CASTING casting = NPY_NO_CASTING; + + // Set up input descriptors + for (int i = 0; i < 2; i++) { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + + // Set up output descriptor + if (given_descrs[2] == NULL) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } + else { + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; + if (descr_out->backend != target_backend) { + PyErr_SetString(PyExc_NotImplementedError, + "QBLAS-accelerated matmul only supports SLEEF backend for output."); + return (NPY_CASTING)-1; + } + else { + Py_INCREF(given_descrs[2]); + loop_descrs[2] = given_descrs[2]; + } + } + return casting; +} + +enum MatmulOperationType { + MATMUL_DOT, + MATMUL_GEMV, + MATMUL_GEMM +}; + +static MatmulOperationType +determine_operation_type(npy_intp m, npy_intp n, npy_intp p) +{ + if (m == 1 && p == 1) { + return MATMUL_DOT; + } + else if (p == 1) { + return MATMUL_GEMV; + } + else { + return MATMUL_GEMM; + } +} + +static int +quad_matmul_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + // Extract dimensions + npy_intp N = dimensions[0]; // Batch size, this remains always 1 for matmul afaik + npy_intp m = dimensions[1]; // Rows of first matrix + npy_intp 
n = dimensions[2]; // Cols of first matrix / rows of second matrix + npy_intp p = dimensions[3]; // Cols of second matrix + + // batch strides + npy_intp A_stride = strides[0]; + npy_intp B_stride = strides[1]; + npy_intp C_stride = strides[2]; + + // core strides for matrix dimensions + npy_intp A_row_stride = strides[3]; + npy_intp A_col_stride = strides[4]; + npy_intp B_row_stride = strides[5]; + npy_intp B_col_stride = strides[6]; + npy_intp C_row_stride = strides[7]; + npy_intp C_col_stride = strides[8]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + if (descr->backend != BACKEND_SLEEF) { + PyErr_SetString(PyExc_RuntimeError, "Internal error: non-SLEEF backend in QBLAS matmul"); + return -1; + } + + MatmulOperationType op_type = determine_operation_type(m, n, p); + Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0); + Sleef_quad beta = Sleef_cast_from_doubleq1(0.0); + + char *A = data[0]; + char *B = data[1]; + char *C = data[2]; + + Sleef_quad *A_ptr = (Sleef_quad *)A; + Sleef_quad *B_ptr = (Sleef_quad *)B; + Sleef_quad *C_ptr = (Sleef_quad *)C; + + int result = -1; + + switch (op_type) { + case MATMUL_DOT: { + size_t incx = A_col_stride / sizeof(Sleef_quad); + size_t incy = B_row_stride / sizeof(Sleef_quad); + + result = qblas_dot(n, A_ptr, incx, B_ptr, incy, C_ptr); + break; + } + + case MATMUL_GEMV: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t incx = B_row_stride / sizeof(Sleef_quad); + size_t incy = C_row_stride / sizeof(Sleef_quad); + + memset(C_ptr, 0, m * p * sizeof(Sleef_quad)); + + result = + qblas_gemv('R', 'N', m, n, &alpha, A_ptr, lda, B_ptr, incx, &beta, C_ptr, incy); + break; + } + + case MATMUL_GEMM: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t ldb = B_row_stride / sizeof(Sleef_quad); + size_t ldc_numpy = C_row_stride / sizeof(Sleef_quad); + + memset(C_ptr, 0, m * p * sizeof(Sleef_quad)); + + size_t ldc_temp = p; + + result = qblas_gemm('R', 'N', 'N', m, p, n, &alpha, A_ptr, lda, 
B_ptr, ldb, &beta, + C_ptr, ldc_numpy); + break; + } + } + + if (result != 0) { + PyErr_SetString(PyExc_RuntimeError, "QBLAS operation failed"); + return -1; + } + + return 0; +} + +static int +quad_matmul_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + // Extract dimensions + npy_intp N = dimensions[0]; // Batch size, this remains always 1 for matmul afaik + npy_intp m = dimensions[1]; // Rows of first matrix + npy_intp n = dimensions[2]; // Cols of first matrix / rows of second matrix + npy_intp p = dimensions[3]; // Cols of second matrix + + // batch strides + npy_intp A_stride = strides[0]; + npy_intp B_stride = strides[1]; + npy_intp C_stride = strides[2]; + + // core strides for matrix dimensions + npy_intp A_row_stride = strides[3]; + npy_intp A_col_stride = strides[4]; + npy_intp B_row_stride = strides[5]; + npy_intp B_col_stride = strides[6]; + npy_intp C_row_stride = strides[7]; + npy_intp C_col_stride = strides[8]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + if (descr->backend != BACKEND_SLEEF) { + PyErr_SetString(PyExc_RuntimeError, "Internal error: non-SLEEF backend in QBLAS matmul"); + return -1; + } + + MatmulOperationType op_type = determine_operation_type(m, n, p); + Sleef_quad alpha = Sleef_cast_from_doubleq1(1.0); + Sleef_quad beta = Sleef_cast_from_doubleq1(0.0); + + char *A = data[0]; + char *B = data[1]; + char *C = data[2]; + + Sleef_quad *A_ptr = (Sleef_quad *)A; + Sleef_quad *B_ptr = (Sleef_quad *)B; + Sleef_quad *C_ptr = (Sleef_quad *)C; + + int result = -1; + + switch (op_type) { + case MATMUL_DOT: { + Sleef_quad *temp_A_buffer = new Sleef_quad[n]; + Sleef_quad *temp_B_buffer = new Sleef_quad[n]; + + memcpy(temp_A_buffer, A_ptr, n * sizeof(Sleef_quad)); + memcpy(temp_B_buffer, B_ptr, n * sizeof(Sleef_quad)); + + size_t incx = 1; + size_t incy = 1; + + result = qblas_dot(n, temp_A_buffer, 
incx, temp_B_buffer, incy, C_ptr); + + delete[] temp_A_buffer; + delete[] temp_B_buffer; + break; + } + + case MATMUL_GEMV: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t incx = B_row_stride / sizeof(Sleef_quad); + size_t incy = C_row_stride / sizeof(Sleef_quad); + + Sleef_quad *temp_A_buffer = new Sleef_quad[m * n]; + Sleef_quad *temp_B_buffer = new Sleef_quad[n * p]; + memcpy(temp_A_buffer, A_ptr, m * n * sizeof(Sleef_quad)); + memcpy(temp_B_buffer, B_ptr, n * p * sizeof(Sleef_quad)); + A_ptr = temp_A_buffer; + B_ptr = temp_B_buffer; + + // Use temp_C_buffer to avoid unaligned writes + Sleef_quad *temp_C_buffer = new Sleef_quad[m * p]; + + lda = n; + incx = 1; + incy = 1; + + memset(temp_C_buffer, 0, m * p * sizeof(Sleef_quad)); + + result = qblas_gemv('R', 'N', m, n, &alpha, A_ptr, lda, B_ptr, incx, &beta, + temp_C_buffer, incy); + break; + } + + case MATMUL_GEMM: { + size_t lda = A_row_stride / sizeof(Sleef_quad); + size_t ldb = B_row_stride / sizeof(Sleef_quad); + size_t ldc_numpy = C_row_stride / sizeof(Sleef_quad); + + Sleef_quad *temp_A_buffer = new Sleef_quad[m * n]; + Sleef_quad *temp_B_buffer = new Sleef_quad[n * p]; + memcpy(temp_A_buffer, A_ptr, m * n * sizeof(Sleef_quad)); + memcpy(temp_B_buffer, B_ptr, n * p * sizeof(Sleef_quad)); + A_ptr = temp_A_buffer; + B_ptr = temp_B_buffer; + + // since these are now contiguous so, + lda = n; + ldb = p; + size_t ldc_temp = p; + + Sleef_quad *temp_C_buffer = new Sleef_quad[m * p]; + memset(temp_C_buffer, 0, m * p * sizeof(Sleef_quad)); + + result = qblas_gemm('R', 'N', 'N', m, p, n, &alpha, A_ptr, lda, B_ptr, ldb, &beta, + temp_C_buffer, ldc_temp); + + if (result == 0) { + memcpy(C_ptr, temp_C_buffer, m * p * sizeof(Sleef_quad)); + } + + delete[] temp_C_buffer; + delete[] temp_A_buffer; + delete[] temp_B_buffer; + break; + } + } + + if (result != 0) { + PyErr_SetString(PyExc_RuntimeError, "QBLAS operation failed"); + return -1; + } + + return 0; +} + +static int 
+naive_matmul_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + npy_intp m = dimensions[1]; + npy_intp n = dimensions[2]; + npy_intp p = dimensions[3]; + + npy_intp A_batch_stride = strides[0]; + npy_intp B_stride = strides[1]; + npy_intp C_stride = strides[2]; + + npy_intp A_row_stride = strides[3]; + npy_intp A_col_stride = strides[4]; + npy_intp B_row_stride = strides[5]; + npy_intp B_col_stride = strides[6]; + npy_intp C_row_stride = strides[7]; + npy_intp C_col_stride = strides[8]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + char *A = data[0]; + char *B = data[1]; + char *C = data[2]; + + for (npy_intp i = 0; i < m; i++) { + for (npy_intp j = 0; j < p; j++) { + char *C_ij = C + i * C_row_stride + j * C_col_stride; + + if (backend == BACKEND_SLEEF) { + Sleef_quad sum = Sleef_cast_from_doubleq1(0.0); + + for (npy_intp k = 0; k < n; k++) { + char *A_ik = A + i * A_row_stride + k * A_col_stride; + char *B_kj = B + k * B_row_stride + j * B_col_stride; + + Sleef_quad a_val, b_val; + memcpy(&a_val, A_ik, sizeof(Sleef_quad)); + memcpy(&b_val, B_kj, sizeof(Sleef_quad)); + sum = Sleef_fmaq1_u05(a_val, b_val, sum); + } + + memcpy(C_ij, &sum, sizeof(Sleef_quad)); + } + else { + long double sum = 0.0L; + + for (npy_intp k = 0; k < n; k++) { + char *A_ik = A + i * A_row_stride + k * A_col_stride; + char *B_kj = B + k * B_row_stride + j * B_col_stride; + + long double a_val, b_val; + memcpy(&a_val, A_ik, sizeof(long double)); + memcpy(&b_val, B_kj, sizeof(long double)); + + sum += a_val * b_val; + } + + memcpy(C_ij, &sum, sizeof(long double)); + } + } + } + + return 0; +} + +int +init_matmul_ops(PyObject *numpy) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, 
"matmul"); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &QuadPrecDType}; + +#ifndef DISABLE_QUADBLAS + // set threading to max + int num_threads = _quadblas_get_num_threads(); + _quadblas_set_num_threads(num_threads); + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_matmul_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_matmul_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_matmul_strided_loop_unaligned}, + {0, NULL}}; +#else + PyType_Slot slots[] = {{NPY_METH_resolve_descriptors, (void *)&quad_matmul_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&naive_matmul_strided_loop}, + {NPY_METH_unaligned_strided_loop, (void *)&naive_matmul_strided_loop}, + {0, NULL}}; +#endif // DISABLE_QUADBLAS + + PyArrayMethod_Spec Spec = { + .name = "quad_matmul_qblas", + .nin = 2, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + Py_DECREF(ufunc); + return -1; + } + + PyObject *promoter_capsule = + PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); + if (promoter_capsule == NULL) { + Py_DECREF(ufunc); + return -1; + } + + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type); + if (DTypes == NULL) { + Py_DECREF(promoter_capsule); + Py_DECREF(ufunc); + return -1; + } + + if (PyUFunc_AddPromoter(ufunc, DTypes, promoter_capsule) < 0) { + PyErr_Clear(); + } + else { + } + + Py_DECREF(DTypes); + Py_DECREF(promoter_capsule); + Py_DECREF(ufunc); + + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/matmul.h b/quaddtype/numpy_quaddtype/src/umath/matmul.h new file mode 100644 index 00000000..12858497 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/matmul.h @@ -0,0 +1,17 @@ +#ifndef _QUADDTYPE_MATMUL_H +#define 
_QUADDTYPE_MATMUL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int +init_matmul_ops(PyObject *numpy); + +#ifdef __cplusplus +} +#endif + +#endif // _QUADDTYPE_MATMUL_H \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/promoters.hpp b/quaddtype/numpy_quaddtype/src/umath/promoters.hpp new file mode 100644 index 00000000..3b3c1ef3 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/promoters.hpp @@ -0,0 +1,90 @@ +#ifndef _QUADDTYPE_PROMOTERS +#define _QUADDTYPE_PROMOTERS + +#include +#include +#include +#include "numpy/arrayobject.h" +#include "numpy/ndarrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" + +#include "../dtype.h" + +inline int +quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], + PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) +{ + int nin = ufunc->nin; + int nargs = ufunc->nargs; + PyArray_DTypeMeta *common = NULL; + bool has_quad = false; + + // Handle the special case for reductions + if (op_dtypes[0] == NULL) { + assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ + for (int i = 0; i < 3; i++) { + Py_INCREF(op_dtypes[1]); + new_op_dtypes[i] = op_dtypes[1]; + } + return 0; + } + + // Check if any input or signature is QuadPrecision + for (int i = 0; i < nin; i++) { + if (op_dtypes[i] == &QuadPrecDType) { + has_quad = true; + } + } + + if (has_quad) { + common = &QuadPrecDType; + } + else { + for (int i = nin; i < nargs; i++) { + if (signature[i] != NULL) { + if (common == NULL) { + Py_INCREF(signature[i]); + common = signature[i]; + } + else if (common != signature[i]) { + Py_CLEAR(common); // Not homogeneous, unset common + break; + } + } + } + } + // If no common output dtype, use standard promotion for inputs + if (common == NULL) { + common = PyArray_PromoteDTypeSequence(nin, op_dtypes); + if (common == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); // Do not propagate normal promotion errors + } 
+ + return -1; + } + } + + // Set all new_op_dtypes to the common dtype + for (int i = 0; i < nargs; i++) { + if (signature[i]) { + // If signature is specified for this argument, use it + Py_INCREF(signature[i]); + new_op_dtypes[i] = signature[i]; + } + else { + // Otherwise, use the common dtype + Py_INCREF(common); + + new_op_dtypes[i] = common; + } + } + + Py_XDECREF(common); + + return 0; +} + + +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/umath.cpp b/quaddtype/numpy_quaddtype/src/umath/umath.cpp new file mode 100644 index 00000000..50f95623 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/umath.cpp @@ -0,0 +1,116 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" +#include "numpy/dtype_api.h" +} +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "umath.h" +#include "../ops.hpp" +#include "unary_ops.h" +#include "binary_ops.h" +#include "comparison_ops.h" +#include "matmul.h" + +// helper debugging function +static const char * +get_dtype_name(PyArray_DTypeMeta *dtype) +{ + if (dtype == &QuadPrecDType) { + return "QuadPrecDType"; + } + else if (dtype == &PyArray_BoolDType) { + return "BoolDType"; + } + else if (dtype == &PyArray_ByteDType) { + return "ByteDType"; + } + else if (dtype == &PyArray_UByteDType) { + return "UByteDType"; + } + else if (dtype == &PyArray_ShortDType) { + return "ShortDType"; + } + else if (dtype == &PyArray_UShortDType) { + return "UShortDType"; + } + else if (dtype == &PyArray_IntDType) { + return "IntDType"; + } + else if (dtype == &PyArray_UIntDType) { + return "UIntDType"; + } + else if (dtype == 
&PyArray_LongDType) { + return "LongDType"; + } + else if (dtype == &PyArray_ULongDType) { + return "ULongDType"; + } + else if (dtype == &PyArray_LongLongDType) { + return "LongLongDType"; + } + else if (dtype == &PyArray_ULongLongDType) { + return "ULongLongDType"; + } + else if (dtype == &PyArray_FloatDType) { + return "FloatDType"; + } + else if (dtype == &PyArray_DoubleDType) { + return "DoubleDType"; + } + else if (dtype == &PyArray_LongDoubleDType) { + return "LongDoubleDType"; + } + else { + return "UnknownDType"; + } +} + +int +init_quad_umath(void) +{ + PyObject *numpy = PyImport_ImportModule("numpy"); + if (!numpy) { + PyErr_SetString(PyExc_ImportError, "Failed to import numpy module"); + return -1; + } + + if (init_quad_unary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad unary operations"); + goto err; + } + + if (init_quad_binary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad binary operations"); + goto err; + } + + if (init_quad_comps(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad comparison operations"); + goto err; + } + + if (init_matmul_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad matrix multiplication operations"); + goto err; + } + + Py_DECREF(numpy); + return 0; + +err: + Py_DECREF(numpy); + return -1; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath.h b/quaddtype/numpy_quaddtype/src/umath/umath.h similarity index 95% rename from quaddtype/numpy_quaddtype/src/umath.h rename to quaddtype/numpy_quaddtype/src/umath/umath.h index d64f26be..c9253ef7 100644 --- a/quaddtype/numpy_quaddtype/src/umath.h +++ b/quaddtype/numpy_quaddtype/src/umath/umath.h @@ -12,4 +12,4 @@ init_quad_umath(void); } #endif -#endif +#endif \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/unary_ops.cpp b/quaddtype/numpy_quaddtype/src/umath/unary_ops.cpp new file mode 100644 
index 00000000..4c8f31f0 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/unary_ops.cpp @@ -0,0 +1,214 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" + +#include "numpy/dtype_api.h" +} +#include "../quad_common.h" +#include "../scalar.h" +#include "../dtype.h" +#include "../ops.hpp" + +static NPY_CASTING +quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], + npy_intp *NPY_UNUSED(view_offset)) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + Py_INCREF(given_descrs[0]); + loop_descrs[1] = given_descrs[0]; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[1]; + + if (descr_in->backend != descr_out->backend) { + return NPY_UNSAFE_CASTING; + } + + return NPY_NO_CASTING; +} + +template +int +quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? 
sizeof(Sleef_quad) : sizeof(long double); + + quad_value in, out; + while (N--) { + memcpy(&in, in_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + out.sleef_value = sleef_op(&in.sleef_value); + } + else { + out.longdouble_value = longdouble_op(&in.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); + + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in_ptr); + } + else { + *(long double *)out_ptr = longdouble_op((long double *)in_ptr); + } + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + +template +int +create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) +{ + PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); + if (ufunc == NULL) { + return -1; + } + + PyArray_DTypeMeta *dtypes[2] = {&QuadPrecDType, &QuadPrecDType}; + + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, + {NPY_METH_strided_loop, + (void *)&quad_generic_unary_op_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_unary_op_strided_loop_unaligned}, + {0, NULL}}; + + PyArrayMethod_Spec Spec = { + .name = "quad_unary_op", + .nin = 1, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec) < 0) { + return -1; + } + + return 0; +} + +int +init_quad_unary_ops(PyObject *numpy) 
+{ + if (create_quad_unary_ufunc(numpy, "negative") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "positive") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "absolute") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "rint") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "trunc") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "floor") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "ceil") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "square") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log2") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log10") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "log1p") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "exp") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "exp2") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "sin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "cos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "tan") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arcsin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arccos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arctan") < 0) { + return -1; + } + return 0; +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath/unary_ops.h b/quaddtype/numpy_quaddtype/src/umath/unary_ops.h new file mode 100644 index 00000000..6c3f17a4 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/umath/unary_ops.h @@ -0,0 +1,9 @@ +#ifndef _QUADDTYPE_UNARY_OPS_H +#define _QUADDTYPE_UNARY_OPS_H + +#include + +int +init_quad_unary_ops(PyObject *numpy); + +#endif diff --git a/quaddtype/release_tracker.md b/quaddtype/release_tracker.md new file mode 100644 index 
00000000..fbd3b205 --- /dev/null +++ b/quaddtype/release_tracker.md @@ -0,0 +1,93 @@ +# Plan for `numpy-quaddtype` v1.0.0 + +| ufunc name | Added | +| ------------- | ----- | +| add | ✅ | +| subtract | ✅ | +| multiply | ✅ | +| matmul | #116 | +| divide | ✅ | +| logaddexp | | +| logaddexp2 | | +| true_divide | | +| floor_divide | | +| negative | ✅ | +| positive | ✅ | +| power | ✅ | +| float_power | | +| remainder | | +| mod | ✅ | +| fmod | | +| divmod | | +| absolute | ✅ | +| fabs | | +| rint | ✅ | +| sign | | +| heaviside | | +| conj | | +| conjugate | | +| exp | ✅ | +| exp2 | ✅ | +| log | ✅ | +| log2 | ✅ | +| log10 | ✅ | +| expm1 | | +| log1p | ✅ | +| sqrt | ✅ | +| square | ✅ | +| cbrt | | +| reciprocal | | +| gcd | | +| lcm | | +| sin | ✅ | +| cos | ✅ | +| tan | ✅ | +| arcsin | ✅ | +| arccos | ✅ | +| arctan | ✅ | +| arctan2 | ✅ | +| hypot | | +| sinh | | +| cosh | | +| tanh | | +| arcsinh | | +| arccosh | | +| arctanh | | +| degrees | | +| radians | | +| deg2rad | | +| rad2deg | | +| bitwise_and | | +| bitwise_or | | +| bitwise_xor | | +| invert | | +| left_shift | | +| right_shift | | +| greater | ✅ | +| greater_equal | ✅ | +| less | ✅ | +| less_equal | ✅ | +| not_equal | ✅ | +| equal | ✅ | +| logical_and | | +| logical_or | | +| logical_xor | | +| logical_not | | +| maximum | ✅ | +| minimum | ✅ | +| fmax | | +| fmin | | +| isfinite | | +| isinf | | +| isnan | | +| isnat | | +| signbit | | +| copysign | | +| nextafter | | +| spacing | | +| modf | | +| ldexp | | +| frexp | | +| floor | ✅ | +| ceil | ✅ | +| trunc | ✅ | diff --git a/quaddtype/tests/test_dot.py b/quaddtype/tests/test_dot.py index ed135f47..9256f3d8 100644 --- a/quaddtype/tests/test_dot.py +++ b/quaddtype/tests/test_dot.py @@ -1,19 +1,6 @@ -""" -Focused test suite for the dot function in numpy_quaddtype - -This module tests the QuadBLAS dot function for: -- Vector-vector dot products -- Matrix-vector multiplication -- Matrix-matrix multiplication -- Small and large matrix operations -- Basic 
correctness validation - -Uses only the Sleef backend for simplicity. -""" - import pytest import numpy as np -from numpy_quaddtype import QuadPrecision, QuadPrecDType, dot +from numpy_quaddtype import QuadPrecision, QuadPrecDType # ================================================================================ @@ -81,14 +68,14 @@ def create_quad_array(values, shape=None): # ================================================================================ class TestVectorVectorDot: - """Test vector-vector dot products""" + """Test vector-vector np.matmul products""" def test_simple_dot_product(self): - """Test basic vector dot product""" + """Test basic vector np.matmul product""" x = create_quad_array([1, 2, 3]) y = create_quad_array([4, 5, 6]) - result = dot(x, y) + result = np.matmul(x, y) expected = 1*4 + 2*5 + 3*6 # = 32 assert isinstance(result, QuadPrecision) @@ -99,14 +86,14 @@ def test_orthogonal_vectors(self): x = create_quad_array([1, 0, 0]) y = create_quad_array([0, 1, 0]) - result = dot(x, y) + result = np.matmul(x, y) assert_quad_equal(result, 0.0) def test_same_vector(self): - """Test dot product of vector with itself""" + """Test np.matmul product of vector with itself""" x = create_quad_array([2, 3, 4]) - result = dot(x, x) + result = np.matmul(x, x) expected = 2*2 + 3*3 + 4*4 # = 29 assert_quad_equal(result, expected) @@ -121,7 +108,7 @@ def test_various_vector_sizes(self, size): x = create_quad_array(x_vals) y = create_quad_array(y_vals) - result = dot(x, y) + result = np.matmul(x, y) expected = sum(x_vals[i] * y_vals[i] for i in range(size)) assert_quad_equal(result, expected) @@ -131,7 +118,7 @@ def test_negative_and_fractional_values(self): x = create_quad_array([1.5, -2.5, 3.25]) y = create_quad_array([-1.25, 2.75, -3.5]) - result = dot(x, y) + result = np.matmul(x, y) expected = 1.5*(-1.25) + (-2.5)*2.75 + 3.25*(-3.5) assert_quad_equal(result, expected) @@ -151,7 +138,7 @@ def test_simple_matrix_vector(self): # 3x1 vector x = 
create_quad_array([1, 1, 1]) - result = dot(A, x) + result = np.matmul(A, x) expected = [1+2+3, 4+5+6] # [6, 15] assert result.shape == (2,) @@ -164,7 +151,7 @@ def test_identity_matrix_vector(self): I = create_quad_array([1, 0, 0, 0, 1, 0, 0, 0, 1], shape=(3, 3)) x = create_quad_array([2, 3, 4]) - result = dot(I, x) + result = np.matmul(I, x) assert result.shape == (3,) for i in range(3): @@ -181,7 +168,7 @@ def test_various_matrix_vector_sizes(self, m, n): x_vals = [i + 1 for i in range(n)] x = create_quad_array(x_vals) - result = dot(A, x) + result = np.matmul(A, x) assert result.shape == (m,) @@ -205,7 +192,7 @@ def test_simple_matrix_matrix(self): A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) B = create_quad_array([5, 6, 7, 8], shape=(2, 2)) - result = dot(A, B) + result = np.matmul(A, B) # Expected: [[1*5+2*7, 1*6+2*8], [3*5+4*7, 3*6+4*8]] = [[19, 22], [43, 50]] expected = [[19, 22], [43, 50]] @@ -221,11 +208,11 @@ def test_identity_matrix_multiplication(self): I = create_quad_array([1, 0, 0, 1], shape=(2, 2)) # A * I should equal A - result1 = dot(A, I) + result1 = np.matmul(A, I) assert_quad_array_equal(result1, A) # I * A should equal A - result2 = dot(I, A) + result2 = np.matmul(I, A) assert_quad_array_equal(result2, A) @pytest.mark.parametrize("m,n,k", [(2,2,2), (2,3,4), (3,2,5), (4,4,4), (5,6,7)]) @@ -239,7 +226,7 @@ def test_various_matrix_sizes(self, m, n, k): B_vals = [(i*n + j + 1) for i in range(k) for j in range(n)] B = create_quad_array(B_vals, shape=(k, n)) - result = dot(A, B) + result = np.matmul(A, B) assert result.shape == (m, n) @@ -258,12 +245,12 @@ def test_associativity(self): C = create_quad_array([1, 1, 2, 1], shape=(2, 2)) # Compute (A*B)*C - AB = dot(A, B) - result1 = dot(AB, C) + AB = np.matmul(A, B) + result1 = np.matmul(AB, C) # Compute A*(B*C) - BC = dot(B, C) - result2 = dot(A, BC) + BC = np.matmul(B, C) + result2 = np.matmul(A, BC) assert_quad_array_equal(result1, result2, rtol=1e-25) @@ -285,7 +272,7 @@ def 
test_large_square_matrices(self, size): A = create_quad_array(A_vals, shape=(size, size)) B = create_quad_array(B_vals, shape=(size, size)) - result = dot(A, B) + result = np.matmul(A, B) assert result.shape == (size, size) @@ -303,7 +290,7 @@ def test_large_square_matrices(self): assert_quad_equal(result[size//2, size//2], expected_value, rtol=1e-15, atol=1e-15) def test_large_vector_operations(self): - """Test large vector dot products""" + """Test large vector dot products via np.matmul""" size = 1000 # Create vectors with known sum @@ -313,7 +300,7 @@ x = create_quad_array(x_vals) y = create_quad_array(y_vals) - result = dot(x, y) + result = np.matmul(x, y) expected = size * 1.0 * 2.0 # = 2000.0 assert_quad_equal(result, expected) @@ -329,7 +316,7 @@ def test_rectangular_large_matrices(self): A = create_quad_array(A_vals, shape=(m, k)) B = create_quad_array(B_vals, shape=(k, n)) - result = dot(A, B) + result = np.matmul(A, B) assert result.shape == (m, n) @@ -353,24 +340,24 @@ def test_dimension_mismatch_vectors(self): x = create_quad_array([1, 2]) y = create_quad_array([1, 2, 3]) - with pytest.raises(ValueError, match="same length"): - dot(x, y) + with pytest.raises(ValueError, match=r"matmul: Input operand 1 has a mismatch in its core dimension 0"): + np.matmul(x, y) def test_dimension_mismatch_matrix_vector(self): """Test dimension mismatch in matrix-vector""" A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) x = create_quad_array([1, 2, 3]) # Wrong size - with pytest.raises(ValueError, match="columns must match"): - dot(A, x) + with pytest.raises(ValueError, match=r"matmul: Input operand 1 has a mismatch in its core dimension 0"): + np.matmul(A, x) def test_dimension_mismatch_matrices(self): """Test dimension mismatch in matrix-matrix""" A = create_quad_array([1, 2, 3, 4], shape=(2, 2)) B = create_quad_array([1, 2, 3, 4, 5, 6], shape=(3, 2)) # Wrong size - with pytest.raises(ValueError, match="Matrix inner dimensions
must match"): - dot(A, B) + with pytest.raises(ValueError, match=r"matmul: Input operand 1 has a mismatch in its core dimension 0"): + np.matmul(A, B) if __name__ == "__main__":