diff --git a/pypardiso/pardiso_wrapper.py b/pypardiso/pardiso_wrapper.py index be9174b..a9529c1 100644 --- a/pypardiso/pardiso_wrapper.py +++ b/pypardiso/pardiso_wrapper.py @@ -142,6 +142,24 @@ def __init__(self, mtype=11, phase=13, size_limit_storage=5e7): self.size_limit_storage = size_limit_storage self._solve_transposed = False + self._init_iparm() # manually setup iparm + + def _init_iparm(self): + # https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-fortran/2025-2/pardiso-iparm-parameter.html + self.set_iparm(1 , 1 ) # manually setting + self.set_iparm(2 , 103) # OMP version of fill-in reduce, maximum level of optimization (L=10) + self.set_iparm(6 , 0 ) # set to 1 if overwrite_b + self.set_iparm(8 , 2 ) # default 2-steps refinement + self.set_iparm(10, 13 ) # set to 8 if symmetric + self.set_iparm(11, 1 ) # set to 0 if symmetric + self.set_iparm(12, 0 ) # Ax=b, no transpose + self.set_iparm(13, 1 ) # enable matching for nonsymmetric + self.set_iparm(18, 0 ) # disable nnz report + self.set_iparm(21, 1 ) # default pivoting + self.set_iparm(24, 0 ) # cannot use 10, idk why + self.set_iparm(35, 1 ) # zero-based indexing + self.set_iparm(37, 0 ) # CSR format + def factorize(self, A): """ Factorize the matrix A, the factorization will automatically be used if the same matrix A is passed to the @@ -163,7 +181,7 @@ def factorize(self, A): b = np.zeros((A.shape[0], 1)) self._call_pardiso(A, b) - def solve(self, A, b): + def solve(self, A, b, overwrite_b: bool=False): """ solve Ax=b for x @@ -178,14 +196,24 @@ def solve(self, A, b): """ self._check_A(A) - b = self._check_b(A, b) + b_fcontig = self._check_b(A, b) + + iparm6 = self.get_iparm(6) + if not(b_fcontig is b): + if overwrite_b: + warnings.warn('overwrite_b takes no effect: input b is not f_contiguous or the dtype is not compatible', + SparseEfficiencyWarning) + overwrite_b = True # already made a copy of b, safe to overwrite + + if overwrite_b: + self.set_iparm(6, 1) # overwrite if self._is_already_factorized(A): self.set_phase(33) else: self.set_phase(13) - x = self._call_pardiso(A, b) + x = self._call_pardiso(A, b_fcontig, overwrite_b=overwrite_b) # it is possible to call the solver with empty columns, but computationally expensive to check this # beforehand, therefore only the result is checked for infinite elements. @@ -193,6 +221,7 @@ def solve(self, A, b): # warnings.warn('The result contains infinite elements. Make sure that matrix A contains no empty columns.', # PyPardisoWarning) # --> this check doesn't work consistently, maybe add an advanced input check method for A + self.set_iparm(6, iparm6) # revert return x @@ -263,16 +292,21 @@ def _check_b(self, A, b): return b - def _call_pardiso(self, A, b): + def _call_pardiso(self, A, b, overwrite_b: bool=False): - x = np.zeros_like(b) + # PARDISO will fill x with 0 if iparm(6)=1 + x = np.empty_like(b) if overwrite_b else np.zeros_like(b) pardiso_error = ctypes.c_int32(0) c_int32_p = ctypes.POINTER(ctypes.c_int32) c_float64_p = ctypes.POINTER(ctypes.c_double) - # 1-based indexing - ia = A.indptr + 1 - ja = A.indices + 1 + ia = A.indptr + ja = A.indices + + if self.get_iparm(35) == 0: + # 1-based indexing + ia = ia + 1 + ja = ja + 1 self._mkl_pardiso(self.pt.ctypes.data_as(ctypes.POINTER(self._pt_type[0])), # pt ctypes.byref(ctypes.c_int32(1)), # maxfct @@ -294,7 +328,7 @@ def _call_pardiso(self, A, b): if pardiso_error.value != 0: raise PyPardisoError(pardiso_error.value) else: - return np.ascontiguousarray(x) # change memory-layout back from fortran to c order + return b if overwrite_b else x # no need to force c_contiguous return def get_iparms(self): """Returns a dictionary of iparms""" diff --git a/pypardiso/scipy_aliases.py b/pypardiso/scipy_aliases.py index 7271c39..d48bb8e 100644 --- a/pypardiso/scipy_aliases.py +++ b/pypardiso/scipy_aliases.py @@ -9,7 +9,7 @@ pypardiso_solver = PyPardisoSolver() -def spsolve(A, b, factorize=True, squeeze=True, solver=pypardiso_solver, *args, **kwargs): +def spsolve(A, b, factorize=True, squeeze=True, solver=pypardiso_solver, overwrite_b: bool=False, *args, **kwargs): """ This function mimics scipy.sparse.linalg.spsolve, but uses the Pardiso solver instead of SuperLU/UMFPACK @@ -45,7 +45,7 @@ def spsolve(A, b, factorize=True, squeeze=True, solver=pypardiso_solver, *args, if factorize and not solver._is_already_factorized(A): solver.factorize(A) - x = solver.solve(A, b) + x = solver.solve(A, b, overwrite_b=overwrite_b) if squeeze: return x.squeeze() # scipy spsolve always returns vectors with shape (n,) indstead of (n,1)