From 78cd7b11bf77cfa4eae62d64dd0f7f6f461c1e8c Mon Sep 17 00:00:00 2001 From: Aayush Khanna Date: Fri, 18 Jul 2025 03:53:55 +0000 Subject: [PATCH 1/4] feat: add lapack/base/dlaqr5 --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../@stdlib/lapack/base/dlaqr5/lib/base.js | 745 ++++++++++++++++++ .../@stdlib/lapack/base/dlaqr5/lib/dlaqr1.js | 124 +++ .../@stdlib/lapack/base/dlaqr5/lib/dlarfg.js | 142 ++++ .../@stdlib/lapack/base/dlaqr5/lib/dtrmm.js | 361 +++++++++ 4 files changed, 1372 insertions(+) create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr1.js create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlarfg.js create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dtrmm.js diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js new file mode 100644 index 000000000000..ea1a7d26f645 --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js @@ -0,0 +1,745 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib 
Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/* eslint-disable max-len, max-params, max-lines-per-function, max-statements, max-lines, max-depth */ + +'use strict'; + +// MODULES // + +var floor = require( '@stdlib/math/base/special/floor' ); +var abs = require( '@stdlib/math/base/special/abs' ); +var dlamch = require( '@stdlib/lapack/base/dlamch' ); +var dgemm = require( '@stdlib/blas/base/dgemm' ).ndarray; +var dlacpy = require( '@stdlib/lapack/base/dlacpy' ).ndarray; +var Float64Array = require( '@stdlib/array/float64' ); +var dlaset = require( '@stdlib/lapack/base/dlaset' ).ndarray; +var mod = require( '@stdlib/math/base/special/fmod' ); +var min = require( '@stdlib/math/base/special/fast/min' ); +var max = require( '@stdlib/math/base/special/fast/max' ); +var dtrmm = require( './dtrmm.js' ); +var dlarfg = require( './dlarfg.js' ); +var dlaqr1 = require( './dlaqr1.js' ); + + +// VARIABLES // + +var safmin = dlamch( 'safe minimum' ); +var ulp = dlamch( 'precision' ); + + +// FUNCTIONS // + +/** +* Shuffle shifts into pairs of real shifts and pairs of complex conjugate shifts, assuming that complex conjugate shifts are already adjacent to one another. 
+* +* @private +* @param {integer} nshifts - number of simultaneous shifts, must be even and positive +* @param {Float64Array} SR - real parts of the shifts of origin that define the QR sweep +* @param {integer} strideSR - stride length of `SR` +* @param {NonNegativeInteger} offsetSR - starting index for `SR` +* @param {Float64Array} SI - imaginary parts of the shifts of origin that define the QR sweep +* @param {integer} strideSI - stride length of `SI` +* @param {NonNegativeInteger} offsetSI - starting index of `SI` +* @returns {void} +*/ +function shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI ) { + var swap; + var isi; + var isr; + var i; + + isi = offsetSI; + isr = offsetSR; + for ( i = 0; i <= nshifts - 2; i += 2 ) { + if ( SI[ isi ] !== -SI[ isi + strideSI ] ) { + swap = SR[ isr ]; + SR[ isr ] = SR[ isr + strideSR ]; + SR[ isr + strideSR ] = SR[ isr + (2*strideSR) ]; + SR[ isr + (2*strideSR) ] = swap; + + swap = SI[ isi ]; + SI[ isi ] = SI[ isi + strideSI ]; + SI[ isi + strideSI ] = SI[ isi + (2*strideSI) ]; + SI[ isi + (2*strideSI) ] = swap; + } + isi += (2*strideSI); + isr += (2*strideSR); + } +} + + +// MAIN // + +/** +* Performs a single, small shift multiline QR sweep. 
+* +* @param {boolean} wantT - boolean value indicating whether the quasi triangular Schur factor is being computed +* @param {boolean} wantZ - boolean value indicating whether the orthogonal Schur factor is being computed +* @param {integer} kacc22 - integer value ranging from 0 to 2 (inclusive), specifies the computation mode for far-from-diagonal updates +* @param {integer} N - number of rows/columns in `H` +* @param {integer} KTOP - first row and column of the submatrix of `H` where the QR sweep will be applied +* @param {integer} KBOT - last row and column of the submatrix of `H` where the QR sweep will be applied +* @param {integer} nshifts - number of simultaneous shifts, must be even and positive +* @param {Float64Array} SR - real parts of the shifts of origin that define the QR sweep +* @param {integer} strideSR - stride length of `SR` +* @param {NonNegativeInteger} offsetSR - starting index for `SR` +* @param {Float64Array} SI - imaginary parts of the shifts of origin that define the QR sweep +* @param {integer} strideSI - stride length of `SI` +* @param {NonNegativeInteger} offsetSI - starting index of `SI` +* @param {Float64Array} H - input upper hessenberg matrix +* @param {integer} strideH1 - stride of the first dimension of `H` +* @param {integer} strideH2 - stride of the second dimension of `H` +* @param {NonNegativeInteger} offsetH - starting index of `H` +* @param {integer} iloZ - starting row from where the transformation must be applied if `wantZ` is true +* @param {integer} ihiZ - ending row from where the transformation must be applied if `wantZ` is true +* @param {Float64Array} Z - the QR sweep orthogonal similarity transformation is accumulated into `Z` between the rows and columns `iloZ` and `ihiZ` if `wantZ` is true, otherwise `Z` is not referenced +* @param {integer} strideZ1 - stride of the first dimension of `Z` +* @param {integer} strideZ2 - stride of the second dimension of `Z` +* @param {NonNegativeInteger} offsetZ - starting index 
of `Z` +* @param {Float64Array} V - householder vectors are stored column-wise, used in forming bulges for the multi shift QR algorithm +* @param {integer} strideV1 - stride of the first dimension of `V` +* @param {integer} strideV2 - stride of the second dimension of `V` +* @param {NonNegativeInteger} offsetV - starting index of `V` +* @param {Float64Array} U - used to hold the product of householder reflector that represent accumulated orthogonal transformations from the bulge-chasing process +* @param {integer} strideU1 - stride of the first dimension of `U` +* @param {integer} strideU2 - stride of the second dimension of `U` +* @param {NonNegativeInteger} offsetU - starting index of `U` +* @param {integer} NH - number of columns in `WH` available for workspace +* @param {Float64Array} WH - workspace array +* @param {integer} strideWH1 - stride of the first dimension of `WH` +* @param {integer} strideWH2 - stride of the second dimension of `WH` +* @param {NonNegativeInteger} offsetWH - starting index of `WH` +* @param {integer} NV - number of rows in `WV` available for workspace +* @param {Float64Array} WV - workspace array +* @param {integer} strideWV1 - stride of the first dimension of `WV` +* @param {integer} strideWV2 - stride of the second dimension of `WV` +* @param {NonNegativeInteger} offsetWV - starting index of `WV` +* @returns {void} +* +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var H = new Float64Array( [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] ); +* var Z = new Float64Array( [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] ); +* var V = new Float64Array( 6 ); +* var U = new Float64Array( 10 ); +* var WH = new Float64Array( 16 ); +* var WV = new Float64Array( 16 ); +* var SR = new Float64Array( [ 1.1, 2.2 ] ); +* var SI = new Float64Array( [ 0.0, 0.0 ] ); +* +* dlaqr5( true, true, 0, 4, 1, 4, 2, SR, 1, 0, SI, 1, 0, H, 4, 1, 0, 1, 4, Z, 4, 1, 0, V, 2, 1, 0, 
U, 2, 1, 0, 4, WH, 4, 1, 0, 4, WV, 4, 1, 0 ); +* H => [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] +* Z => [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] +*/ +function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI, H, strideH1, strideH2, offsetH, iloZ, ihiZ, Z, strideZ1, strideZ2, offsetZ, V, strideV1, strideV2, offsetV, U, strideU1, strideU2, offsetU, NH, WH, strideWH1, strideWH2, offsetWH, NV, WV, strideWV1, strideWV2, offsetWV ) { + var dlarfgOut; + var block22; + var smlnum; + var refsum; + var mstart; + var incol; + var accum; + var nbmps; + var krcol; + var bmp22; + var alpha; + var start; + var ndcol; + var beta; + var jcol; + var jlen; + var jrow; + var jtop; + var jbot; + var mend; + var mtop; + var mbot; + var tst1; + var tst2; + var step; + var h11; + var h12; + var h21; + var h22; + var kdu; + var end; + var kms; + var knz; + var kzs; + var m22; + var scl; + var k1; + var nu; + var vt; + var i2; + var i4; + var j2; + var j4; + var ns; + var ih; + var k; + var m; + var j; + + dlarfgOut = new Float64Array( 2 ); // Workspace array to pass `alpha` to the `dlarfg` routine + vt = new Float64Array( 3 ); // local array + + // If there are no shifts, then there is nothing to do. + if ( nshifts < 2 ) { + return; + } + + // If the active block is empty or 1-by-1, then there is nothing to do. + if ( KTOP >= KBOT ) { + return; + } + + /* + * Shuffle shifts into pairs of real shifts and pairs of complex conjugate shifts, + * assuming that complex conjugate shifts are already adjacent to one another. + */ + shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI ); + + // `nshifts` is supposed to be even, but if it is odd, then simply reduce it by one. The shuffle above ensures that the dropped shift is real and that the remaining shifts are paired. 
+ ns = nshifts - mod( nshifts, 2.0 ); + + // Machine constants for deflation + smlnum = safmin * N / ulp; + + // Use accumulated reflections to update far-from-diagonal entries? + accum = ( kacc22 === 1 ) || ( kacc22 === 2 ); + + // If so, exploit the 2-by-2 block structure? + block22 = ( ns > 2 ) && ( kacc22 === 2 ); + + // Clear trash + if ( KTOP + 2 <= KBOT ) { + ih = offsetH + ( (KTOP+2) * strideH1 ) + ( KTOP * strideH2 ); + H[ ih ] = 0.0; + } + + // `nbmps` = number of 2-shift bulges in the chain + nbmps = ns / 2; + + // KDU = width of slab + kdu = ( 6 * nbmps ) - 3; + + start = ( 3 * ( 1 - nbmps ) ) + KTOP - 1; + end = KBOT - 2; + step = ( 3 * nbmps ) - 2; + + // Create and chase chains of `nbmps` bulges + for ( incol = start; incol <= end; incol += step ) { + ndcol = incol + kdu; + if ( accum ) { + dlaset( 'all', kdu, kdu, 0.0, 1.0, U, strideU1, strideU2, offsetU ); + } + + /* + * Near-the-diagonal bulge chase. The following loop performs the + * near-the-diagonal part of a small bulge multi-shift QR sweep. Each + * `6*nbmps-2` column diagonal chunk extends from column `incol` to column + * `ndcol` (including both column `incol` and column `ndcol`). The following + * loop chases a 3*`nbmps` column long chain of `nbmps` bulges `3*nbmps-2` + * columns to the right. (`incol` may be less than `KTOP` and and `ndcol` + * may be greater than `KBOT` indicating phantom columns from which to + * chase bulges before they are actually introduced or to which to + * chase bulges beyond column `KBOT`.) + */ + for ( krcol = incol; krcol < min( incol + ( 3*nbmps ) - 3, KBOT - 2 ); krcol+= 1 ) { + /* + * Bulges number `mtop` to `mbot` are active double implicit shift bulges. + * There may or may not also be small 2-by-2 bulge, if there is room. + * The inactive bulges (if any) must wait until the active bulges + * have moved down the diagonal to make room. The phantom matrix + * paradigm described above helps keep track. 
+ */ + mtop = max( 1, floor( ( KTOP - 1 - krcol + 2 ) / 3 ) + 1 ); + mbot = min( nbmps, floor( ( KBOT - krcol ) / 3 ) ); + m22 = mbot + 1; + bmp22 = ( mbot < nbmps ) && ( krcol + ( 3 * ( m22 - 1 ) ) === KBOT - 2 ); + + /* + * Generate reflections to chase the chain right one column. + * (The minimum value of K is KTOP-1.) + */ + for ( m = mtop; m <= mbot; m++ ) { + k = krcol + ( 3 * ( m - 1 ) ); + if ( k === KTOP - 1 ) { + dlaqr1( 3, H, strideH1, strideH2, offsetH + (KTOP*strideH1) + (KTOP*strideH2), SR[ offsetSR + (((2*m)-2)*strideSR) ], SI[ offsetSI + (((2*m)-2)*strideSI) ], SR[ offsetSR + (((2*m)-1)*strideSR) ], SI[ offsetSI + (((2*m)-1)*strideSI) ], vt, 1, 0 ); + + alpha = V[ offsetV + (m*strideV2) ]; + + // Prepare the `dlarfgOut` array to pass into the routine + dlarfgOut[ 0 ] = alpha; + dlarfgOut[ 1 ] = 0.0; + + // Call `dlarfg` using the `dlarfgOut` array to store outputs + dlarfg( 3, V, strideV1, offsetV + (m*strideV2), dlarfgOut, 1, 0 ); + + // Write the outputs to their expected positions + alpha = dlarfgOut[ 0 ]; + V[ offsetV + (m*strideV2) ] = dlarfgOut[ 1 ]; + } else { + beta = H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ]; + V[ offsetV + (m*strideV2) + strideV1 ] = H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ]; + V[ offsetV + (m*strideV2) + (2*strideV1) ] = H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ]; + + // Prepare the `dlarfgOut` array to pass into the routine + dlarfgOut[ 0 ] = beta; + dlarfgOut[ 1 ] = 0.0; + + // Call `dlarfg` using the `dlarfgOut` array to store outputs + dlarfg( 3, V, strideV1, offsetV + (m*strideV2), dlarfgOut, 1, 0 ); + + // Write the outputs to their expected positions + beta = dlarfgOut[ 0 ]; + V[ offsetV + (m*strideV2) ] = dlarfgOut[ 1 ]; + + /* + * A Bulge may collapse because of vigilant deflation or + * destructive underflow. In the underflow case, try the + * two-small-subdiagonals trick to try to reinflate the bulge. 
+ */ + if ( H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] !== 0.0 || H[ offsetH + ((k+3)*strideH1) + ((k+1)*strideH2) ] !== 0.0 || H[ offsetH + ((k+3)*strideH1) + ((k+2)*strideH2) ] === 0.0 ) { + // Typical case: not collapsed (yet). + H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] = beta; + H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; + H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] = 0.0; + } else { + /* + * Atypical case: collapsed. Attempt to reintroduce + * ignoring H(K+1,K) and H(K+2,K). If the fill + * resulting from the new reflector is too large, + * then abandon it. Otherwise, use the new one. + */ + dlaqr1( 3, H, strideH1, strideH2, offsetH + ((k+1)*strideH1) + ((k+1)*strideH2), SR, strideSR, offsetSR + (((2*m)-1)*strideSR), SI, strideSI, offsetSI + (((2*m)-1)*strideSI), SR, strideSR, offsetSR + (2*m*strideSR), SI, strideSI, offsetSI + (2*m*strideSI), vt ); + alpha = vt[ 0 ]; + + // Prepare the `dlarfgOut` array to pass into the routine + dlarfgOut[ 0 ] = alpha; + dlarfgOut[ 1 ] = 0.0; + + // Call `dlarfg` using the `dlarfgOut` array to store outputs + dlarfg( 3, vt, 1, 1, dlarfgOut, 1, 0 ); + + // Write the outputs to their expected positions + alpha = dlarfgOut[ 0 ]; + vt[ 0 ] = dlarfgOut[ 1 ]; + + refsum = vt[ 0 ] * ( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] + ( vt[ 1 ] * H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] ) ); + + if ( abs( H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] - ( refsum * vt[ 1 ] ) ) + abs( refsum * vt[ 2 ] ) > ulp * ( abs( H[ offsetH + (k*strideH1) + (k*strideH2) ] ) + abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ) + abs( H[ offsetH + ((k+2)*strideH1) + ((k+2)*strideH2) ] ) ) ) { + /* + * Starting a new bulge here would create + * non-negligible fill. Use the old one with + * trepidation. 
+ */ + H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] = beta; + H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; + H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] = 0.0; + } else { + /* + * Stating a new bulge here would create only + * negligible fill. Replace the old reflector + * with the new one. + */ + H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] -= refsum; + H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; + H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] = 0.0; + V[ offsetV + (m*strideV2) ] = vt[ 0 ]; + V[ offsetV + (m*strideV2) + strideV1 ] = vt[ 1 ]; + V[ offsetV + (m*strideV2) + (2*strideV1) ] = vt[ 2 ]; + } + } + } + } + + // Generate a 2-by-2 reflection, if needed. + k = krcol + ( 3 * ( m22 - 1 ) ); + if ( bmp22 ) { + if ( k === KTOP - 1 ) { + dlaqr1( 2, H, strideH1, strideH2, offsetH + ((k+1)*strideH1) + ((k+1)*strideH2), SR[ offsetSR + (((2*m22)-2)*strideSR) ], SI[ offsetSI + (((2*m22)-2)*strideSI) ], SR[ offsetSR + (((2*m22)-1)*strideSR) ], SI[ offsetSI + (((2*m22)-1)*strideSI) ], vt, 1, 0 ); + beta = V[ offsetV + (m22*strideV2) ]; + + // Prepare the `dlarfgOut` array to pass into the routine + dlarfgOut[ 0 ] = beta; + dlarfgOut[ 1 ] = 0.0; + + // Call `dlarfg` using the `dlarfgOut` array to store outputs + dlarfg( 2, V, strideV1, offsetV + (m22*strideV2), dlarfgOut, 1, 0 ); + + // Write the outputs to their expected positions + beta = dlarfgOut[ 0 ]; + V[ offsetV + (m22*strideV2) ] = dlarfgOut[ 1 ]; + } else { + beta = H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ]; + V[ offsetV + (m22*strideV2) + strideV1 ] = H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ]; + + // Prepare the `dlarfgOut` array to pass into the routine + dlarfgOut[ 0 ] = beta; + dlarfgOut[ 1 ] = 0.0; + + // Call `dlarfg` using the `dlarfgOut` array to store outputs + dlarfg( 2, V, strideV1, offsetV + (m22*strideV2), dlarfgOut, 1, 0 ); + + // Write the outputs to their expected positions + beta = dlarfgOut[ 0 ]; + V[ offsetV + (m22*strideV2) ] = dlarfgOut[ 1 ]; + + H[ 
offsetH + ((k+1)*strideH1) + (k*strideH2) ] = beta; + H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; + } + } + + // Multiply H by reflections from the left + if ( accum ) { + jbot = min( ndcol, KBOT ); + } else if ( wantT ) { + jbot = N; + } else { + jbot = KBOT; + } + + for ( j = max( KTOP, krcol ); j <= jbot; j++ ) { + mend = min( mbot, floor( ( j - krcol + 2 ) / 3 ) ); + for ( m = mtop; m <= mend; m++ ) { + k = krcol + ( 3 * ( m - 1 ) ); + refsum = V[ offsetV + (m*strideV2) ] * ( H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * H[ offsetH + ((k+2)*strideH1) + (j*strideH2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * H[ offsetH + ((k+3)*strideH1) + (j*strideH2) ] ) ); + H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] -= refsum; + H[ offsetH + ((k+2)*strideH1) + (j*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; + H[ offsetH + ((k+3)*strideH1) + (j*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; + } + } + + if ( bmp22 ) { + k = krcol + ( 3 * ( m22 - 1 ) ); + for ( j = max( k + 1, KTOP ); j <= jbot; j++ ) { + refsum = V[ offsetV + (m22*strideV2) ] * ( H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * H[ offsetH + ((k+2)*strideH1) + (j*strideH2) ] ) ); + H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] -= refsum; + H[ offsetH + ((k+2)*strideH1) + (j*strideH2) ] -= refsum * V[ offsetV + (m22*strideV2) + strideV1 ]; + } + } + + /* + * Multiply H by reflections from the right. + * Delay filling in the last row until the + * vigilant deflation check is complete. 
+ */ + if ( accum ) { + jtop = max( KTOP, incol ); + } else if ( wantT ) { + jtop = 1; + } else { + jtop = KTOP; + } + + for ( m = mtop; m <= mbot; m++ ) { + if ( V[ offsetV + (m*strideV2) ] !== 0.0 ) { + k = krcol + ( 3 * ( m - 1 ) ); + for ( j = jtop; j <= min( KBOT, k + 3 ); j++ ) { + refsum = V[ offsetV + (m*strideV2) ] * ( H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * H[ offsetH + (j*strideH1) + ((k+3)*strideH2) ] ) ); + H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] -= refsum; + H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; + H[ offsetH + (j*strideH1) + ((k+3)*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; + } + + if ( accum ) { + /* + * Accumulate U. (If necessary, update Z later + * with with an efficient matrix-matrix multiply.) + */ + kms = k - incol; + for ( j = max( 1, KTOP - incol ); j <= kdu; j++ ) { + refsum = V[ offsetV + (m*strideV2) ] * ( U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * U[ offsetU + (j*strideU1) + ((kms+3)*strideU2) ] ) ); + U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] -= refsum; + U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; + U[ offsetU + (j*strideU1) + ((kms+3)*strideU2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; + } + } else if ( wantZ ) { + /* + * U is not accumulated, so update Z now by + * multiplying by reflections from the right. 
+ */ + for ( j = iloZ; j <= ihiZ; j++ ) { + refsum = V[ offsetV + (m*strideV2) ] * ( Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * Z[ offsetZ + (j*strideZ1) + ((k+3)*strideZ2) ] ) ); + Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] -= refsum; + Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; + Z[ offsetZ + (j*strideZ1) + ((k+3)*strideZ2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; + } + } + } + } + + // Special case: 2-by-2 reflection (if needed) + k = krcol + ( 3 * ( m22 - 1 ) ); + if ( bmp22 ) { + if ( V[ offsetV + (m22*strideV2) ] !== 0.0 ) { + for ( j = jtop; j <= min( KBOT, k + 3 ); j++ ) { + refsum = V[ offsetV + (m22*strideV2) ] * ( H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] ) ); + H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] -= refsum; + H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] -= refsum * V[ offsetV + (m22*strideV2) + strideV1 ]; + } + + if ( accum ) { + kms = k - incol; + for ( j = max( 1, KTOP - incol ); j <= kdu; j++ ) { + refsum = V[ offsetV + (m22*strideV2) ] * ( U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] ) ); + U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] -= refsum; + U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] -= refsum * V[ offsetV + (m22*strideV2) + strideV1 ]; + } + } else if ( wantZ ) { + for ( j = iloZ; j <= ihiZ; j++ ) { + refsum = V[ offsetV + (m22*strideV2) ] * ( Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] ) ); + Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] -= refsum; + Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] -= refsum 
* V[ offsetV + (m22*strideV2) + strideV1 ]; + } + } + } + } + + // Vigilant deflation check + mstart = mtop; + if ( krcol + ( 3 * ( mstart - 1 ) ) < KTOP ) { + mstart += 1; + } + mend = mbot; + if ( bmp22 ) { + mend += 1; + } + if ( krcol === KBOT - 2 ) { + mend += 1; + } + for ( m = mstart; m <= mend; m++ ) { + k = min( KBOT - 1, krcol + ( 3 * ( m - 1 ) ) ); + + /* + * The following convergence test requires that the tradition + * small-compared-to-nearby-diagonals criterion and the + * Ahues & Tisseur (LAWN 122, 1997) criteria both be satisfied. + * The latter improves accuracy in some examples. Falling + * back on an alternate convergence criterion when TST1 or + * TST2 is zero (as done here) is traditional but probably + * unnecessary. + */ + if ( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] !== 0.0 ) { + tst1 = abs( H[ offsetH + (k*strideH1) + (k*strideH2) ] ) + abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ); + if ( tst1 === 0.0 ) { + if ( k >= KTOP + 1 ) { + tst1 += abs( H[ offsetH + (k*strideH1) + ((k-1)*strideH2) ] ); + } + if ( k >= KTOP + 2 ) { + tst1 += abs( H[ offsetH + (k*strideH1) + ((k-2)*strideH2) ] ); + } + if ( k >= KTOP + 3 ) { + tst1 += abs( H[ offsetH + (k*strideH1) + ((k-3)*strideH2) ] ); + } + if ( k <= KBOT - 2 ) { + tst1 += abs( H[ offsetH + ((k+2)*strideH1) + ((k+1)*strideH2) ] ); + } + if ( k <= KBOT - 3 ) { + tst1 += abs( H[ offsetH + ((k+3)*strideH1) + ((k+1)*strideH2) ] ); + } + if ( k <= KBOT - 4 ) { + tst1 += abs( H[ offsetH + ((k+4)*strideH1) + ((k+1)*strideH2) ] ); + } + } + if ( abs( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] ) <= max( smlnum, ulp * tst1 ) ) { + h12 = max( abs( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + ((k+1)*strideH2) ] ) ); + h21 = min( abs( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + ((k+1)*strideH2) ] ) ); + h11 = max( abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + 
(k*strideH2) ] - H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ) ); + h22 = min( abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + (k*strideH2) ] - H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ) ); + scl = h11 + h12; + tst2 = h22 * ( h11 / scl ); + if ( tst2 === 0.0 || h21 * ( h12 / scl ) <= max( smlnum, ulp * tst2 ) ) { + H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] = 0.0; + } + } + } + } + + // Fill in the last row of each bulge. + mend = min( nbmps, floor( ( KBOT - krcol - 1 ) / 3 ) ); + for ( m = mtop; m <= mend; m++ ) { + k = krcol + ( 3 * ( m - 1 ) ); + refsum = V[ offsetV + (m*strideV2) ] * V[ offsetV + (m*strideV2) + (2*strideV1) ] * H[ offsetH + ((k+4)*strideH1) + ((k+3)*strideH2) ]; + H[ offsetH + ((k+4)*strideH1) + ((k+1)*strideH2) ] = -refsum; + H[ offsetH + ((k+4)*strideH1) + ((k+2)*strideH2) ] = -refsum * V[ offsetV + (m*strideV2) + strideV1 ]; + H[ offsetH + ((k+4)*strideH1) + ((k+3)*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; + } + } // End of near-the-diagonal bulge chase. + + // Use U (if accumulated) to update far-from-diagonal entries in H. If required, use U to update Z as well. + if ( accum ) { + if ( wantT ) { + jtop = 1; + jbot = N; + } else { + jtop = KTOP; + jbot = KBOT; + } + + /* + * Updates not exploiting the 2-by-2 block structure of U. K1 and NU + * keep track of the location and size of U in the special cases of + * introducing bulges and chasing bulges off the bottom. In these + * special cases and in case the number of shifts is NS = 2, there + * is no 2-by-2 block structure to exploit. 
+ */ + if ( !block22 || incol < KTOP || ndcol > KBOT || ns <= 2 ) { + k1 = max( 1, KTOP - incol ); + nu = kdu - max( 0, ndcol - KBOT ) - k1 + 1; + + // Horizontal Multiply + for ( jcol = min( ndcol, KBOT ) + 1; jcol <= jbot; jcol += NH ) { + jlen = min( NH, jbot - jcol + 1 ); + dgemm( 'conjugate-transpose', 'no-transpose', nu, jlen, nu, 1.0, U, strideU1, strideU2, offsetU + (k1*strideU1) + (k1*strideU2), H, strideH1, strideH2, offsetH + ((incol+k1)*strideH1) + (jcol*strideH2), 0.0, WH, strideWH1, strideWH2, offsetWH ); + dlacpy( 'all', nu, jlen, WH, strideWH1, strideWH2, offsetWH, H, strideH1, strideH2, offsetH + ((incol+k1)*strideH1) + (jcol*strideH2) ); + } + + // Vertical multiply + for ( jrow = jtop; jrow <= max( KTOP, incol ) - 1; jrow += NV ) { + jlen = min( NV, max( KTOP, incol ) - jrow ); + dgemm( 'no-transpose', 'no-transpose', jlen, nu, nu, 1.0, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+k1)*strideH2), U, strideU1, strideU2, offsetU + (k1*strideU1) + (k1*strideU2), 0.0, WV, strideWV1, strideWV2, offsetWV ); + dlacpy( 'all', jlen, nu, WV, strideWV1, strideWV2, offsetWV, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+k1)*strideH2) ); + } + + // Z multiply (also vertical) + if ( wantZ ) { + for ( jrow = iloZ; jrow <= ihiZ; jrow += NV ) { + jlen = min( NV, ihiZ - jrow + 1 ); + dgemm( 'no-transpose', 'no-transpose', jlen, nu, nu, 1.0, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+k1)*strideZ2), U, strideU1, strideU2, offsetU + (k1*strideU1) + (k1*strideU2), 0.0, WV, strideWV1, strideWV2, offsetWV ); + dlacpy( 'all', jlen, nu, WV, strideWV1, strideWV2, offsetWV, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+k1)*strideZ2) ); + } + } + } else { + /* + * Updates exploiting U's 2-by-2 block structure. + * (I2, I4, J2, J4 are the last rows and columns of the blocks.) 
+ */ + i2 = floor( ( kdu + 1 ) / 2 ); + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + + /* + * KZS and KNZ deal with the band of zeros along the diagonal + * of one of the triangular blocks. + */ + kzs = j4 - j2 - ( ns + 1 ); + knz = ns + 1; + + // Horizontal multiply + for ( jcol = min( ndcol, KBOT ) + 1; jcol <= jbot; jcol += NH ) { + jlen = min( NH, jbot - jcol + 1 ); + + /* + * Copy bottom of H to top+KZS of scratch + * (The first KZS rows get multiplied by zero.) + */ + dlacpy( 'all', knz, jlen, H, strideH1, strideH2, offsetH + ((incol+1+j2)*strideH1) + (jcol*strideH2), WH, strideWH1, strideWH2, offsetWH + ((kzs+1)*strideWH1) ); + + // Multiply by U21**T + dlaset( 'all', kzs, jlen, 0.0, 0.0, WH, strideWH1, strideWH2, offsetWH ); + dtrmm( 'left', 'upper', 'conjugate-transpose', 'non-unit', knz, jlen, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((1+kzs)*strideU2), WH, strideWH1, strideWH2, offsetWH + ((kzs+1)*strideWH1) ); + + // Multiply top of H by U11**T + dgemm( 'conjugate-transpose', 'no-transpose', i2, jlen, j2, 1.0, U, strideU1, strideU2, offsetU, H, strideH1, strideH2, offsetH + ((incol+1)*strideH1) + (jcol*strideH2), 1.0, WH, strideWH1, strideWH2, offsetWH ); + + // Copy top of H to bottom of WH + dlacpy( 'all', j2, jlen, H, strideH1, strideH2, offsetH + ((incol+1)*strideH1) + (jcol*strideH2), WH, strideWH1, strideWH2, offsetWH + ((i2+1)*strideWH1) ); + + // Multiply by U21**T + dtrmm( 'left', 'lower', 'conjugate-transpose', 'non-unit', j2, jlen, 1.0, U, strideU1, strideU2, offsetU + (1*strideU1) + ((i2+1)*strideU2), WH, strideWH1, strideWH2, offsetWH + ((i2+1)*strideWH1) ); + + // Multiply by U22 + dgemm( 'conjugate-transpose', 'no-transpose', i4 - i2, jlen, j4 - j2, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((i2+1)*strideU2), H, strideH1, strideH2, offsetH + ((incol+1+j2)*strideH1) + (jcol*strideH2), 1.0, WH, strideWH1, strideWH2, offsetWH + ((i2+1)*strideWH1) ); + + // Copy it back + dlacpy( 'all', kdu, jlen, WH, strideWH1, 
strideWH2, offsetWH, H, strideH1, strideH2, offsetH + ((incol+1)*strideH1) + (jcol*strideH2) ); + } + + // Vertical multiply + for ( jrow = jtop; jrow <= max( incol, KTOP ) - 1; jrow += NV ) { + jlen = min( NV, max( incol, KTOP ) - jrow ); + + /* + * Copy right of H to scratch (the first KZS columns get multiplied by zero) + */ + dlacpy( 'all', jlen, knz, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1+j2)*strideH2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); + + // Multiply by U21 + dlaset( 'all', jlen, kzs, 0.0, 0.0, WV, strideWV1, strideWV2, offsetWV ); + dtrmm( 'right', 'upper', 'no-transpose', 'non-unit', jlen, knz, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((1+kzs)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); + + // Multiply by U11 + dgemm( 'no-transpose', 'no-transpose', jlen, i2, j2, 1.0, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1)*strideH2), U, strideU1, strideU2, offsetU, 1.0, WV, strideWV1, strideWV2, offsetWV ); + + // Copy left of H to right of scratch + dlacpy( 'all', jlen, j2, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1)*strideH2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); + + // Multiply by U21 + dtrmm( 'right', 'lower', 'no-transpose', 'non-unit', jlen, i4 - i2, 1.0, U, strideU1, strideU2, offsetU + (1*strideU1) + ((i2+1)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); + + // Multiply by U22 + dgemm( 'no-transpose', 'no-transpose', jlen, i4 - i2, j4 - j2, 1.0, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1+j2)*strideH2), U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((i2+1)*strideU2), 1.0, WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); + + // Copy it back + dlacpy( 'all', jlen, kdu, WV, strideWV1, strideWV2, offsetWV, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1)*strideH2) ); + 
} + + // Multiply Z (also vertical) + if ( wantZ ) { + for ( jrow = iloZ; jrow <= ihiZ; jrow += NV ) { + jlen = min( NV, ihiZ - jrow + 1 ); + + /* + * Copy right of Z to left of scratch (first KZS columns get multiplied by zero) + */ + dlacpy( 'all', jlen, knz, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1+j2)*strideZ2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); + + // Multiply by U12 + dlaset( 'all', jlen, kzs, 0.0, 0.0, WV, strideWV1, strideWV2, offsetWV ); + dtrmm( 'right', 'upper', 'no-transpose', 'non-unit', jlen, knz, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((1+kzs)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); + + // Multiply by U11 + dgemm( 'no-transpose', 'no-transpose', jlen, i2, j2, 1.0, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1)*strideZ2), U, strideU1, strideU2, offsetU, 1.0, WV, strideWV1, strideWV2, offsetWV ); + + // Copy left of Z to right of scratch + dlacpy( 'all', jlen, j2, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1)*strideZ2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); + + // Multiply by U21 + dtrmm( 'right', 'lower', 'no-transpose', 'non-unit', jlen, i4 - i2, 1.0, U, strideU1, strideU2, offsetU + (1*strideU1) + ((i2+1)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); + + // Multiply by U22 + dgemm( 'no-transpose', 'no-transpose', jlen, i4 - i2, j4 - j2, 1.0, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1+j2)*strideZ2), U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((i2+1)*strideU2), 1.0, WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); + + // Copy the result back to Z + dlacpy( 'all', jlen, kdu, WV, strideWV1, strideWV2, offsetWV, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1)*strideZ2) ); + } + } + } + } + } +} + + +// EXPORTS // + +module.exports = dlaqr5; diff --git 
a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr1.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr1.js new file mode 100644 index 000000000000..bd695f5d94d5 --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr1.js @@ -0,0 +1,124 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +// MODULES // + +var abs = require( '@stdlib/math/base/special/fast/abs' ); + + +// MAIN // + +/** +* Given a 2-by-2 or a 3-by-3 matrix `H`, this function sets `V` to a scalar multiple of the first column of `K` where `K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I)`. +* +* ## Notes +* +* - It is expected that either `sr1 = sr2` and `si1 + si2 = 0` or `si1 = si2 = 0` (i.e., they represent complex conjugate values). +* - This is useful for starting double implicit shift bulges in the QR algorithm. +* - `V` should have at least `N` indexed elements. 
+* +* @private +* @param {PositiveInteger} N - number of row/columns in `H` +* @param {Float64Array} H - input matrix +* @param {integer} strideH1 - stride of the first dimension of `H` +* @param {integer} strideH2 - stride of the second dimension of `H` +* @param {NonNegativeInteger} offsetH - index offset for `H` +* @param {number} sr1 - real part of the first conjugate complex shift +* @param {number} si1 - imaginary part of the first conjugate complex shift +* @param {number} sr2 - real part of the second conjugate complex shift +* @param {number} si2 - imaginary part of the second conjugate complex shift +* @param {Float64Array} V - output array +* @param {integer} strideV - stride length for `V` +* @param {NonNegativeInteger} offsetV - index offset for `V` +* @returns {Float64Array} `V` +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var H = new Float64Array( [ 1.0, 3.0, 2.0, 2.0, 4.0, 6.0, 0.0, 5.0, 7.0 ] ); // => [ [ 1.0, 3.0, 2.0 ], [ 2.0, 4.0, 6.0 ], [ 0.0, 5.0, 7.0 ] ] +* var V = new Float64Array( 3 ); +* +* var out = dlaqr1( 3, H, 3, 1, 0, 1.5, 0.0, 2.5, 0.0, V, 1, 0 ); +* // returns [ ~1.93, ~0.57, ~2.86 ] +*/ +function dlaqr1( N, H, strideH1, strideH2, offsetH, sr1, si1, sr2, si2, V, strideV, offsetV ) { // eslint-disable-line max-params, max-len + var h21s; + var h31s; + var h11; + var h12; + var h13; + var h21; + var h22; + var h23; + var h31; + var h32; + var h33; + var iv; + var s; + var i; + + h11 = offsetH; + h12 = offsetH + strideH2; + h21 = offsetH + strideH1; + h22 = h21 + strideH2; + + if ( N === 2 ) { + s = abs( H[ h11 ] - sr2 ) + abs( si2 ) + abs( H[ h21 ] ); + if ( s === 0.0 ) { + V[ offsetV ] = 0.0; + V[ strideV + offsetV ] = 0.0; + return V; + } + h21s = H[ h21 ] / s; + V[ offsetV ] = ( h21s * H[ h12 ] ) + ( ( H[ h11 ]-sr1 ) * ( ( H[ h11 ]-sr2 ) / s ) ) - ( si1*( si2 / s ) ); // eslint-disable-line max-len + V[ offsetV + strideV ] = h21s*( H[ h11 ]+H[ h22 ]-sr1-sr2 ); + return V; + } + + h13 = h12 + 
strideH2; + h31 = h21 + strideH1; + h33 = h22 + strideH1 + strideH2; + h23 = h22 + strideH2; + h32 = h22 + strideH1; + + s = abs( H[ h11 ]-sr2 ) + abs( si2 ) + abs( H[ h21 ] ) + abs( H[ h31 ] ); + if ( s === 0.0 ) { + iv = offsetV; + for ( i = 0; i < 3; i++ ) { + V[ iv ] = 0.0; + iv += strideV; + } + return V; + } + h21s = H[ h21 ] / s; + h31s = H[ h31 ] / s; + iv = offsetV; + V[ iv ] = (( H[ h11 ]-sr1 )*( ( H[ h11 ]-sr2 ) / s )) - (si1*( si2 / s )) + ((H[ h12 ]*h21s) + (H[ h13 ]*h31s)); // eslint-disable-line max-len + iv += strideV; + V[ iv ] = (h21s*( H[ h11 ]+H[ h22 ]-sr1-sr2 )) + (H[ h23 ]*h31s); + iv += strideV; + V[ iv ] = (h31s*( H[ h11 ]+H[ h33 ]-sr1-sr2 )) + (h21s*H[ h32 ]); + return V; +} + + +// EXPORTS // + +module.exports = dlaqr1; diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlarfg.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlarfg.js new file mode 100644 index 000000000000..da2c67ed7284 --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlarfg.js @@ -0,0 +1,142 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var dnrm2 = require( '@stdlib/blas/base/dnrm2' ).ndarray; +var sign = require( '@stdlib/math/base/special/copysign' ); +var dlamch = require( '@stdlib/lapack/base/dlamch' ); +var abs = require( '@stdlib/math/base/special/abs' ); +var dscal = require( '@stdlib/blas/base/dscal' ).ndarray; +var dlapy2 = require( '@stdlib/lapack/base/dlapy2' ); + + +// MAIN // + +/** +* Generates a real elementary reflector `H` of order `N` such that applying `H` to a vector `[alpha; X]` zeros out `X`. +* +* `H` is a Householder matrix with the form: +* +* ```tex +* H \cdot \begin{bmatrix} \alpha \\ x \end{bmatrix} = \begin{bmatrix} \beta \\ 0 \end{bmatrix}, \quad \text{and} \quad H^T H = I +* ``` +* +* where: +* +* - `tau` is a scalar +* - `X` is a vector of length `N-1` +* - `beta` is a scalar value +* - `H` is an orthogonal matrix known as a Householder reflector. +* +* The reflector `H` is constructed in the form: +* +* ```tex +* H = I - \tau \begin{bmatrix}1 \\ v \end{bmatrix} \begin{bmatrix}1 & v^T \end{bmatrix} +* ``` +* +* where: +* +* - `tau` is a real scalar +* - `V` is a real vector of length `N-1` that defines the Householder vector +* - The vector `[1; V]` is the Householder direction\ +* +* The values of `tau` and `V` are chosen so that applying `H` to the vector `[alpha; X]` results in a new vector `[beta; 0]`, i.e., only the first component remains nonzero. The reflector matrix `H` is symmetric and orthogonal, satisfying `H^T = H` and `H^T H = I` +* +* ## Special cases +* +* - If all elements of `X` are zero, then `tau = 0` and `H = I`, the identity matrix. +* - Otherwise, `tau` satisfies `1 ≤ tau ≤ 2`, ensuring numerical stability in transformations. 
+* +* ## Notes +* +* - `X` should have `N-1` indexed elements +* - The output array contains the following two elements: `alpha` and `tau` +* +* @private +* @param {NonNegativeInteger} N - number of rows/columns of the elementary reflector `H` +* @param {Float64Array} X - input vector +* @param {integer} strideX - stride length for `X` +* @param {NonNegativeInteger} offsetX - starting index of `X` +* @param {Float64Array} out - output array +* @param {integer} strideOut - stride length for `out` +* @param {NonNegativeInteger} offsetOut - starting index of `out` +* @returns {void} +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var X = new Float64Array( [ 2.0, 3.0, 4.0 ] ); +* var out = new Float64Array( [ 4.0, 0.0 ] ); +* +* dlarfg( 4, X, 1, 0, out, 1, 0 ); +* // X => [ ~0.19, ~0.28, ~0.37 ] +* // out => [ ~-6.7, ~1.6 ] +*/ +function dlarfg( N, X, strideX, offsetX, out, strideOut, offsetOut ) { + var safemin; + var rsafmin; + var xnorm; + var alpha; + var beta; + var tau; + var knt; + var i; + + if ( N <= 1 ) { + out[ offsetOut + strideOut ] = 0.0; + return; + } + + xnorm = dnrm2( N - 1, X, strideX, offsetX ); + alpha = out[ offsetOut ]; + + if ( xnorm === 0.0 ) { + out[ strideOut + offsetOut ] = 0.0; + } else { + beta = -1.0 * sign( dlapy2( alpha, xnorm ), alpha ); + safemin = dlamch( 'safemin' ) / dlamch( 'epsilon' ); + knt = 0; + if ( abs( beta ) < safemin ) { + rsafmin = 1.0 / safemin; + while ( abs( beta ) < safemin && knt < 20 ) { + knt += 1; + dscal( N-1, rsafmin, X, strideX, offsetX ); + beta *= rsafmin; + alpha *= rsafmin; + } + xnorm = dnrm2( N - 1, X, strideX, offsetX ); + beta = -1.0 * sign( dlapy2( alpha, xnorm ), alpha ); + } + tau = ( beta - alpha ) / beta; + dscal( N-1, 1.0 / ( alpha - beta ), X, strideX, offsetX ); + for ( i = 0; i < knt; i++ ) { + beta *= safemin; + } + + out[ offsetOut ] = beta; + out[ strideOut + offsetOut ] = tau; + } +} + + +// EXPORTS // + +module.exports = dlarfg; diff --git 
a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dtrmm.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dtrmm.js new file mode 100644 index 000000000000..f844cfed69c2 --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dtrmm.js @@ -0,0 +1,361 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/* eslint-disable max-len, max-statements, max-lines-per-function */ + +'use strict'; + +// MODULES // + +var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major' ); + + +// FUNCTIONS // + +/** +* Fills a matrix with zeros. 
+* +* @private +* @param {NonNegativeInteger} M - number of rows +* @param {NonNegativeInteger} N - number of columns +* @param {Float64Array} X - matrix to fill +* @param {integer} strideX1 - stride of the first dimension of `X` +* @param {integer} strideX2 - stride of the second dimension of `X` +* @param {NonNegativeInteger} offsetX - starting index for `X` +* @returns {Float64Array} input matrix +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var X = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] ); +* +* zeros( 2, 3, X, 3, 1, 0 ); +* // X => [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var X = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ] ); +* +* zeros( 2, 3, X, 1, 2, 0 ); +* // X => [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] +*/ +function zeros( M, N, X, strideX1, strideX2, offsetX ) { // TODO: consider moving to a separate package + var dx0; + var dx1; + var S0; + var S1; + var i0; + var i1; + var ix; + + if ( isRowMajor( [ strideX1, strideX2 ] ) ) { + // For row-major matrices, the last dimension has the fastest changing index... + S0 = N; + S1 = M; + dx0 = strideX2; // offset increment for innermost loop + dx1 = strideX1 - ( S0*strideX2 ); // offset increment for outermost loop + } else { // column-major + // For column-major matrices, the first dimension has the fastest changing index... 
+ S0 = M; + S1 = N; + dx0 = strideX1; // offset increment for innermost loop + dx1 = strideX2 - ( S0*strideX1 ); // offset increment for outermost loop + } + ix = offsetX; + for ( i1 = 0; i1 < S1; i1++ ) { + for ( i0 = 0; i0 < S0; i0++ ) { + X[ ix ] = 0.0; + ix += dx0; + } + ix += dx1; + } + return X; +} + + +// MAIN // + +/** +* Performs one of the matrix-matrix operations `B = α * op(A) * B` or `B = α * B * op(A)` where `α` is a scalar, `B` is an `M` by `N` matrix, `A` is a unit, or non-unit, upper or lower triangular matrix and `op( A )` is one of `op( A ) = A` or `op( A ) = A^T`. +* +* @private +* @param {string} side - specifies whether `op( A )` appears on the left or right side of `B` +* @param {string} uplo - specifies whether the upper or lower triangular part of the matrix `A` should be referenced +* @param {string} transa - specifies whether `A` should be transposed, conjugate-transposed, or not transposed +* @param {string} diag - specifies whether or not `A` is unit triangular +* @param {NonNegativeInteger} M - number of rows in `B` +* @param {NonNegativeInteger} N - number of columns in `B` +* @param {number} alpha - scalar constant +* @param {Float64Array} A - first input matrix +* @param {integer} strideA1 - stride of the first dimension of `A` +* @param {integer} strideA2 - stride of the second dimension of `A` +* @param {NonNegativeInteger} offsetA - starting index for `A` +* @param {Float64Array} B - second input matrix +* @param {integer} strideB1 - stride of the first dimension of `B` +* @param {integer} strideB2 - stride of the second dimension of `B` +* @param {NonNegativeInteger} offsetB - starting index for `B` +* @returns {Float64Array} `B` +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var A = new Float64Array( [ 1.0, 0.0, 0.0, 2.0, 3.0, 0.0, 4.0, 5.0, 6.0 ] ); +* var B = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 ] ); +* +* dtrmm( 'left', 'lower', 'no-transpose', 'unit', 3, 3, 1.0, A, 
3, 1, 0, B, 3, 1, 0 ); +* // B => [ 1.0, 2.0, 3.0, 6.0, 9.0, 12.0, 31.0, 41.0, 51.0 ] +*/ +function dtrmm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, offsetA, B, strideB1, strideB2, offsetB ) { // eslint-disable-line max-params + var nonunit; + var isrma; + var tmp; + var oa2; + var ob2; + var sa0; + var sa1; + var sb0; + var sb1; + var oa; + var ob; + var ia; + var ib; + var i; + var j; + var k; + + // Note on variable naming convention: sa#, sb# where # corresponds to the loop number, with `0` being the innermost loop... + + isrma = isRowMajor( [ strideA1, strideA2 ] ); + nonunit = ( diag === 'non-unit' ); + + if ( M === 0 || N === 0 ) { + return B; + } + if ( isrma ) { + // For row-major matrices, the last dimension has the fastest changing index... + sa0 = strideA2; // stride for innermost loop + sa1 = strideA1; // stride for outermost loop + sb0 = strideB2; // stride for innermost loop + sb1 = strideB1; // stride for outermost loop + } else { + sa0 = strideA1; // stride for innermost loop + sa1 = strideA2; // stride for outermost loop + sb0 = strideB1; // stride for innermost loop + sb1 = strideB2; // stride for outermost loop + } + if ( alpha === 0.0 ) { + zeros( M, N, B, sb0, sb1, offsetB ); + return B; + } + + if ( + ( isrma && side === 'left' && uplo === 'upper' && transa === 'no-transpose' ) || + ( !isrma && side === 'right' && uplo === 'lower' && transa === 'no-transpose' ) + ) { + for ( j = 0; j < N; j++ ) { + ib = offsetB + ( j*sb0 ); + for ( k = 0; k < M; k++ ) { + ob2 = ib + ( k * sb1 ); + tmp = alpha * B[ ob2 ]; + ia = offsetA + ( k*sa0 ); + for ( i = 0; i < k; i++ ) { + B[ ib + ( i*sb1 ) ] += ( tmp * A[ ia + ( i*sa1 ) ] ); + } + if ( nonunit ) { + tmp *= A[ ia + ( k*sa1 ) ]; + } + B[ ob2 ] = tmp; + } + } + return B; + } + if ( + ( isrma && side === 'left' && uplo === 'lower' && transa === 'no-transpose' ) || + ( !isrma && side === 'right' && uplo === 'upper' && transa === 'no-transpose' ) + ) { + for ( j = 0; j < N; j++ ) { + ib 
= offsetB + ( j*sb0 ); + for ( k = M - 1; k >= 0; k-- ) { + ob2 = ib + ( k*sb1 ); + tmp = alpha * B[ ob2 ]; + ia = offsetA + ( k*sa0 ); + for ( i = k + 1; i < M; i++ ) { + oa2 = ia + ( i*sa1 ); + B[ ib + ( i*sb1 ) ] += ( tmp * A[ oa2 ] ); + } + if ( nonunit ) { + tmp *= A[ ia + ( k*sa1 ) ]; + } + B[ ob2 ] = tmp; + } + } + return B; + } + if ( + ( isrma && side === 'left' && uplo === 'upper' && transa !== 'no-transpose' ) || + ( !isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' ) + ) { + for ( j = 0; j < N; j++ ) { + ib = offsetB + ( j*sb0 ); + for ( i = M - 1; i >= 0; i-- ) { + ob2 = ib + ( i*sb1 ); + tmp = 0.0; + ia = offsetA + ( i*sa0 ); + if ( nonunit ) { + tmp += ( A[ ia + ( i*sa1 ) ] * B[ ob2 ] ); + } else { + tmp += B[ ob2 ]; + } + for ( k = 0; k < i; k++ ) { + oa2 = ia + ( k*sa1 ); + tmp += A[ oa2 ] * B[ ib + ( k*sb1 ) ]; + } + B[ ob2 ] = alpha * tmp; + } + } + return B; + } + if ( + ( isrma && side === 'left' && uplo === 'lower' && transa !== 'no-transpose' ) || + ( !isrma && side === 'right' && uplo === 'upper' && transa === 'transpose' ) + ) { + for ( j = 0; j < N; j++ ) { + ib = offsetB + ( j * sb0 ); + for ( i = 0; i < M; i++ ) { + ia = offsetA + ( i * sa0 ); + ob2 = ib + ( i * sb1 ); + tmp = 0.0; + for ( k = i + 1; k < M; k++ ) { + tmp += A[ ia + ( k * sa1 ) ] * B[ ib + ( k * sb1 ) ]; + } + if ( nonunit ) { + tmp += ( A[ ia + ( i * sa1 ) ] * B[ ob2 ] ); + } else { + tmp += B[ ob2 ]; + } + B[ ob2 ] = alpha * tmp; + } + } + return B; + } + if ( + ( isrma && side === 'right' && uplo === 'upper' && transa === 'no-transpose' ) || + ( !isrma && side === 'left' && uplo === 'lower' && transa === 'no-transpose' ) + ) { + for ( j = N - 1; j >= 0; j-- ) { + ia = offsetA + ( j*sa0 ); + ib = offsetB + ( j*sb0 ); + for ( i = 0; i < M; i++ ) { + ob = ib + ( i*sb1 ); + B[ ob ] *= alpha; + if ( nonunit ) { + oa2 = ia + ( j*sa1 ); + tmp = A[ oa2 ]; + B[ ob ] *= tmp; + } + for ( k = 0; k < j; k++ ) { + oa2 = ia + ( k*sa1 ); + ob2 = offsetB + ( 
k*sb0 ); + if ( A[ oa2 ] !== 0.0 ) { + tmp = alpha * A[ oa2 ]; + B[ ob ] += ( tmp * B[ ob2 + ( i*sb1 ) ] ); + } + } + } + } + return B; + } + if ( + ( isrma && side === 'right' && uplo === 'lower' && transa === 'no-transpose' ) || + ( !isrma && side === 'left' && uplo === 'upper' && transa === 'no-transpose' ) + ) { + for ( j = 0; j < N; j++ ) { + ia = offsetA + ( j*sa0 ); + for ( i = 0; i < M; i++ ) { + ib = offsetB + ( i*sb1 ); + ob = ib + ( j*sb0 ); + B[ ob ] *= alpha; + if ( nonunit ) { + oa = ia + ( j*sa1 ); + B[ ob ] *= A[ oa ]; + } + for ( k = j + 1; k < N; k++ ) { + oa2 = ia + ( k*sa1 ); + ob2 = ib + ( k*sb0 ); + if ( A[ oa2 ] !== 0.0 ) { + tmp = alpha * A[ oa2 ]; + B[ ob ] += ( tmp * B[ ob2 ] ); + } + } + } + } + return B; + } + if ( + ( isrma && side === 'right' && uplo === 'upper' && transa !== 'no-transpose' ) || + ( !isrma && side === 'left' && uplo === 'lower' && transa !== 'no-transpose' ) + ) { + for ( j = 0; j < N; j++ ) { + ia = offsetA + ( j*sa1 ); + for ( i = 0; i < M; i++ ) { + ib = offsetB + ( i*sb1 ); + oa = ia + ( j*sa0 ); + ob = ib + ( j*sb0 ); + if ( nonunit ) { + tmp = B[ ob ] * A[ oa ]; + } else { + tmp = B[ ob ]; + } + for ( k = j + 1; k < N; k++ ) { + oa2 = ia + ( k*sa0 ); + ob2 = ib + ( k*sb0 ); + tmp += ( B[ ob2 ] * A[ oa2 ] ); + } + B[ ob ] = alpha * tmp; + } + } + return B; + } + // ( isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' ) || ( !isrma && side === 'left' && uplo === 'upper' && transa !== 'no-transpose' ) + for ( i = 0; i < M; i++ ) { + ib = offsetB + ( i*sb1 ); + for ( j = N - 1; j >= 0; j-- ) { + ia = offsetA + ( j*sa1 ); + oa = ia + ( j*sa0 ); + ob = ib + ( j*sb0 ); + if ( nonunit ) { + tmp = B[ ob ] * A[ oa ]; + } else { + tmp = B[ ob ]; + } + for ( k = 0; k < j; k++ ) { + oa2 = ia + ( k*sa0 ); + ob2 = ib + ( k*sb0 ); + tmp += ( B[ ob2 ] * A[ oa2 ] ); + } + B[ ob ] = alpha * tmp; + } + } + return B; +} + + +// EXPORTS // + +module.exports = dtrmm; From 8171fe9cac140609dd924716d3e173f74d4ab431 
Mon Sep 17 00:00:00 2001 From: Aayush Khanna Date: Fri, 18 Jul 2025 05:05:27 +0000 Subject: [PATCH 2/4] feat: add exports --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../@stdlib/lapack/base/dlaqr5/lib/base.js | 6 +- .../@stdlib/lapack/base/dlaqr5/lib/dlaqr5.js | 132 ++++++++++++++++++ .../@stdlib/lapack/base/dlaqr5/lib/index.js | 65 +++++++++ .../@stdlib/lapack/base/dlaqr5/lib/main.js | 35 +++++ .../@stdlib/lapack/base/dlaqr5/lib/ndarray.js | 99 +++++++++++++ 5 files changed, 335 insertions(+), 2 deletions(-) create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr5.js create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/index.js create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/main.js create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/ndarray.js diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js index ea1a7d26f645..3c6505658c34 100644 --- a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js @@ -89,6 +89,7 @@ function 
shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI /** * Performs a single, small shift multiline QR sweep. * +* @private * @param {boolean} wantT - boolean value indicating whether the quasi triangular Schur factor is being computed * @param {boolean} wantZ - boolean value indicating whether the orthogonal Schur factor is being computed * @param {integer} kacc22 - integer value ranging from 0 to 2 (inclusive), specifies the computation mode for far-from-diagonal updates @@ -132,6 +133,7 @@ function shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI * @param {NonNegativeInteger} offsetWV - starting index of `WV` * @returns {void} * +* @example * var Float64Array = require( '@stdlib/array/float64' ); * * var H = new Float64Array( [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] ); @@ -144,8 +146,8 @@ function shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI * var SI = new Float64Array( [ 0.0, 0.0 ] ); * * dlaqr5( true, true, 0, 4, 1, 4, 2, SR, 1, 0, SI, 1, 0, H, 4, 1, 0, 1, 4, Z, 4, 1, 0, V, 2, 1, 0, U, 2, 1, 0, 4, WH, 4, 1, 0, 4, WV, 4, 1, 0 ); -* H => [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] -* Z => [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] +* // H => [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] +* // Z => [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] */ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI, H, strideH1, strideH2, offsetH, iloZ, ihiZ, Z, strideZ1, strideZ2, offsetZ, V, strideV1, strideV2, offsetV, U, strideU1, strideU2, offsetU, NH, WH, strideWH1, strideWH2, offsetWH, NV, WV, strideWV1, strideWV2, offsetWV ) { var dlarfgOut; diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr5.js 
b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr5.js new file mode 100644 index 000000000000..fede835942eb --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/dlaqr5.js @@ -0,0 +1,132 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/* eslint-disable max-len, max-params */ + +'use strict'; + +// MODULES // + +var isLayout = require( '@stdlib/blas/base/assert/is-layout' ); +var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major-string' ); +var format = require( '@stdlib/string/format' ); +var base = require( './base.js' ); + + +// MAIN // + +/** +* Performs a single, small shift multiline QR sweep. 
+* +* @param {string} order - storage layout +* @param {boolean} wantT - boolean value indicating whether the quasi triangular Schur factor is being computed +* @param {boolean} wantZ - boolean value indicating whether the orthogonal Schur factor is being computed +* @param {integer} kacc22 - integer value ranging from 0 to 2 (inclusive), specifies the computation mode for far-from-diagonal updates +* @param {integer} N - number of rows/columns in `H` +* @param {integer} KTOP - first row and column of the submatrix of `H` where the QR sweep will be applied +* @param {integer} KBOT - last row and column of the submatrix of `H` where the QR sweep will be applied +* @param {integer} nshifts - number of simultaneous shifts, must be even and positive +* @param {Float64Array} SR - real parts of the shifts of origin that define the QR sweep +* @param {Float64Array} SI - imaginary parts of the shifts of origin that define the QR sweep +* @param {Float64Array} H - input upper hessenberg matrix +* @param {PositiveInteger} LDH - stride of the first dimension of `H` (a.k.a., leading dimension of the matrix `H`) +* @param {integer} iloZ - starting row from where the transformation must be applied if `wantZ` is true +* @param {integer} ihiZ - ending row from where the transformation must be applied if `wantZ` is true +* @param {Float64Array} Z - the QR sweep orthogonal similarity transformation is accumulated into `Z` between the rows and columns `iloZ` and `ihiZ` if `wantZ` is true, otherwise `Z` is not referenced +* @param {PositiveInteger} LDZ - stride of the first dimension of `Z` (a.k.a., leading dimension of the matrix `Z`) +* @param {Float64Array} V - householder vectors are stored column-wise, used in forming bulges for the multi shift QR algorithm +* @param {PositiveInteger} LDV - stride of the first dimension of `V` (a.k.a., leading dimension of the matrix `V`) +* @param {Float64Array} U - used to hold the product of householder reflector that represent accumulated 
orthogonal transformations from the bulge-chasing process +* @param {PositiveInteger} LDU - stride of the first dimension of `U` (a.k.a., leading dimension of the matrix `U`) +* @param {integer} NH - number of columns in `WH` available for workspace +* @param {Float64Array} WH - workspace array +* @param {PositiveInteger} LDWH - stride of the first dimension of `WH` (a.k.a., leading dimension of the matrix `WH`) +* @param {integer} NV - number of rows in `WV` available for workspace +* @param {Float64Array} WV - workspace array +* @param {PositiveInteger} LDWV - stride of the first dimension of `WV` (a.k.a., leading dimension of the matrix `WV`) +* @throws {TypeError} first argument must be a valid order +* @throws {RangeError} fourth argument must be greater than or equal to max(1,N) +* @returns {void} permuted matrix `A` +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var H = new Float64Array( [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] ); +* var Z = new Float64Array( [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] ); +* var V = new Float64Array( 6 ); +* var U = new Float64Array( 10 ); +* var WH = new Float64Array( 16 ); +* var WV = new Float64Array( 16 ); +* var SR = new Float64Array( [ 1.1, 2.2 ] ); +* var SI = new Float64Array( [ 0.0, 0.0 ] ); +* +* dlaqr5( 'row-major', true, true, 0, 4, 1, 4, 2, SR, SI, H, 4, 1, 4, Z, 4, V, 2, U, 2, 4, WH, 4, 4, WV, 4 ); +* // H => [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] +* // Z => [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] +*/ +function dlaqr5( order, wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, SI, H, LDH, iloZ, ihiZ, Z, LDZ, V, LDV, U, LDU, NH, WH, LDWH, NV, WV, LDWV ) { + var swh1; + var swh2; + var swv1; + var swv2; + var sh1; + var sh2; + var sz1; + var sz2; + var sv1; + var sv2; + var su1; + var su2; + + if ( !isLayout( order ) ) { + 
throw new TypeError( format( 'invalid argument. First argument must be a valid order. Value: `%s`.', order ) ); + } + if ( isRowMajor( order ) ) { + sh1 = LDH; + sh2 = 1; + sz1 = LDZ; + sz2 = 1; + sv1 = LDV; + sv2 = 1; + su1 = LDU; + su2 = 1; + swh1 = LDWH; + swh2 = 1; + swv1 = LDWV; + swv2 = 1; + } else { // order === 'col-major' + sh1 = 1; + sh2 = LDH; + sz1 = 1; + sz2 = LDZ; + sv1 = 1; + sv2 = LDV; + su1 = 1; + su2 = LDU; + swh1 = 1; + swh2 = LDWH; + swv1 = 1; + swv2 = LDWV; + } + base( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, 1, 0, SI, 1, 0, H, sh1, sh2, 0, iloZ, ihiZ, Z, sz1, sz2, 0, V, sv1, sv2, 0, U, su1, su2, 0, NH, WH, swh1, swh2, 0, NV, WV, swv1, swv2, 0 ); +} + + +// EXPORTS // + +module.exports = dlaqr5; diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/index.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/index.js new file mode 100644 index 000000000000..38fa62da954d --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/index.js @@ -0,0 +1,65 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +/** +* LAPACK routine to perform a single, small shift multiline QR sweep. 
+* +* @module @stdlib/lapack/base/dlaqr5 +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* var dlaqr5 = require( '@stdlib/lapack/base/dlaqr5' ); +* +* var H = new Float64Array( [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] ); +* var Z = new Float64Array( [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] ); +* var V = new Float64Array( 6 ); +* var U = new Float64Array( 10 ); +* var WH = new Float64Array( 16 ); +* var WV = new Float64Array( 16 ); +* var SR = new Float64Array( [ 1.1, 2.2 ] ); +* var SI = new Float64Array( [ 0.0, 0.0 ] ); +* +* dlaqr5( 'row-major', true, true, 0, 4, 1, 4, 2, SR, SI, H, 4, 1, 4, Z, 4, V, 2, U, 2, 4, WH, 4, 4, WV, 4 ); +* // H => [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] +* // Z => [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] +*/ + +// MODULES // + +var join = require( 'path' ).join; +var tryRequire = require( '@stdlib/utils/try-require' ); +var isError = require( '@stdlib/assert/is-error' ); +var main = require( './main.js' ); + + +// MAIN // + +var dlaqr5; +var tmp = tryRequire( join( __dirname, './native.js' ) ); +if ( isError( tmp ) ) { + dlaqr5 = main; +} else { + dlaqr5 = tmp; +} + + +// EXPORTS // + +module.exports = dlaqr5; diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/main.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/main.js new file mode 100644 index 000000000000..db3e66b5059a --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/main.js @@ -0,0 +1,35 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +// MODULES // + +var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' ); +var dlaqr5 = require( './dlaqr5.js' ); +var ndarray = require( './ndarray.js' ); + + +// MAIN // + +setReadOnly( dlaqr5, 'ndarray', ndarray ); + + +// EXPORTS // + +module.exports = dlaqr5; diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/ndarray.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/ndarray.js new file mode 100644 index 000000000000..e6c8e66d65d7 --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/ndarray.js @@ -0,0 +1,99 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/* eslint-disable max-len, max-params */ + +'use strict'; + +// MODULES // + +var base = require( './base.js' ); + + +// MAIN // + +/** +* Performs a single small-bulge multishift QR sweep using alternative indexing semantics. 
+* +* @param {boolean} wantT - boolean value indicating whether the quasi triangular Schur factor is being computed +* @param {boolean} wantZ - boolean value indicating whether the orthogonal Schur factor is being computed +* @param {integer} kacc22 - integer value ranging from 0 to 2 (inclusive), specifies the computation mode for far-from-diagonal updates +* @param {integer} N - number of rows/columns in `H` +* @param {integer} KTOP - first row and column of the submatrix of `H` where the QR sweep will be applied +* @param {integer} KBOT - last row and column of the submatrix of `H` where the QR sweep will be applied +* @param {integer} nshifts - number of simultaneous shifts, must be even and positive +* @param {Float64Array} SR - real parts of the shifts of origin that define the QR sweep +* @param {integer} strideSR - stride length of `SR` +* @param {NonNegativeInteger} offsetSR - starting index for `SR` +* @param {Float64Array} SI - imaginary parts of the shifts of origin that define the QR sweep +* @param {integer} strideSI - stride length of `SI` +* @param {NonNegativeInteger} offsetSI - starting index of `SI` +* @param {Float64Array} H - input upper hessenberg matrix +* @param {integer} strideH1 - stride of the first dimension of `H` +* @param {integer} strideH2 - stride of the second dimension of `H` +* @param {NonNegativeInteger} offsetH - starting index of `H` +* @param {integer} iloZ - starting row from where the transformation must be applied if `wantZ` is true +* @param {integer} ihiZ - ending row from where the transformation must be applied if `wantZ` is true +* @param {Float64Array} Z - the QR sweep orthogonal similarity transformation is accumulated into `Z` between the rows and columns `iloZ` and `ihiZ` if `wantZ` is true, otherwise `Z` is not referenced +* @param {integer} strideZ1 - stride of the first dimension of `Z` +* @param {integer} strideZ2 - stride of the second dimension of `Z` +* @param {NonNegativeInteger} offsetZ - starting index 
of `Z` +* @param {Float64Array} V - householder vectors are stored column-wise, used in forming bulges for the multi shift QR algorithm +* @param {integer} strideV1 - stride of the first dimension of `V` +* @param {integer} strideV2 - stride of the second dimension of `V` +* @param {NonNegativeInteger} offsetV - starting index of `V` +* @param {Float64Array} U - used to hold the product of householder reflector that represent accumulated orthogonal transformations from the bulge-chasing process +* @param {integer} strideU1 - stride of the first dimension of `U` +* @param {integer} strideU2 - stride of the second dimension of `U` +* @param {NonNegativeInteger} offsetU - starting index of `U` +* @param {integer} NH - number of columns in `WH` available for workspace +* @param {Float64Array} WH - workspace array +* @param {integer} strideWH1 - stride of the first dimension of `WH` +* @param {integer} strideWH2 - stride of the second dimension of `WH` +* @param {NonNegativeInteger} offsetWH - starting index of `WH` +* @param {integer} NV - number of rows in `WV` available for workspace +* @param {Float64Array} WV - workspace array +* @param {integer} strideWV1 - stride of the first dimension of `WV` +* @param {integer} strideWV2 - stride of the second dimension of `WV` +* @param {NonNegativeInteger} offsetWV - starting index of `WV` +* @returns {void} +* +* @example +* var Float64Array = require( '@stdlib/array/float64' ); +* +* var H = new Float64Array( [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] ); +* var Z = new Float64Array( [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] ); +* var V = new Float64Array( 6 ); +* var U = new Float64Array( 10 ); +* var WH = new Float64Array( 16 ); +* var WV = new Float64Array( 16 ); +* var SR = new Float64Array( [ 1.1, 2.2 ] ); +* var SI = new Float64Array( [ 0.0, 0.0 ] ); +* +* dlaqr5( true, true, 0, 4, 1, 4, 2, SR, 1, 0, SI, 1, 0, H, 4, 1, 0, 1, 4, Z, 4, 1, 0, 
V, 2, 1, 0, U, 2, 1, 0, 4, WH, 4, 1, 0, 4, WV, 4, 1, 0 ); +* // H => [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3, 2.0, 0.0, 0.0, 0.0, 4.0 ] +* // Z => [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ] +*/ +function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI, H, strideH1, strideH2, offsetH, iloZ, ihiZ, Z, strideZ1, strideZ2, offsetZ, V, strideV1, strideV2, offsetV, U, strideU1, strideU2, offsetU, NH, WH, strideWH1, strideWH2, offsetWH, NV, WV, strideWV1, strideWV2, offsetWV ) { + base( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI, H, strideH1, strideH2, offsetH, iloZ, ihiZ, Z, strideZ1, strideZ2, offsetZ, V, strideV1, strideV2, offsetV, U, strideU1, strideU2, offsetU, NH, WH, strideWH1, strideWH2, offsetWH, NV, WV, strideWV1, strideWV2, offsetWV ); +} + + +// EXPORTS // + +module.exports = dlaqr5; From 7fa54ea2f055c1eed29429bb8292f36d8c13cc6f Mon Sep 17 00:00:00 2001 From: Aayush Khanna Date: Fri, 18 Jul 2025 07:18:47 +0000 Subject: [PATCH 3/4] test: add initial tests --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. 
report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: na - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: passed - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed --- --- .../lapack/base/dlaqr5/test/fixtures/a.json | 113 ++++++++++++++++++ .../lapack/base/dlaqr5/test/test.dlaqr5.js | 79 ++++++++++++ .../@stdlib/lapack/base/dlaqr5/test/test.js | 82 +++++++++++++ 3 files changed, 274 insertions(+) create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/a.json create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.js diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/a.json b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/a.json new file mode 100644 index 000000000000..b21eb4ee26f6 --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/a.json @@ -0,0 +1,113 @@ +{ + "order": "row-major", + + "wantT": true, + "wantZ": true, + "kacc22": 0, + "N": 4, + "KTOP": 1, + "KBOT": 4, + "nshifts": 2, + + "SR": [ 1.1, 2.2 ], + "strideSR": 1, + "offsetSR": 0, + + "SI": [ 0.0, 0.0 ], + "strideSI": 1, + "offsetSI": 0, + + "H": [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 4.0 ], + "strideH1": 4, + "strideH2": 1, + "offsetH": 0, + "LDH": 4, + "H_mat": [ + [ 1.0, 1.0, 0.0, 0.0 ], + [ 0.0, 
2.0, 1.5, 0.0 ], + [ 0.0, 0.0, 3.0, 2.0 ], + [ 0.0, 0.0, 0.0, 4.0 ] + ], + + "iloZ": 1, + "ihiZ": 4, + + "Z": [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ], + "strideZ1": 4, + "strideZ2": 1, + "offsetZ": 0, + "LDZ": 4, + "Z_mat": [ + [ 1.0, 0.0, 0.0, 0.0 ], + [ 0.0, 1.0, 0.0, 0.0 ], + [ 0.0, 0.0, 1.0, 0.0 ], + [ 0.0, 0.0, 0.0, 1.0 ] + ], + + "V": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + "strideV1": 2, + "strideV2": 1, + "offsetV": 0, + "LDV": 2, + "V_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + + "U": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + "strideU1": 2, + "strideU2": 1, + "offsetU": 0, + "LDU": 2, + "U_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + + "NH": 4, + + "WH": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + "strideWH1": 4, + "strideWH2": 1, + "offsetWH": 0, + "LDWH": 4, + "WH_mat": [ + [ 0.0, 0.0, 0.0, 0.0 ], + [ 0.0, 0.0, 0.0, 0.0 ], + [ 0.0, 0.0, 0.0, 0.0 ], + [ 0.0, 0.0, 0.0, 0.0 ] + ], + + "NV": 4, + "WV": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + "strideWV1": 4, + "strideWV2": 1, + "offsetWV": 0, + "LDWV": 4, + "WV_mat": [ + [ 0.0, 0.0, 0.0, 0.0 ], + [ 0.0, 0.0, 0.0, 0.0 ], + [ 0.0, 0.0, 0.0, 0.0 ], + [ 0.0, 0.0, 0.0, 0.0 ] + ], + + "H_out": [ 1.0, 1.0, 0.0, 0.0, 0.0, 2.0, 1.5, 0.0, 0.0, 0.0, 3.0, 2.0, 0.0, 0.0, 0.0, 4.0 ], + "H_out_mat": [ + [ 1.0, 1.0, 0.0, 0.0 ], + [ 0.0, 2.0, 1.5, 0.0 ], + [ 0.0, 0.0, 3.0, 2.0 ], + [ 0.0, 0.0, 0.0, 4.0 ] + ], + + "Z_out": [ 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0 ], + "Z_out_mat": [ + [ 1.0, 0.0, 0.0, 0.0 ], + [ 0.0, 1.0, 0.0, 0.0 ], + [ 0.0, 0.0, 1.0, 0.0 ], + [ 0.0, 0.0, 0.0, 1.0 ] + ] +} diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js new file mode 100644 index 000000000000..6ae40e5e8bac 
--- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js @@ -0,0 +1,79 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +/* eslint-disable max-len */ + +'use strict'; + +// MODULES // + +var tape = require( 'tape' ); +var Float64Array = require( '@stdlib/array/float64' ); +var dlaqr5 = require( './../lib/dlaqr5.js' ); + + +// FIXTURES // + +var A = require( './fixtures/a.json' ); + + +// TESTS // + +tape( 'main export is a function', function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof dlaqr5, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'the function has an arity of 26', function test( t ) { + t.strictEqual( dlaqr5.length, 26, 'returns expected value' ); + t.end(); +}); + +tape( 'the function returns expected values', function test( t ) { + var expectedH; + var expectedZ; + var data; + var WH; + var WV; + var SR; + var SI; + var H; + var Z; + var U; + var V; + + data = A; + + WH = new Float64Array( data.WH ); + WV = new Float64Array( data.WV ); + H = new Float64Array( data.H ); + Z = new Float64Array( data.Z ); + U = new Float64Array( data.U ); + V = new Float64Array( data.V ); + SR = new Float64Array( data.SR ); + SI = new Float64Array( data.SI ); + expectedH = new Float64Array( data.H_out ); + expectedZ = new Float64Array( data.Z_out ); + + dlaqr5( data.order, data.wantT, data.wantZ, data.kacc22, data.N, data.KTOP, 
data.KBOT, data.nshifts, SR, SI, H, data.LDH, data.iloZ, data.ihiZ, Z, data.LDZ, V, data.LDV, U, data.LDU, data.NH, WH, data.LDWH, data.NV, WV, data.LDWV ); + + t.deepEqual( H, expectedH, 'returns expected value' ); + t.deepEqual( Z, expectedZ, 'returns expected value' ); + t.end(); +}); diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.js new file mode 100644 index 000000000000..25038ec436fe --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.js @@ -0,0 +1,82 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2025 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var tape = require( 'tape' ); +var proxyquire = require( 'proxyquire' ); +var IS_BROWSER = require( '@stdlib/assert/is-browser' ); +var dlaqr5 = require( './../lib' ); + + +// VARIABLES // + +var opts = { + 'skip': IS_BROWSER +}; + + +// TESTS // + +tape( 'main export is a function', function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof dlaqr5, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'attached to the main export is a method providing an ndarray interface', function test( t ) { + t.strictEqual( typeof dlaqr5.ndarray, 'function', 'method is a function' ); + t.end(); +}); + +tape( 'if a native implementation is available, the main export is the native implementation', opts, function test( t ) { + var dlaqr5 = proxyquire( './../lib', { + '@stdlib/utils/try-require': tryRequire + }); + + t.strictEqual( dlaqr5, mock, 'returns expected value' ); + t.end(); + + function tryRequire() { + return mock; + } + + function mock() { + // Mock... 
+ } +}); + +tape( 'if a native implementation is not available, the main export is a JavaScript implementation', opts, function test( t ) { + var dlaqr5; + var main; + + main = require( './../lib/dlaqr5.js' ); + + dlaqr5 = proxyquire( './../lib', { + '@stdlib/utils/try-require': tryRequire + }); + + t.strictEqual( dlaqr5, main, 'returns expected value' ); + t.end(); + + function tryRequire() { + return new Error( 'Cannot find module' ); + } +}); From dd9d7848de477632f1a0b29502625762bf2ead13 Mon Sep 17 00:00:00 2001 From: Aayush Khanna Date: Mon, 21 Jul 2025 00:36:27 +0530 Subject: [PATCH 4/4] chore: working through stuff --- .../@stdlib/lapack/base/dlaqr5/lib/base.js | 520 ++---------------- .../lapack/base/dlaqr5/test/fixtures/b.json | 171 ++++++ .../lapack/base/dlaqr5/test/test.dlaqr5.js | 3 +- 3 files changed, 224 insertions(+), 470 deletions(-) create mode 100644 lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/b.json diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js index 3c6505658c34..f1e765cf23d3 100644 --- a/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/lib/base.js @@ -37,12 +37,6 @@ var dlarfg = require( './dlarfg.js' ); var dlaqr1 = require( './dlaqr1.js' ); -// VARIABLES // - -var safmin = dlamch( 'safe minimum' ); -var ulp = dlamch( 'precision' ); - - // FUNCTIONS // /** @@ -94,8 +88,8 @@ function shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI * @param {boolean} wantZ - boolean value indicating whether the orthogonal Schur factor is being computed * @param {integer} kacc22 - integer value ranging from 0 to 2 (inclusive), specifies the computation mode for far-from-diagonal updates * @param {integer} N - number of rows/columns in `H` -* @param {integer} KTOP - first row and column of the submatrix of `H` where the QR sweep will be applied -* @param {integer} KBOT - last row and column of 
the submatrix of `H` where the QR sweep will be applied +* @param {integer} KTOP - first row and column of the submatrix of `H` where the QR sweep will be applied, should be zero based +* @param {integer} KBOT - last row and column of the submatrix of `H` where the QR sweep will be applied, should be zero based * @param {integer} nshifts - number of simultaneous shifts, must be even and positive * @param {Float64Array} SR - real parts of the shifts of origin that define the QR sweep * @param {integer} strideSR - stride length of `SR` @@ -107,8 +101,8 @@ function shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI * @param {integer} strideH1 - stride of the first dimension of `H` * @param {integer} strideH2 - stride of the second dimension of `H` * @param {NonNegativeInteger} offsetH - starting index of `H` -* @param {integer} iloZ - starting row from where the transformation must be applied if `wantZ` is true -* @param {integer} ihiZ - ending row from where the transformation must be applied if `wantZ` is true +* @param {integer} iloZ - starting row from where the transformation must be applied if `wantZ` is true, should be zero based +* @param {integer} ihiZ - ending row from where the transformation must be applied if `wantZ` is true, should be zero based * @param {Float64Array} Z - the QR sweep orthogonal similarity transformation is accumulated into `Z` between the rows and columns `iloZ` and `ihiZ` if `wantZ` is true, otherwise `Z` is not referenced * @param {integer} strideZ1 - stride of the first dimension of `Z` * @param {integer} strideZ2 - stride of the second dimension of `Z` @@ -152,6 +146,7 @@ function shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI, H, strideH1, strideH2, offsetH, iloZ, ihiZ, Z, strideZ1, strideZ2, offsetZ, V, strideV1, strideV2, offsetV, U, strideU1, strideU2, offsetU, NH, WH, 
strideWH1, strideWH2, offsetWH, NV, WV, strideWV1, strideWV2, offsetWV ) { var dlarfgOut; var block22; + var safmin; var smlnum; var refsum; var mstart; @@ -161,7 +156,6 @@ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, off var krcol; var bmp22; var alpha; - var start; var ndcol; var beta; var jcol; @@ -174,17 +168,19 @@ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, off var mbot; var tst1; var tst2; - var step; + var swap; + var ulp; var h11; var h12; var h21; var h22; var kdu; - var end; var kms; var knz; var kzs; var m22; + var isi; + var isr; var scl; var k1; var nu; @@ -198,6 +194,7 @@ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, off var k; var m; var j; + var i; dlarfgOut = new Float64Array( 2 ); // Workspace array to pass `alpha` to the `dlarfg` routine vt = new Float64Array( 3 ); // local array @@ -216,20 +213,33 @@ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, off * Shuffle shifts into pairs of real shifts and pairs of complex conjugate shifts, * assuming that complex conjugate shifts are already adjacent to one another. */ - shuffleShifts( nshifts, SR, strideSR, offsetSR, SI, strideSI, offsetSI ); + for ( i = 0; i <= nshifts - 2; i += 2 ) { + if ( SI[ isi ] !== -SI[ isi + strideSI ] ) { + swap = SR[ isr ]; + SR[ isr ] = SR[ isr + strideSR ]; + SR[ isr + strideSR ] = SR[ isr + (2*strideSR) ]; + SR[ isr + (2*strideSR) ] = swap; + + swap = SI[ isi ]; + SI[ isi ] = SI[ isi + strideSI ]; + SI[ isi + strideSI ] = SI[ isi + (2*strideSI) ]; + SI[ isi + (2*strideSI) ] = swap; + } + isi += (2*strideSI); + isr += (2*strideSR); + } // `nshifts` is supposed to be even, but if it is odd, then simply reduce it by one. The shuffle above ensures that the dropped shift is real and that the remaining shifts are paired. 
- ns = nshifts - mod( nshifts, 2.0 ); + ns = nshifts - mod( nshifts, 2 ); // Machine constants for deflation + safmin = dlamch( 'safe minimum' ); + ulp = dlamch( 'precision' ); smlnum = safmin * N / ulp; // Use accumulated reflections to update far-from-diagonal entries? accum = ( kacc22 === 1 ) || ( kacc22 === 2 ); - // If so, exploit the 2-by-2 block structure? - block22 = ( ns > 2 ) && ( kacc22 === 2 ); - // Clear trash if ( KTOP + 2 <= KBOT ) { ih = offsetH + ( (KTOP+2) * strideH1 ) + ( KTOP * strideH2 ); @@ -240,14 +250,18 @@ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, off nbmps = ns / 2; // KDU = width of slab - kdu = ( 6 * nbmps ) - 3; - - start = ( 3 * ( 1 - nbmps ) ) + KTOP - 1; - end = KBOT - 2; - step = ( 3 * nbmps ) - 2; + kdu = 4 * nbmps; // Create and chase chains of `nbmps` bulges - for ( incol = start; incol <= end; incol += step ) { + for ( incol = KTOP - ( 2*nbmps ) + 1; incol <= KBOT - 2; incol += 2 * nbmps ) { + if ( accum ) { + jtop = max( KTOP, incol ); + } else if ( wantT ) { + jtop = 0; + } else { + jtop = KTOP; + } + ndcol = incol + kdu; if ( accum ) { dlaset( 'all', kdu, kdu, 0.0, 1.0, U, strideU1, strideU2, offsetU ); @@ -264,7 +278,7 @@ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, off * chase bulges before they are actually introduced or to which to * chase bulges beyond column `KBOT`.) */ - for ( krcol = incol; krcol < min( incol + ( 3*nbmps ) - 3, KBOT - 2 ); krcol+= 1 ) { + for ( krcol = incol; krcol <= min( incol + ( 2*nbmps ) - 1, KBOT - 2 ); krcol += 1 ) { /* * Bulges number `mtop` to `mbot` are active double implicit shift bulges. * There may or may not also be small 2-by-2 bulge, if there is room. @@ -272,469 +286,37 @@ function dlaqr5( wantT, wantZ, kacc22, N, KTOP, KBOT, nshifts, SR, strideSR, off * have moved down the diagonal to make room. The phantom matrix * paradigm described above helps keep track. 
*/ - mtop = max( 1, floor( ( KTOP - 1 - krcol + 2 ) / 3 ) + 1 ); - mbot = min( nbmps, floor( ( KBOT - krcol ) / 3 ) ); + mtop = max( 0, floor( ( KTOP - krcol ) / 2 ) ); + mbot = min( nbmps, floor( ( KBOT - krcol - 1 ) / 2 ) ); m22 = mbot + 1; - bmp22 = ( mbot < nbmps ) && ( krcol + ( 3 * ( m22 - 1 ) ) === KBOT - 2 ); + bmp22 = ( mbot < nbmps ) && ( krcol + ( 2 * m22 ) === KBOT - 2 ); /* * Generate reflections to chase the chain right one column. * (The minimum value of K is KTOP-1.) */ - for ( m = mtop; m <= mbot; m++ ) { - k = krcol + ( 3 * ( m - 1 ) ); - if ( k === KTOP - 1 ) { - dlaqr1( 3, H, strideH1, strideH2, offsetH + (KTOP*strideH1) + (KTOP*strideH2), SR[ offsetSR + (((2*m)-2)*strideSR) ], SI[ offsetSI + (((2*m)-2)*strideSI) ], SR[ offsetSR + (((2*m)-1)*strideSR) ], SI[ offsetSI + (((2*m)-1)*strideSI) ], vt, 1, 0 ); - - alpha = V[ offsetV + (m*strideV2) ]; - - // Prepare the `dlarfgOut` array to pass into the routine - dlarfgOut[ 0 ] = alpha; - dlarfgOut[ 1 ] = 0.0; - - // Call `dlarfg` using the `dlarfgOut` array to store outputs - dlarfg( 3, V, strideV1, offsetV + (m*strideV2), dlarfgOut, 1, 0 ); - - // Write the outputs to their expected positions - alpha = dlarfgOut[ 0 ]; - V[ offsetV + (m*strideV2) ] = dlarfgOut[ 1 ]; - } else { - beta = H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ]; - V[ offsetV + (m*strideV2) + strideV1 ] = H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ]; - V[ offsetV + (m*strideV2) + (2*strideV1) ] = H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ]; - - // Prepare the `dlarfgOut` array to pass into the routine - dlarfgOut[ 0 ] = beta; - dlarfgOut[ 1 ] = 0.0; - - // Call `dlarfg` using the `dlarfgOut` array to store outputs - dlarfg( 3, V, strideV1, offsetV + (m*strideV2), dlarfgOut, 1, 0 ); - - // Write the outputs to their expected positions - beta = dlarfgOut[ 0 ]; - V[ offsetV + (m*strideV2) ] = dlarfgOut[ 1 ]; - - /* - * A Bulge may collapse because of vigilant deflation or - * destructive underflow. 
In the underflow case, try the - * two-small-subdiagonals trick to try to reinflate the bulge. - */ - if ( H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] !== 0.0 || H[ offsetH + ((k+3)*strideH1) + ((k+1)*strideH2) ] !== 0.0 || H[ offsetH + ((k+3)*strideH1) + ((k+2)*strideH2) ] === 0.0 ) { - // Typical case: not collapsed (yet). - H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] = beta; - H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; - H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] = 0.0; - } else { - /* - * Atypical case: collapsed. Attempt to reintroduce - * ignoring H(K+1,K) and H(K+2,K). If the fill - * resulting from the new reflector is too large, - * then abandon it. Otherwise, use the new one. - */ - dlaqr1( 3, H, strideH1, strideH2, offsetH + ((k+1)*strideH1) + ((k+1)*strideH2), SR, strideSR, offsetSR + (((2*m)-1)*strideSR), SI, strideSI, offsetSI + (((2*m)-1)*strideSI), SR, strideSR, offsetSR + (2*m*strideSR), SI, strideSI, offsetSI + (2*m*strideSI), vt ); - alpha = vt[ 0 ]; - - // Prepare the `dlarfgOut` array to pass into the routine - dlarfgOut[ 0 ] = alpha; - dlarfgOut[ 1 ] = 0.0; - - // Call `dlarfg` using the `dlarfgOut` array to store outputs - dlarfg( 3, vt, 1, 1, dlarfgOut, 1, 0 ); - - // Write the outputs to their expected positions - alpha = dlarfgOut[ 0 ]; - vt[ 0 ] = dlarfgOut[ 1 ]; - - refsum = vt[ 0 ] * ( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] + ( vt[ 1 ] * H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] ) ); - - if ( abs( H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] - ( refsum * vt[ 1 ] ) ) + abs( refsum * vt[ 2 ] ) > ulp * ( abs( H[ offsetH + (k*strideH1) + (k*strideH2) ] ) + abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ) + abs( H[ offsetH + ((k+2)*strideH1) + ((k+2)*strideH2) ] ) ) ) { - /* - * Starting a new bulge here would create - * non-negligible fill. Use the old one with - * trepidation. 
- */ - H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] = beta; - H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; - H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] = 0.0; - } else { - /* - * Stating a new bulge here would create only - * negligible fill. Replace the old reflector - * with the new one. - */ - H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] -= refsum; - H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; - H[ offsetH + ((k+3)*strideH1) + (k*strideH2) ] = 0.0; - V[ offsetV + (m*strideV2) ] = vt[ 0 ]; - V[ offsetV + (m*strideV2) + strideV1 ] = vt[ 1 ]; - V[ offsetV + (m*strideV2) + (2*strideV1) ] = vt[ 2 ]; - } - } - } - } - - // Generate a 2-by-2 reflection, if needed. - k = krcol + ( 3 * ( m22 - 1 ) ); if ( bmp22 ) { + // Special case: 2-by-2 reflection at the bottom treated separately + k = krcol + ( 2 * m22 ); if ( k === KTOP - 1 ) { - dlaqr1( 2, H, strideH1, strideH2, offsetH + ((k+1)*strideH1) + ((k+1)*strideH2), SR[ offsetSR + (((2*m22)-2)*strideSR) ], SI[ offsetSI + (((2*m22)-2)*strideSI) ], SR[ offsetSR + (((2*m22)-1)*strideSR) ], SI[ offsetSI + (((2*m22)-1)*strideSI) ], vt, 1, 0 ); - beta = V[ offsetV + (m22*strideV2) ]; + dlaqr1( 2, H, strideH1, strideH2, offsetH + ((k+1)*strideH1) + ((k+1)*strideH2), SR[ offsetSR + (strideSR*2*m22) ], SI[ offsetSI + (strideSR*2*m22) ], SR[ offsetSR + ( ((2*m22) + 1) * strideSR ) ], SI[ offsetSI + ( ((2*m22) + 1) * strideSI ) ], V, strideV1, offsetV + (m22 * strideH2) ); + + beta = V[ offsetV + (m22 * strideH2) ]; - // Prepare the `dlarfgOut` array to pass into the routine dlarfgOut[ 0 ] = beta; dlarfgOut[ 1 ] = 0.0; - // Call `dlarfg` using the `dlarfgOut` array to store outputs - dlarfg( 2, V, strideV1, offsetV + (m22*strideV2), dlarfgOut, 1, 0 ); + dlarfg( 2, V, strideV2, offsetV + strideV2 +(m22*strideV1), dlarfgOut, 1, 0 ); // ====> check here - // Write the outputs to their expected positions - beta = dlarfgOut[ 0 ]; V[ offsetV + (m22*strideV2) ] = dlarfgOut[ 1 ]; } else { beta = H[ offsetH + 
((k+1)*strideH1) + (k*strideH2) ]; - V[ offsetV + (m22*strideV2) + strideV1 ] = H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ]; + V[ offsetV + (m22*strideV1) + strideV2 ] = H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ]; - // Prepare the `dlarfgOut` array to pass into the routine dlarfgOut[ 0 ] = beta; - dlarfgOut[ 1 ] = 0.0; - - // Call `dlarfg` using the `dlarfgOut` array to store outputs - dlarfg( 2, V, strideV1, offsetV + (m22*strideV2), dlarfgOut, 1, 0 ); - - // Write the outputs to their expected positions - beta = dlarfgOut[ 0 ]; - V[ offsetV + (m22*strideV2) ] = dlarfgOut[ 1 ]; - - H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] = beta; - H[ offsetH + ((k+2)*strideH1) + (k*strideH2) ] = 0.0; - } - } - - // Multiply H by reflections from the left - if ( accum ) { - jbot = min( ndcol, KBOT ); - } else if ( wantT ) { - jbot = N; - } else { - jbot = KBOT; - } - - for ( j = max( KTOP, krcol ); j <= jbot; j++ ) { - mend = min( mbot, floor( ( j - krcol + 2 ) / 3 ) ); - for ( m = mtop; m <= mend; m++ ) { - k = krcol + ( 3 * ( m - 1 ) ); - refsum = V[ offsetV + (m*strideV2) ] * ( H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * H[ offsetH + ((k+2)*strideH1) + (j*strideH2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * H[ offsetH + ((k+3)*strideH1) + (j*strideH2) ] ) ); - H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] -= refsum; - H[ offsetH + ((k+2)*strideH1) + (j*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; - H[ offsetH + ((k+3)*strideH1) + (j*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; - } - } - - if ( bmp22 ) { - k = krcol + ( 3 * ( m22 - 1 ) ); - for ( j = max( k + 1, KTOP ); j <= jbot; j++ ) { - refsum = V[ offsetV + (m22*strideV2) ] * ( H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * H[ offsetH + ((k+2)*strideH1) + (j*strideH2) ] ) ); - H[ offsetH + ((k+1)*strideH1) + (j*strideH2) ] -= refsum; - H[ offsetH + ((k+2)*strideH1) 
+ (j*strideH2) ] -= refsum * V[ offsetV + (m22*strideV2) + strideV1 ]; - } - } - - /* - * Multiply H by reflections from the right. - * Delay filling in the last row until the - * vigilant deflation check is complete. - */ - if ( accum ) { - jtop = max( KTOP, incol ); - } else if ( wantT ) { - jtop = 1; - } else { - jtop = KTOP; - } - - for ( m = mtop; m <= mbot; m++ ) { - if ( V[ offsetV + (m*strideV2) ] !== 0.0 ) { - k = krcol + ( 3 * ( m - 1 ) ); - for ( j = jtop; j <= min( KBOT, k + 3 ); j++ ) { - refsum = V[ offsetV + (m*strideV2) ] * ( H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * H[ offsetH + (j*strideH1) + ((k+3)*strideH2) ] ) ); - H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] -= refsum; - H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; - H[ offsetH + (j*strideH1) + ((k+3)*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; - } - - if ( accum ) { - /* - * Accumulate U. (If necessary, update Z later - * with with an efficient matrix-matrix multiply.) - */ - kms = k - incol; - for ( j = max( 1, KTOP - incol ); j <= kdu; j++ ) { - refsum = V[ offsetV + (m*strideV2) ] * ( U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * U[ offsetU + (j*strideU1) + ((kms+3)*strideU2) ] ) ); - U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] -= refsum; - U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; - U[ offsetU + (j*strideU1) + ((kms+3)*strideU2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; - } - } else if ( wantZ ) { - /* - * U is not accumulated, so update Z now by - * multiplying by reflections from the right. 
- */ - for ( j = iloZ; j <= ihiZ; j++ ) { - refsum = V[ offsetV + (m*strideV2) ] * ( Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] + ( V[ offsetV + (m*strideV2) + strideV1 ] * Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] ) + ( V[ offsetV + (m*strideV2) + (2*strideV1) ] * Z[ offsetZ + (j*strideZ1) + ((k+3)*strideZ2) ] ) ); - Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] -= refsum; - Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] -= refsum * V[ offsetV + (m*strideV2) + strideV1 ]; - Z[ offsetZ + (j*strideZ1) + ((k+3)*strideZ2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; - } - } - } - } - - // Special case: 2-by-2 reflection (if needed) - k = krcol + ( 3 * ( m22 - 1 ) ); - if ( bmp22 ) { - if ( V[ offsetV + (m22*strideV2) ] !== 0.0 ) { - for ( j = jtop; j <= min( KBOT, k + 3 ); j++ ) { - refsum = V[ offsetV + (m22*strideV2) ] * ( H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] ) ); - H[ offsetH + (j*strideH1) + ((k+1)*strideH2) ] -= refsum; - H[ offsetH + (j*strideH1) + ((k+2)*strideH2) ] -= refsum * V[ offsetV + (m22*strideV2) + strideV1 ]; - } - - if ( accum ) { - kms = k - incol; - for ( j = max( 1, KTOP - incol ); j <= kdu; j++ ) { - refsum = V[ offsetV + (m22*strideV2) ] * ( U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] ) ); - U[ offsetU + (j*strideU1) + ((kms+1)*strideU2) ] -= refsum; - U[ offsetU + (j*strideU1) + ((kms+2)*strideU2) ] -= refsum * V[ offsetV + (m22*strideV2) + strideV1 ]; - } - } else if ( wantZ ) { - for ( j = iloZ; j <= ihiZ; j++ ) { - refsum = V[ offsetV + (m22*strideV2) ] * ( Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] + ( V[ offsetV + (m22*strideV2) + strideV1 ] * Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] ) ); - Z[ offsetZ + (j*strideZ1) + ((k+1)*strideZ2) ] -= refsum; - Z[ offsetZ + (j*strideZ1) + ((k+2)*strideZ2) ] -= refsum 
* V[ offsetV + (m22*strideV2) + strideV1 ]; - } - } - } - } - - // Vigilant deflation check - mstart = mtop; - if ( krcol + ( 3 * ( mstart - 1 ) ) < KTOP ) { - mstart += 1; - } - mend = mbot; - if ( bmp22 ) { - mend += 1; - } - if ( krcol === KBOT - 2 ) { - mend += 1; - } - for ( m = mstart; m <= mend; m++ ) { - k = min( KBOT - 1, krcol + ( 3 * ( m - 1 ) ) ); - - /* - * The following convergence test requires that the tradition - * small-compared-to-nearby-diagonals criterion and the - * Ahues & Tisseur (LAWN 122, 1997) criteria both be satisfied. - * The latter improves accuracy in some examples. Falling - * back on an alternate convergence criterion when TST1 or - * TST2 is zero (as done here) is traditional but probably - * unnecessary. - */ - if ( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] !== 0.0 ) { - tst1 = abs( H[ offsetH + (k*strideH1) + (k*strideH2) ] ) + abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ); - if ( tst1 === 0.0 ) { - if ( k >= KTOP + 1 ) { - tst1 += abs( H[ offsetH + (k*strideH1) + ((k-1)*strideH2) ] ); - } - if ( k >= KTOP + 2 ) { - tst1 += abs( H[ offsetH + (k*strideH1) + ((k-2)*strideH2) ] ); - } - if ( k >= KTOP + 3 ) { - tst1 += abs( H[ offsetH + (k*strideH1) + ((k-3)*strideH2) ] ); - } - if ( k <= KBOT - 2 ) { - tst1 += abs( H[ offsetH + ((k+2)*strideH1) + ((k+1)*strideH2) ] ); - } - if ( k <= KBOT - 3 ) { - tst1 += abs( H[ offsetH + ((k+3)*strideH1) + ((k+1)*strideH2) ] ); - } - if ( k <= KBOT - 4 ) { - tst1 += abs( H[ offsetH + ((k+4)*strideH1) + ((k+1)*strideH2) ] ); - } - } - if ( abs( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] ) <= max( smlnum, ulp * tst1 ) ) { - h12 = max( abs( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + ((k+1)*strideH2) ] ) ); - h21 = min( abs( H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + ((k+1)*strideH2) ] ) ); - h11 = max( abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + 
(k*strideH2) ] - H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ) ); - h22 = min( abs( H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ), abs( H[ offsetH + (k*strideH1) + (k*strideH2) ] - H[ offsetH + ((k+1)*strideH1) + ((k+1)*strideH2) ] ) ); - scl = h11 + h12; - tst2 = h22 * ( h11 / scl ); - if ( tst2 === 0.0 || h21 * ( h12 / scl ) <= max( smlnum, ulp * tst2 ) ) { - H[ offsetH + ((k+1)*strideH1) + (k*strideH2) ] = 0.0; - } - } - } - } - - // Fill in the last row of each bulge. - mend = min( nbmps, floor( ( KBOT - krcol - 1 ) / 3 ) ); - for ( m = mtop; m <= mend; m++ ) { - k = krcol + ( 3 * ( m - 1 ) ); - refsum = V[ offsetV + (m*strideV2) ] * V[ offsetV + (m*strideV2) + (2*strideV1) ] * H[ offsetH + ((k+4)*strideH1) + ((k+3)*strideH2) ]; - H[ offsetH + ((k+4)*strideH1) + ((k+1)*strideH2) ] = -refsum; - H[ offsetH + ((k+4)*strideH1) + ((k+2)*strideH2) ] = -refsum * V[ offsetV + (m*strideV2) + strideV1 ]; - H[ offsetH + ((k+4)*strideH1) + ((k+3)*strideH2) ] -= refsum * V[ offsetV + (m*strideV2) + (2*strideV1) ]; - } - } // End of near-the-diagonal bulge chase. - - // Use U (if accumulated) to update far-from-diagonal entries in H. If required, use U to update Z as well. - if ( accum ) { - if ( wantT ) { - jtop = 1; - jbot = N; - } else { - jtop = KTOP; - jbot = KBOT; - } - - /* - * Updates not exploiting the 2-by-2 block structure of U. K1 and NU - * keep track of the location and size of U in the special cases of - * introducing bulges and chasing bulges off the bottom. In these - * special cases and in case the number of shifts is NS = 2, there - * is no 2-by-2 block structure to exploit. 
- */ - if ( !block22 || incol < KTOP || ndcol > KBOT || ns <= 2 ) { - k1 = max( 1, KTOP - incol ); - nu = kdu - max( 0, ndcol - KBOT ) - k1 + 1; - - // Horizontal Multiply - for ( jcol = min( ndcol, KBOT ) + 1; jcol <= jbot; jcol += NH ) { - jlen = min( NH, jbot - jcol + 1 ); - dgemm( 'conjugate-transpose', 'no-transpose', nu, jlen, nu, 1.0, U, strideU1, strideU2, offsetU + (k1*strideU1) + (k1*strideU2), H, strideH1, strideH2, offsetH + ((incol+k1)*strideH1) + (jcol*strideH2), 0.0, WH, strideWH1, strideWH2, offsetWH ); - dlacpy( 'all', nu, jlen, WH, strideWH1, strideWH2, offsetWH, H, strideH1, strideH2, offsetH + ((incol+k1)*strideH1) + (jcol*strideH2) ); - } - - // Vertical multiply - for ( jrow = jtop; jrow <= max( KTOP, incol ) - 1; jrow += NV ) { - jlen = min( NV, max( KTOP, incol ) - jrow ); - dgemm( 'no-transpose', 'no-transpose', jlen, nu, nu, 1.0, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+k1)*strideH2), U, strideU1, strideU2, offsetU + (k1*strideU1) + (k1*strideU2), 0.0, WV, strideWV1, strideWV2, offsetWV ); - dlacpy( 'all', jlen, nu, WV, strideWV1, strideWV2, offsetWV, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+k1)*strideH2) ); - } - - // Z multiply (also vertical) - if ( wantZ ) { - for ( jrow = iloZ; jrow <= ihiZ; jrow += NV ) { - jlen = min( NV, ihiZ - jrow + 1 ); - dgemm( 'no-transpose', 'no-transpose', jlen, nu, nu, 1.0, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+k1)*strideZ2), U, strideU1, strideU2, offsetU + (k1*strideU1) + (k1*strideU2), 0.0, WV, strideWV1, strideWV2, offsetWV ); - dlacpy( 'all', jlen, nu, WV, strideWV1, strideWV2, offsetWV, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+k1)*strideZ2) ); - } - } - } else { - /* - * Updates exploiting U's 2-by-2 block structure. - * (I2, I4, J2, J4 are the last rows and columns of the blocks.) 
- */ - i2 = floor( ( kdu + 1 ) / 2 ); - i4 = kdu; - j2 = i4 - i2; - j4 = kdu; - - /* - * KZS and KNZ deal with the band of zeros along the diagonal - * of one of the triangular blocks. - */ - kzs = j4 - j2 - ( ns + 1 ); - knz = ns + 1; - - // Horizontal multiply - for ( jcol = min( ndcol, KBOT ) + 1; jcol <= jbot; jcol += NH ) { - jlen = min( NH, jbot - jcol + 1 ); - - /* - * Copy bottom of H to top+KZS of scratch - * (The first KZS rows get multiplied by zero.) - */ - dlacpy( 'all', knz, jlen, H, strideH1, strideH2, offsetH + ((incol+1+j2)*strideH1) + (jcol*strideH2), WH, strideWH1, strideWH2, offsetWH + ((kzs+1)*strideWH1) ); - - // Multiply by U21**T - dlaset( 'all', kzs, jlen, 0.0, 0.0, WH, strideWH1, strideWH2, offsetWH ); - dtrmm( 'left', 'upper', 'conjugate-transpose', 'non-unit', knz, jlen, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((1+kzs)*strideU2), WH, strideWH1, strideWH2, offsetWH + ((kzs+1)*strideWH1) ); - - // Multiply top of H by U11**T - dgemm( 'conjugate-transpose', 'no-transpose', i2, jlen, j2, 1.0, U, strideU1, strideU2, offsetU, H, strideH1, strideH2, offsetH + ((incol+1)*strideH1) + (jcol*strideH2), 1.0, WH, strideWH1, strideWH2, offsetWH ); - - // Copy top of H to bottom of WH - dlacpy( 'all', j2, jlen, H, strideH1, strideH2, offsetH + ((incol+1)*strideH1) + (jcol*strideH2), WH, strideWH1, strideWH2, offsetWH + ((i2+1)*strideWH1) ); - - // Multiply by U21**T - dtrmm( 'left', 'lower', 'conjugate-transpose', 'non-unit', j2, jlen, 1.0, U, strideU1, strideU2, offsetU + (1*strideU1) + ((i2+1)*strideU2), WH, strideWH1, strideWH2, offsetWH + ((i2+1)*strideWH1) ); - - // Multiply by U22 - dgemm( 'conjugate-transpose', 'no-transpose', i4 - i2, jlen, j4 - j2, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((i2+1)*strideU2), H, strideH1, strideH2, offsetH + ((incol+1+j2)*strideH1) + (jcol*strideH2), 1.0, WH, strideWH1, strideWH2, offsetWH + ((i2+1)*strideWH1) ); - - // Copy it back - dlacpy( 'all', kdu, jlen, WH, strideWH1, 
strideWH2, offsetWH, H, strideH1, strideH2, offsetH + ((incol+1)*strideH1) + (jcol*strideH2) ); - } - - // Vertical multiply - for ( jrow = jtop; jrow <= max( incol, KTOP ) - 1; jrow += NV ) { - jlen = min( NV, max( incol, KTOP ) - jrow ); - - /* - * Copy right of H to scratch (the first KZS columns get multiplied by zero) - */ - dlacpy( 'all', jlen, knz, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1+j2)*strideH2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); - - // Multiply by U21 - dlaset( 'all', jlen, kzs, 0.0, 0.0, WV, strideWV1, strideWV2, offsetWV ); - dtrmm( 'right', 'upper', 'no-transpose', 'non-unit', jlen, knz, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((1+kzs)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); - - // Multiply by U11 - dgemm( 'no-transpose', 'no-transpose', jlen, i2, j2, 1.0, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1)*strideH2), U, strideU1, strideU2, offsetU, 1.0, WV, strideWV1, strideWV2, offsetWV ); - - // Copy left of H to right of scratch - dlacpy( 'all', jlen, j2, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1)*strideH2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); - - // Multiply by U21 - dtrmm( 'right', 'lower', 'no-transpose', 'non-unit', jlen, i4 - i2, 1.0, U, strideU1, strideU2, offsetU + (1*strideU1) + ((i2+1)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); - - // Multiply by U22 - dgemm( 'no-transpose', 'no-transpose', jlen, i4 - i2, j4 - j2, 1.0, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1+j2)*strideH2), U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((i2+1)*strideU2), 1.0, WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); - - // Copy it back - dlacpy( 'all', jlen, kdu, WV, strideWV1, strideWV2, offsetWV, H, strideH1, strideH2, offsetH + (jrow*strideH1) + ((incol+1)*strideH2) ); - 
} - - // Multiply Z (also vertical) - if ( wantZ ) { - for ( jrow = iloZ; jrow <= ihiZ; jrow += NV ) { - jlen = min( NV, ihiZ - jrow + 1 ); - - /* - * Copy right of Z to left of scratch (first KZS columns get multiplied by zero) - */ - dlacpy( 'all', jlen, knz, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1+j2)*strideZ2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); - - // Multiply by U12 - dlaset( 'all', jlen, kzs, 0.0, 0.0, WV, strideWV1, strideWV2, offsetWV ); - dtrmm( 'right', 'upper', 'no-transpose', 'non-unit', jlen, knz, 1.0, U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((1+kzs)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+kzs)*strideWV2) ); - - // Multiply by U11 - dgemm( 'no-transpose', 'no-transpose', jlen, i2, j2, 1.0, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1)*strideZ2), U, strideU1, strideU2, offsetU, 1.0, WV, strideWV1, strideWV2, offsetWV ); - - // Copy left of Z to right of scratch - dlacpy( 'all', jlen, j2, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1)*strideZ2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); - - // Multiply by U21 - dtrmm( 'right', 'lower', 'no-transpose', 'non-unit', jlen, i4 - i2, 1.0, U, strideU1, strideU2, offsetU + (1*strideU1) + ((i2+1)*strideU2), WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); - - // Multiply by U22 - dgemm( 'no-transpose', 'no-transpose', jlen, i4 - i2, j4 - j2, 1.0, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1+j2)*strideZ2), U, strideU1, strideU2, offsetU + ((j2+1)*strideU1) + ((i2+1)*strideU2), 1.0, WV, strideWV1, strideWV2, offsetWV + (1*strideWV1) + ((1+i2)*strideWV2) ); + dlarfgOut[ 1 ] = 1.0; - // Copy the result back to Z - dlacpy( 'all', jlen, kdu, WV, strideWV1, strideWV2, offsetWV, Z, strideZ1, strideZ2, offsetZ + (jrow*strideZ1) + ((incol+1)*strideZ2) ); - } + dlarfg( 2, V, strideV2, offsetV + (m22*strideV1) + 
strideV2, outerHeight, 1, 0 ); } } } diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/b.json b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/b.json new file mode 100644 index 000000000000..097f7b33fcde --- /dev/null +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/fixtures/b.json @@ -0,0 +1,171 @@ +{ + "order": "row-major", + + "wantT": true, + "wantZ": true, + "kacc22": 0, + "N": 6, + "KTOP": 2, + "KBOT": 5, + "nshifts": 2, + + "SR": [ 5.25, 5.67 ], + "strideSR": 1, + "offsetSR": 0, + + "SI": [ 0.0, 0.0 ], + "strideSI": 1, + "offsetSI": 0, + + "H": [ + 3.000, -1.100, 0.000, 0.000, 0.000, 0.000, + 1.100, 4.000, -1.200, 0.000, 0.000, 0.000, + 0.000, 1.200, 5.000, -1.300, 0.000, 0.000, + 0.000, 0.000, 1.300, 6.000, -1.400, 0.000, + 0.000, 0.000, 0.000, 1.400, 7.000, -1.500, + 0.000, 0.000, 0.000, 0.000, 1.500, 8.000 + ], + "strideH1": 6, + "strideH2": 1, + "offsetH": 0, + "LDH": 6, + "H_mat": [ + [ 3.000, -1.100, 0.000, 0.000, 0.000, 0.000 ], + [ 1.100, 4.000, -1.200, 0.000, 0.000, 0.000 ], + [ 0.000, 1.200, 5.000, -1.300, 0.000, 0.000 ], + [ 0.000, 0.000, 1.300, 6.000, -1.400, 0.000 ], + [ 0.000, 0.000, 0.000, 1.400, 7.000, -1.500 ], + [ 0.000, 0.000, 0.000, 0.000, 1.500, 8.000 ] + ], + + "iloZ": 1, + "ihiZ": 6, + + "Z": [ + 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, + 0.000, 1.000, 0.000, 0.000, 0.000, 0.000, + 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, + 0.000, 0.000, 0.000, 1.000, 0.000, 0.000, + 0.000, 0.000, 0.000, 0.000, 1.000, 0.000, + 0.000, 0.000, 0.000, 0.000, 0.000, 1.000 + ], + "strideZ1": 6, + "strideZ2": 1, + "offsetZ": 0, + "LDZ": 6, + "Z_mat": [ + [ 1.000, 0.000, 0.000, 0.000, 0.000, 0.000 ], + [ 0.000, 1.000, 0.000, 0.000, 0.000, 0.000 ], + [ 0.000, 0.000, 1.000, 0.000, 0.000, 0.000 ], + [ 0.000, 0.000, 0.000, 1.000, 0.000, 0.000 ], + [ 0.000, 0.000, 0.000, 0.000, 1.000, 0.000 ], + [ 0.000, 0.000, 0.000, 0.000, 0.000, 1.000 ] + ], + + "V": [ + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0 + ], + "V_mat": [ + [ 0.0, 0.0 ], + 
[ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "strideV1": 2, + "strideV2": 1, + "offsetV": 0, + "LDV": 3, + + "U": [ + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0 + ], + "U_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "strideU1": 2, + "strideU2": 1, + "offsetU": 0, + "LDU": 6, + + "WV": [ + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0, + 0.0, 0.0 + ], + "WV_mat": [ + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ], + [ 0.0, 0.0 ] + ], + "strideWV1": 2, + "strideWV2": 1, + "offsetWV": 0, + "LDWV": 6, + + "WH": [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + ], + "WH_mat": [ + [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], + [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] + ], + "strideWH1": 6, + "strideWH2": 1, + "offsetWH": 0, + "LDWH": 2, + + "NH": 3, + "NV": 3, + + "H_out": [ + 3.000, 0.249, 0.612, 0.827, 0.298, 0.000, + 1.100, 5.247, -2.049, 0.102, -0.690, 0.000, + 0.000, 1.232, 5.729, -1.362, -1.656, 0.931, + 0.000, 0.000, -0.352, 4.667, 1.023, -0.277, + 0.000, 0.000, 0.000, -1.290, 6.358, -1.143, + 0.000, 0.000, 0.000, 0.000, 1.500, 8.000 + ], + "H_out_mat": [ + [ 3.000, 0.249, 0.612, 0.827, 0.298, 0.000 ], + [ 1.100, 5.247, -2.049, 0.102, -0.690, 0.000 ], + [ 0.000, 1.232, 5.729, -1.362, -1.656, 0.931 ], + [ 0.000, 0.000, -0.352, 4.667, 1.023, -0.277 ], + [ 0.000, 0.000, 0.000, -1.290, 6.358, -1.143 ], + [ 0.000, 0.000, 0.000, 0.000, 1.500, 8.000 ] + ], + + "Z_out": [ + 1.000, 0.000, 0.000, 0.000, 0.000, 0.000, + 0.000, -0.227, -0.556, -0.752, -0.271, 0.000, + 0.000, 0.806, 0.194, -0.486, 0.275, 0.000, + 0.000, -0.546, 0.517, -0.405, 0.519, 0.000, + 0.000, 0.000, -0.621, 0.184, 0.762, 0.000, + 0.000, 0.000, 0.000, 0.000, 0.000, 1.000 + ], + "Z_out_mat": [ + [ 1.000, 0.000, 0.000, 0.000, 0.000, 0.000 ], + [ 0.000, -0.227, -0.556, -0.752, -0.271, 0.000 ], + [ 0.000, 0.806, 0.194, -0.486, 0.275, 0.000 ], + [ 0.000, -0.546, 0.517, -0.405, 0.519, 0.000 
], + [ 0.000, 0.000, -0.621, 0.184, 0.762, 0.000 ], + [ 0.000, 0.000, 0.000, 0.000, 0.000, 1.000 ] + ] +} diff --git a/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js index 6ae40e5e8bac..18c625fd404e 100644 --- a/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js +++ b/lib/node_modules/@stdlib/lapack/base/dlaqr5/test/test.dlaqr5.js @@ -30,6 +30,7 @@ var dlaqr5 = require( './../lib/dlaqr5.js' ); // FIXTURES // var A = require( './fixtures/a.json' ); +var B = require( './fixtures/b.json' ); // TESTS // @@ -58,7 +59,7 @@ tape( 'the function returns expected values', function test( t ) { var U; var V; - data = A; + data = B; WH = new Float64Array( data.WH ); WV = new Float64Array( data.WV );