Skip to content

Commit 36ab4ad

Browse files
authored
Merge pull request #75 from fslaborg/perf/Optimize LU decomposition
Daily Perf Improver - Optimize LU decomposition with SIMD row operations
2 parents ff9d962 + 0021d51 commit 36ab4ad

File tree

1 file changed

+23
-5
lines changed

1 file changed

+23
-5
lines changed

src/FsMath/Algebra/LinearAlgebra.fs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,10 @@ type LinearAlgebra =
583583
M.[i, col] <- M.[j, col]
584584
M.[j, col] <- tmp
585585

586+
// Get raw data arrays for SIMD-optimized row operations
587+
let Udata = U.Data
588+
let Ldata = L.Data
589+
586590
// Perform the decomposition
587591
for i = 0 to n - 2 do
588592
// 1) Find pivot row by max absolute value in U column i
@@ -602,13 +606,27 @@ type LinearAlgebra =
602606
P.[i] <- P.[pivotRow]
603607
P.[pivotRow] <- tmp
604608

605-
// 3) Eliminate below pivot
609+
// 3) Eliminate below pivot using SIMD-optimized row operations
610+
let diagVal = U.[i,i]
606611
for j = i + 1 to n - 1 do
607612
// L[j,i] = U[j,i] / U[i,i]
608-
L.[j,i] <- U.[j,i] / U.[i,i]
609-
// Update row j of U
610-
for k = i + 1 to n - 1 do
611-
U.[j,k] <- U.[j,k] - L.[j,i] * U.[i,k]
613+
let multiplier = U.[j,i] / diagVal
614+
L.[j,i] <- multiplier
615+
616+
// SIMD-optimized row update: U[j, i+1..n-1] -= multiplier * U[i, i+1..n-1]
617+
// Use subScaledRowInPlace for contiguous row segments
618+
if i + 1 < n then
619+
let count = n - (i + 1)
620+
let rowJOffset = j * n + (i + 1)
621+
let rowIOffset = i * n + (i + 1)
622+
LinearAlgebra.subScaledRowInPlace
623+
multiplier
624+
rowJOffset
625+
rowIOffset
626+
count
627+
Udata
628+
Udata
629+
612630
U.[j,i] <- 'T.Zero
613631

614632
// Add identity to L (so diagonal = 1.0)

0 commit comments

Comments
 (0)