From c1049e557324be75af4d81fa65117c2eefdea1f4 Mon Sep 17 00:00:00 2001
From: Lambert Theisen <lambert.theisen@rwth-aachen.de>
Date: Wed, 30 Jul 2025 13:56:56 +0200
Subject: [PATCH 1/4] Try to fix PeriodicBCTests

- works but there's still a warning/error for precompilation
---
 src/FESpaces.jl          | 75 +++++++++++++++++++++++++++++++---------
 test/PeriodicBCsTests.jl | 33 +++++++++++++++++-
 2 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/src/FESpaces.jl b/src/FESpaces.jl
index f68b747..b53c7a0 100644
--- a/src/FESpaces.jl
+++ b/src/FESpaces.jl
@@ -102,7 +102,7 @@ function fetch_vector_ghost_values_cache(vector_partition,partition)
 end
 
 function fetch_vector_ghost_values!(vector_partition,cache)
-  assemble!((a,b)->b, vector_partition, cache) 
+  assemble!((a,b)->b, vector_partition, cache)
 end
 
 function generate_gids(
@@ -128,7 +128,7 @@ function generate_gids(
         end
       end
     end
-    me = part_id(indices) 
+    me = part_id(indices)
     nodofs = count(p->p==me,ldof_to_owner)
     ldof_to_owner, nodofs
   end |> tuple_of_arrays
@@ -140,7 +140,7 @@ function generate_gids(
   cell_ldofs_to_owner = dof_wise_to_cell_wise(ldof_to_owner,cell_to_ldofs,own_to_local(cell_range))
   consistent!(PVector(cell_ldofs_to_owner,partition(cell_range))) |> wait
   cell_wise_to_dof_wise!(ldof_to_owner,cell_ldofs_to_owner,cell_to_ldofs,ghost_to_local(cell_range))
-  
+
   # Fill owned gids
   ldof_to_gdof = map(ranks,first_gdof,ldof_to_owner) do rank,first_gdof,ldof_to_owner
     ldof_to_gdof = fill(0,length(ldof_to_owner))
@@ -469,7 +469,7 @@ function _find_vector_type(spaces,gids;split_own_and_ghost=false)
   return vector_type
 end
 
-# TODO: We would like to avoid this, but I cannot extract the maximal order 
+# TODO: We would like to avoid this, but I cannot extract the maximal order
 #       from the space itself...
 function _add_distributed_constraint(
   F::DistributedFESpace,reffe::ReferenceFE,constraint
@@ -513,7 +513,7 @@ end
 const DistributedZeroMeanFESpace{A,B,C,D,E,F} = DistributedSingleFieldFESpace{A,B,C,D,DistributedZeroMeanCache{E,F}}
 
 function FESpaces.FESpaceWithConstantFixed(
-  space::DistributedSingleFieldFESpace, 
+  space::DistributedSingleFieldFESpace,
   gid_to_fix::Int = num_free_dofs(space)
 )
   # Find the gid within the processors
@@ -569,23 +569,23 @@ function FESpaces.FEFunction(
   isconsistent=false
 )
   free_values = change_ghost(free_values,f.gids,is_consistent=isconsistent,make_consistent=true)
-  
+
   c = _compute_new_distributed_fixedval(
     f,free_values,dirichlet_values
   )
-  fv = free_values .+ c # TODO: Do we need to copy, or can we just modify? 
+  fv = free_values .+ c # TODO: Do we need to copy, or can we just modify?
   dv = map(dirichlet_values) do dv
     dv .+ c
   end
-  
+
   fields = map(FEFunction,f.spaces,partition(fv),dv)
   trian = get_triangulation(f)
   metadata = DistributedFEFunctionData(fv)
   DistributedCellField(fields,trian,metadata)
 end
 
-# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero, 
-# which does not properly interpolate the function provided. 
+# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero,
+# which does not properly interpolate the function provided.
 # With this change, we are interpolating in the unconstrained space and then
 # substracting the mean.
 function FESpaces.interpolate!(u,free_values::AbstractVector,f::DistributedZeroMeanFESpace)
@@ -602,7 +602,7 @@ function _compute_new_distributed_fixedval(
 )
   dvol = f.metadata.dvol
   vol  = f.metadata.vol
-  
+
   c_i = map(local_views(f),partition(fv),dv,dvol) do space,fv,dv,dvol
     if isa(FESpaces.ConstantApproach(space),FESpaces.FixConstant)
       lid_to_fix = space.dof_to_fix
@@ -618,19 +618,19 @@ end
 
 """
     ConstantFESpace(
-      model::DistributedDiscreteModel; 
-      constraint_type=:global, 
+      model::DistributedDiscreteModel;
+      constraint_type=:global,
       kwargs...
     )
 
 Distributed equivalent to `ConstantFESpace(model;kwargs...)`.
 
 With `constraint_type=:global`, a single dof is shared by all processors.
-This creates a global constraint, which is NOT scalable in parallel. Use at your own peril. 
+This creates a global constraint, which is NOT scalable in parallel. Use at your own peril.
 
 With `constraint_type=:local`, a single dof is owned by each processor and shared with no one else.
 This space is locally-constant in each processor, and therefore scalable (but not equivalent
-to its serial counterpart). 
+to its serial counterpart).
 """
 function FESpaces.ConstantFESpace(
   model::DistributedDiscreteModel;
@@ -694,7 +694,7 @@ function FESpaces.SparseMatrixAssembler(
 )
   assems = map(partition(rows),partition(cols)) do rows,cols
     FESpaces.GenericSparseMatrixAssembler(
-      SparseMatrixBuilder(local_mat_type), 
+      SparseMatrixBuilder(local_mat_type),
       ArrayBuilder(local_vec_type),
       Base.OneTo(length(rows)),
       Base.OneTo(length(cols)),
@@ -752,7 +752,7 @@ end
 
 function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data)
   rows, cols = get_rows(a), get_cols(a)
-  map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data)  
+  map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data)
 end
 
 function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data)
@@ -834,3 +834,44 @@ function FESpaces.collect_cell_matrix_and_vector(
     collect_cell_matrix_and_vector(u,v,m,l,f)
   end
 end
+
+# Fix for LinearFESolver with periodic boundary conditions
+# The issue is that get_free_dof_values(u) creates a vector with the wrong index structure
+# for periodic BCs. We need to use a vector with the same index structure as the operator.
+
+# Override LinearFESolver methods to use compatible vector index structures
+import Gridap.FESpaces: solve!
+
+function solve!(u,solver::LinearFESolver,feop::AffineFEOperator,cache::Nothing)
+  # Get the algebraic operator first
+  op = get_algebraic_operator(feop)
+
+  # Create a vector with compatible index structure using the operator
+  # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices
+  x = similar(get_vector(op))
+
+  # Solve the linear system
+  cache = solve!(x,solver.ls,op)
+
+  # Create the FE function with the solution
+  trial = get_trial(feop)
+  u_new = FEFunction(trial,x)
+  (u_new, cache)
+end
+
+function solve!(u,solver::LinearFESolver,feop::AffineFEOperator, cache)
+  # Get the algebraic operator first
+  op = get_algebraic_operator(feop)
+
+  # Create a vector with compatible index structure using the operator
+  # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices
+  x = similar(get_vector(op))
+
+  # Solve the linear system
+  cache = solve!(x,solver.ls,op,cache)
+
+  # Create the FE function with the solution
+  trial = get_trial(feop)
+  u_new = FEFunction(trial,x)
+  (u_new,cache)
+end
diff --git a/test/PeriodicBCsTests.jl b/test/PeriodicBCsTests.jl
index 247cb6f..e6a1322 100644
--- a/test/PeriodicBCsTests.jl
+++ b/test/PeriodicBCsTests.jl
@@ -29,9 +29,40 @@ function main(distribute,parts)
   l(v) = ∫( v*f )dΩ
   op = AffineFEOperator(a,l,U,V)
 
+  # Test 1: Basic solve(op) functionality
   uh = solve(op)
   eh = u - uh
-  @test sqrt(sum( ∫(abs2(eh))dΩ )) < 0.00122
+  error_solve = sqrt(sum( ∫(abs2(eh))dΩ ))
+  @test error_solve < 0.00122
+
+  # Test 2: LinearFESolver consistency (fix verification)
+  # This tests the specific fix for periodic BCs where LinearFESolver
+  # uses compatible vector index structures
+  solver = LinearFESolver(BackslashSolver())
+  uh_solver = solve(solver, op)
+  eh_solver = u - uh_solver
+  error_solver = sqrt(sum( ∫(abs2(eh_solver))dΩ ))
+
+  # Test 3: Direct matrix solve for comparison
+  A = get_matrix(op)
+  b = get_vector(op)
+  x_direct = A \ b
+  uh_direct = FEFunction(U, x_direct)
+  eh_direct = u - uh_direct
+  error_direct = sqrt(sum( ∫(abs2(eh_direct))dΩ ))
+
+  # Test 4: Verify all methods give consistent results (within numerical precision)
+  # This is the key test for the periodic BC fix - all ratios should be ≈ 1.0
+  ratio_solve_direct = error_solve / error_direct
+  ratio_solver_direct = error_solver / error_direct
+
+  @test abs(ratio_solve_direct - 1.0) < 1e-10  # solve(op) should match direct solve
+  @test abs(ratio_solver_direct - 1.0) < 1e-10  # LinearFESolver should match direct solve
+  @test abs(error_solve - error_solver) < 1e-14  # solve(op) and LinearFESolver should be identical
+
+  # Test 5: Error magnitudes should be reasonable
+  @test error_solver < 0.00122
+  @test error_direct < 0.00122
 
 end
 

From 0f826df391cb4ee050ebf330370f19b6b968276e Mon Sep 17 00:00:00 2001
From: Lambert Theisen <lambert.theisen@rwth-aachen.de>
Date: Wed, 30 Jul 2025 14:10:48 +0200
Subject: [PATCH 2/4] Fix code by not overrriding Gridap.solve! but change
 get_free_dof_values

---
 src/FESpaces.jl | 110 ++++++++++++++++++++++++++++++------------------
 1 file changed, 70 insertions(+), 40 deletions(-)

diff --git a/src/FESpaces.jl b/src/FESpaces.jl
index b53c7a0..b1373d6 100644
--- a/src/FESpaces.jl
+++ b/src/FESpaces.jl
@@ -194,7 +194,76 @@ end
 const DistributedSingleFieldFEFunction{A,B,T} = DistributedCellField{A,B,DistributedFEFunctionData{T}}
 
 function FESpaces.get_free_dof_values(uh::DistributedSingleFieldFEFunction)
-  uh.metadata.free_values
+  free_values = uh.metadata.free_values
+  
+  # Check if we have periodic boundary conditions that require index structure conversion
+  # This is needed when the FE space uses PermutedLocalIndices but the linear system
+  # requires LocalIndicesWithVariableBlockSize for compatibility
+  if _needs_periodic_bc_fix(free_values)
+    return _convert_to_compatible_index_structure(free_values)
+  end
+  
+  return free_values
+end
+
+"""
+Check if the vector needs periodic BC index structure conversion
+"""
+function _needs_periodic_bc_fix(v::PVector)
+  # Check if any partition uses PermutedLocalIndices which typically indicates periodic BCs
+  indices = partition(axes(v, 1))
+  checks = map(indices) do idx
+    isa(idx, PartitionedArrays.PermutedLocalIndices)
+  end
+  # Use reduction to avoid scalar indexing on DebugArray
+  return reduce(|, checks, init=false)
+end
+
+"""
+Convert a vector with PermutedLocalIndices to one with LocalIndicesWithVariableBlockSize
+This is needed for periodic BC compatibility with linear system operators
+"""
+function _convert_to_compatible_index_structure(v::PVector)
+  # Get the current index partition
+  old_indices = partition(axes(v, 1))
+  
+  # Create new indices using LocalIndicesWithVariableBlockSize
+  new_indices = map(old_indices) do idx
+    if isa(idx, PartitionedArrays.PermutedLocalIndices)
+      # Extract the underlying LocalIndicesWithVariableBlockSize
+      return idx.indices
+    else 
+      return idx
+    end
+  end
+  
+  # Create a new PRange with the unpermuted indices
+  new_prange = PRange(new_indices)
+  
+  # Create a new vector with the compatible index structure
+  new_vector = similar(v, new_prange)
+  
+  # Copy values handling the index mapping
+  map(partition(new_vector), partition(v), new_indices, old_indices) do new_part, old_part, new_idx, old_idx
+    if isa(old_idx, PartitionedArrays.PermutedLocalIndices)
+      # Handle permuted indices by mapping through global indices
+      old_loc_to_glo = local_to_global(old_idx)
+      new_glo_to_loc = global_to_local(new_idx)
+      
+      for old_lid in 1:local_length(old_idx)
+        gid = old_loc_to_glo[old_lid]
+        new_lid = new_glo_to_loc[gid]
+        if new_lid > 0
+          new_part[new_lid] = old_part[old_lid]
+        end
+      end
+    else
+      # Direct copy if no permutation
+      copy!(new_part, old_part)
+    end
+  end
+  
+  return new_vector
 end
 
 # Single field related
@@ -835,43 +904,4 @@ function FESpaces.collect_cell_matrix_and_vector(
   end
 end
 
-# Fix for LinearFESolver with periodic boundary conditions
-# The issue is that get_free_dof_values(u) creates a vector with the wrong index structure
-# for periodic BCs. We need to use a vector with the same index structure as the operator.
-
-# Override LinearFESolver methods to use compatible vector index structures
-import Gridap.FESpaces: solve!
-
-function solve!(u,solver::LinearFESolver,feop::AffineFEOperator,cache::Nothing)
-  # Get the algebraic operator first
-  op = get_algebraic_operator(feop)
-
-  # Create a vector with compatible index structure using the operator
-  # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices
-  x = similar(get_vector(op))
-
-  # Solve the linear system
-  cache = solve!(x,solver.ls,op)
 
-  # Create the FE function with the solution
-  trial = get_trial(feop)
-  u_new = FEFunction(trial,x)
-  (u_new, cache)
-end
-
-function solve!(u,solver::LinearFESolver,feop::AffineFEOperator, cache)
-  # Get the algebraic operator first
-  op = get_algebraic_operator(feop)
-
-  # Create a vector with compatible index structure using the operator
-  # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices
-  x = similar(get_vector(op))
-
-  # Solve the linear system
-  cache = solve!(x,solver.ls,op,cache)
-
-  # Create the FE function with the solution
-  trial = get_trial(feop)
-  u_new = FEFunction(trial,x)
-  (u_new,cache)
-end

From ace41829ef6795cd0be2169f5bea3216abd43677 Mon Sep 17 00:00:00 2001
From: Lambert Theisen <lambert.theisen@rwth-aachen.de>
Date: Wed, 30 Jul 2025 15:41:58 +0200
Subject: [PATCH 3/4] try to fix FESpacesTests

---
 src/Assembly.jl      | 39 +++++++++++++++++++++++++++-------
 src/PArraysExtras.jl | 50 ++++++++++++++++++++++----------------------
 2 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/src/Assembly.jl b/src/Assembly.jl
index 41673c5..043a7f6 100644
--- a/src/Assembly.jl
+++ b/src/Assembly.jl
@@ -88,7 +88,7 @@ Algebra.LoopStyle(::Type{<:PSparseMatrixCounter}) = Loop()
 """
     DistributedAllocation{S,T,N} <: GridapType
 
-Distributed N-dimensional allocator, with local allocators of type T. 
+Distributed N-dimensional allocator, with local allocators of type T.
 Follows assembly strategy S.
 """
 struct DistributedAllocation{S,T,N,A,B} <: GridapType
@@ -225,17 +225,42 @@ function Algebra.create_from_nz(a::PVectorAllocation{<:Assembled,<:AbstractVecto
   @assert false
 end
 
+function rhs_callback(values,rows)
+  # Based on the old _rhs_callback pattern:
+  # The issue is that values come from the FE space structure but need to be
+  # organized according to the assembly matrix row structure.
+  # We need to create the vector with proper index alignment.
+
+  if isa(rows, PRange)
+    assembly_indices = partition(rows)
+  else
+    assembly_indices = rows
+  end
+
+  # Create the vector using the assembly indices structure
+  # This ensures proper alignment with matrix rows
+  return PVector(values, assembly_indices)
+end
+
 function Algebra.create_from_nz(a::PVectorAllocation{<:Union{LocallyAssembled,SubAssembled}})
-  rows = remove_ghost(unpermute(axes(a,1)))
+  test_ids = partition(axes(a,1))
+  rows = map(remove_ghost,map(unpermute,test_ids))
   values = local_views(a)
-  b = rhs_callback(values,rows)
+
+  # Use the same pattern as joint assembly: create FE space vector then repartition
+  b_fespace = PVector(values, test_ids)
+  b = locally_repartition(b_fespace, rows)
   return b
 end
 
 function Algebra.create_from_nz(a::PVectorAllocation{<:Assembled})
-  rows = collect_touched_ids(a)
+  new_indices = collect_touched_ids(a)
   values = map(ai -> ai.values, local_views(a))
-  b  = rhs_callback(values,rows)
+
+  # Use the same pattern as joint assembly: create FE space vector then repartition
+  b_fespace = PVector(values, partition(axes(a,1)))
+  b = locally_repartition(b_fespace, new_indices)
+
   t2 = assemble!(b)
   if t2 !== nothing
     wait(t2)
@@ -245,9 +270,9 @@ end
 
 # PSystem assembly chain:
 #
-# When assembling a full system (matrix + vector), it is more efficient to 
+# When assembling a full system (matrix + vector), it is more efficient to
 # overlap communications the assembly of the matrix and the vector.
-# Not only it is faster, but also necessary to ensure identical ghost indices 
+# Not only it is faster, but also necessary to ensure identical ghost indices
 # in both the matrix and vector rows.
 # This is done by using the following specializations:
 
diff --git a/src/PArraysExtras.jl b/src/PArraysExtras.jl
index 302944a..d9dd4a7 100644
--- a/src/PArraysExtras.jl
+++ b/src/PArraysExtras.jl
@@ -3,7 +3,7 @@
     permuted_variable_partition(n_own,gids,owners; n_global, start)
 
 Create indices which are a permuted version of a variable_partition.
-The advantage of this w.r.t. the `LocalIndices` type, is that we can compute 
+The advantage of this w.r.t. the `LocalIndices` type, is that we can compute
 dof owners with minimal communications, since we only need the size of the blocks.
 
 NOTE: This is the type for our FESpace dof_ids.
@@ -48,13 +48,15 @@ end
 """
     unpermute(indices::AbstractLocalIndices)
 
-Given local indices, reorders them to (locally) have own indices first, 
+Given local indices, reorders them to (locally) have own indices first,
 followed by ghost indices.
 """
-function unpermute(indices::AbstractLocalIndices) 
+function unpermute(indices::AbstractLocalIndices)
   @notimplemented
 end
 
+unpermute(prange::PRange) = PRange(map(unpermute, partition(prange)))
+PArrays.remove_ghost(prange::PRange) = PRange(map(PArrays.remove_ghost, partition(prange)))
 unpermute(indices::PermutedLocalIndices) = unpermute(indices.indices)
 unpermute(indices::PartitionedArrays.LocalIndicesInBlockPartition) = indices
 unpermute(indices::OwnAndGhostIndices) = indices
@@ -64,6 +66,10 @@ function unpermute(indices::LocalIndices)
   rank = part_id(indices)
   own = OwnIndices(nglobal,rank,own_to_global(indices))
   ghost = GhostIndices(nglobal,ghost_to_global(indices),ghost_to_owner(indices))
+  
+  # For LocalIndices, we can use the original global_to_owner mapping directly
+  # This should work properly with PartitionedArrays since LocalIndices
+  # has a complete global_to_owner mapping
   OwnAndGhostIndices(own,ghost,global_to_owner(indices))
 end
 
@@ -72,7 +78,7 @@ end
 
 Map the values of a PVector to a new partitioning of the indices.
 
-Similar to `PartitionedArrays.repartition`, but without any communications. Instead, 
+Similar to `PartitionedArrays.repartition`, but without any communications. Instead,
 it is assumed that the local-to-local mapping can be done locally.
 """
 function locally_repartition(v::PVector,new_indices)
@@ -103,9 +109,9 @@ end
 
 Replace ghost ids in `indices` with the ghost ids within `gids`.
 
-NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether 
-they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we 
-would need. TODO: Make me better. 
+NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether
+they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we
+would need. TODO: Make me better.
 """
 function filter_and_replace_ghost(indices,gids)
   owners = find_owner(indices,gids)
@@ -116,7 +122,7 @@ function filter_and_replace_ghost(indices,gids)
   return new_indices
 end
 
-# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that 
+# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that
 # belong to `indices`. This could eventually be a flag in the original function.
 function _filter_ghost(indices,gids,owners)
   ghosts = Set{Int}()
@@ -154,25 +160,19 @@ function _filter_ghost(indices,gids,owners)
 end
 
 # function PArrays.remove_ghost(indices::PermutedLocalIndices)
-#   n_global = global_length(indices) 
+#   n_global = global_length(indices)
 #   own = OwnIndices(n_global,part_id(indices),own_to_global(indices))
 #   ghost = GhostIndices(n_global,Int[],Int32[])
 #   OwnAndGhostIndices(own,ghost)
 # end
 
 function PArrays.remove_ghost(indices::PermutedLocalIndices)
-  perm = indices.perm
-  own_to_local = indices.own_to_local
-  display(own_to_local)
-  println(issorted(own_to_local))
-  display(perm)
-  println("-------------------------------")
   remove_ghost(indices.indices)
 end
 
 # This function computes a mapping among the local identifiers of a and b
-# for which the corresponding global identifiers are both in a and b. 
-# Note that the haskey check is necessary because in the general case 
+# for which the corresponding global identifiers are both in a and b.
+# Note that the haskey check is necessary because in the general case
 # there might be gids in b which are not present in a
 function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices)
   a_lid_to_b_lid = fill(zero(Int32),local_length(a))
@@ -184,7 +184,7 @@ function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices)
     a_lid = a_global_to_local[gid]
     if !iszero(a_lid)
       a_lid_to_b_lid[a_lid] = b_lid
-    end  
+    end
   end
   a_lid_to_b_lid
 end
@@ -228,8 +228,8 @@ function Algebra.axpy_entries!(
   α::Number, A::PSparseMatrix, B::PSparseMatrix;
   check::Bool=true
 )
-# We should definitely check here that the index partitions are the same. 
-# However: Because the different matrices are assembled separately, the objects are not the 
+# We should definitely check here that the index partitions are the same.
+# However: Because the different matrices are assembled separately, the objects are not the
 # same (i.e can't use ===). Checking the index partitions would then be costly...
   @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,1)),partition(axes(B,1))))
   @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,2)),partition(axes(B,2))))
@@ -249,7 +249,7 @@ function Algebra.axpy_entries!(
   return B
 end
 
-# Array of PArrays -> PArray of Arrays 
+# Array of PArrays -> PArray of Arrays
 # TODO: I think this is now implemented in PartitionedArrays.jl (check)
 function to_parray_of_arrays(a::AbstractArray{<:MPIArray})
   indices = linear_indices(first(a))
@@ -269,7 +269,7 @@ function to_parray_of_arrays(a::AbstractArray{<:DebugArray})
   end
 end
 
-# To local/to global for blocks 
+# To local/to global for blocks
 
 function to_local_indices!(I,ids::PRange;kwargs...)
   map(to_local!,I,partition(ids))
@@ -299,7 +299,7 @@ for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners]
   end
 end
 
-# This type is required because MPIArray from PArrays 
+# This type is required because MPIArray from PArrays
 # cannot be instantiated with a NULL communicator
 struct MPIVoidVector{T} <: AbstractVector{T}
   comm::MPI.Comm
@@ -348,7 +348,7 @@ end
 """
     i_am_in(comm::MPIArray)
     i_am_in(comm::DebugArray)
-  
+
   Returns `true` if the processor is part of the subcommunicator `comm`.
 """
 function i_am_in(comm::MPI.Comm)
@@ -381,7 +381,7 @@ function generate_subparts(parts::MPIArray,new_comm_size)
   else
     mpi_undefined = MPI.API.MPI_UNDEFINED[]
   end
-  
+
   if root_size == new_comm_size
     return parts
   else

From 6fa1eb2f533251c33d66e94120be9c64ea1f2f4c Mon Sep 17 00:00:00 2001
From: Lambert Theisen <lambert.theisen@rwth-aachen.de>
Date: Thu, 31 Jul 2025 15:10:13 +0200
Subject: [PATCH 4/4] revert whitespace s.t. git diff is cleaner

---
 src/Assembly.jl      |  6 +++---
 src/FESpaces.jl      | 34 +++++++++++++++++-----------------
 src/PArraysExtras.jl | 38 +++++++++++++++++++-------------------
 3 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/src/Assembly.jl b/src/Assembly.jl
index 043a7f6..536199c 100644
--- a/src/Assembly.jl
+++ b/src/Assembly.jl
@@ -88,7 +88,7 @@ Algebra.LoopStyle(::Type{<:PSparseMatrixCounter}) = Loop()
 """
     DistributedAllocation{S,T,N} <: GridapType
 
-Distributed N-dimensional allocator, with local allocators of type T.
+Distributed N-dimensional allocator, with local allocators of type T. 
 Follows assembly strategy S.
 """
 struct DistributedAllocation{S,T,N,A,B} <: GridapType
@@ -270,9 +270,9 @@ end
 
 # PSystem assembly chain:
 #
-# When assembling a full system (matrix + vector), it is more efficient to
+# When assembling a full system (matrix + vector), it is more efficient to 
 # overlap communications the assembly of the matrix and the vector.
-# Not only it is faster, but also necessary to ensure identical ghost indices
+# Not only it is faster, but also necessary to ensure identical ghost indices 
 # in both the matrix and vector rows.
 # This is done by using the following specializations:
 
diff --git a/src/FESpaces.jl b/src/FESpaces.jl
index b1373d6..e70c168 100644
--- a/src/FESpaces.jl
+++ b/src/FESpaces.jl
@@ -102,7 +102,7 @@ function fetch_vector_ghost_values_cache(vector_partition,partition)
 end
 
 function fetch_vector_ghost_values!(vector_partition,cache)
-  assemble!((a,b)->b, vector_partition, cache)
+  assemble!((a,b)->b, vector_partition, cache) 
 end
 
 function generate_gids(
@@ -128,7 +128,7 @@ function generate_gids(
         end
       end
     end
-    me = part_id(indices)
+    me = part_id(indices) 
     nodofs = count(p->p==me,ldof_to_owner)
     ldof_to_owner, nodofs
   end |> tuple_of_arrays
@@ -140,7 +140,7 @@ function generate_gids(
   cell_ldofs_to_owner = dof_wise_to_cell_wise(ldof_to_owner,cell_to_ldofs,own_to_local(cell_range))
   consistent!(PVector(cell_ldofs_to_owner,partition(cell_range))) |> wait
   cell_wise_to_dof_wise!(ldof_to_owner,cell_ldofs_to_owner,cell_to_ldofs,ghost_to_local(cell_range))
-
+  
   # Fill owned gids
   ldof_to_gdof = map(ranks,first_gdof,ldof_to_owner) do rank,first_gdof,ldof_to_owner
     ldof_to_gdof = fill(0,length(ldof_to_owner))
@@ -538,7 +538,7 @@ function _find_vector_type(spaces,gids;split_own_and_ghost=false)
   return vector_type
 end
 
-# TODO: We would like to avoid this, but I cannot extract the maximal order
+# TODO: We would like to avoid this, but I cannot extract the maximal order 
 #       from the space itself...
 function _add_distributed_constraint(
   F::DistributedFESpace,reffe::ReferenceFE,constraint
@@ -582,7 +582,7 @@ end
 const DistributedZeroMeanFESpace{A,B,C,D,E,F} = DistributedSingleFieldFESpace{A,B,C,D,DistributedZeroMeanCache{E,F}}
 
 function FESpaces.FESpaceWithConstantFixed(
-  space::DistributedSingleFieldFESpace,
+  space::DistributedSingleFieldFESpace, 
   gid_to_fix::Int = num_free_dofs(space)
 )
   # Find the gid within the processors
@@ -638,23 +638,23 @@ function FESpaces.FEFunction(
   isconsistent=false
 )
   free_values = change_ghost(free_values,f.gids,is_consistent=isconsistent,make_consistent=true)
-
+  
   c = _compute_new_distributed_fixedval(
     f,free_values,dirichlet_values
   )
-  fv = free_values .+ c # TODO: Do we need to copy, or can we just modify?
+  fv = free_values .+ c # TODO: Do we need to copy, or can we just modify? 
   dv = map(dirichlet_values) do dv
     dv .+ c
   end
-
+  
   fields = map(FEFunction,f.spaces,partition(fv),dv)
   trian = get_triangulation(f)
   metadata = DistributedFEFunctionData(fv)
   DistributedCellField(fields,trian,metadata)
 end
 
-# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero,
-# which does not properly interpolate the function provided.
+# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero, 
+# which does not properly interpolate the function provided. 
 # With this change, we are interpolating in the unconstrained space and then
 # substracting the mean.
 function FESpaces.interpolate!(u,free_values::AbstractVector,f::DistributedZeroMeanFESpace)
@@ -671,7 +671,7 @@ function _compute_new_distributed_fixedval(
 )
   dvol = f.metadata.dvol
   vol  = f.metadata.vol
-
+  
   c_i = map(local_views(f),partition(fv),dv,dvol) do space,fv,dv,dvol
     if isa(FESpaces.ConstantApproach(space),FESpaces.FixConstant)
       lid_to_fix = space.dof_to_fix
@@ -687,19 +687,19 @@ end
 
 """
     ConstantFESpace(
-      model::DistributedDiscreteModel;
-      constraint_type=:global,
+      model::DistributedDiscreteModel; 
+      constraint_type=:global, 
       kwargs...
     )
 
 Distributed equivalent to `ConstantFESpace(model;kwargs...)`.
 
 With `constraint_type=:global`, a single dof is shared by all processors.
-This creates a global constraint, which is NOT scalable in parallel. Use at your own peril.
+This creates a global constraint, which is NOT scalable in parallel. Use at your own peril. 
 
 With `constraint_type=:local`, a single dof is owned by each processor and shared with no one else.
 This space is locally-constant in each processor, and therefore scalable (but not equivalent
-to its serial counterpart).
+to its serial counterpart). 
 """
 function FESpaces.ConstantFESpace(
   model::DistributedDiscreteModel;
@@ -763,7 +763,7 @@ function FESpaces.SparseMatrixAssembler(
 )
   assems = map(partition(rows),partition(cols)) do rows,cols
     FESpaces.GenericSparseMatrixAssembler(
-      SparseMatrixBuilder(local_mat_type),
+      SparseMatrixBuilder(local_mat_type), 
       ArrayBuilder(local_vec_type),
       Base.OneTo(length(rows)),
       Base.OneTo(length(cols)),
@@ -821,7 +821,7 @@ end
 
 function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data)
   rows, cols = get_rows(a), get_cols(a)
-  map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data)
+  map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data)  
 end
 
 function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data)
diff --git a/src/PArraysExtras.jl b/src/PArraysExtras.jl
index d9dd4a7..1d29303 100644
--- a/src/PArraysExtras.jl
+++ b/src/PArraysExtras.jl
@@ -3,7 +3,7 @@
     permuted_variable_partition(n_own,gids,owners; n_global, start)
 
 Create indices which are a permuted version of a variable_partition.
-The advantage of this w.r.t. the `LocalIndices` type, is that we can compute
+The advantage of this w.r.t. the `LocalIndices` type, is that we can compute 
 dof owners with minimal communications, since we only need the size of the blocks.
 
 NOTE: This is the type for our FESpace dof_ids.
@@ -48,10 +48,10 @@ end
 """
     unpermute(indices::AbstractLocalIndices)
 
-Given local indices, reorders them to (locally) have own indices first,
+Given local indices, reorders them to (locally) have own indices first, 
 followed by ghost indices.
 """
-function unpermute(indices::AbstractLocalIndices)
+function unpermute(indices::AbstractLocalIndices) 
   @notimplemented
 end
 
@@ -78,7 +78,7 @@ end
 
 Map the values of a PVector to a new partitioning of the indices.
 
-Similar to `PartitionedArrays.repartition`, but without any communications. Instead,
+Similar to `PartitionedArrays.repartition`, but without any communications. Instead, 
 it is assumed that the local-to-local mapping can be done locally.
 """
 function locally_repartition(v::PVector,new_indices)
@@ -109,9 +109,9 @@ end
 
 Replace ghost ids in `indices` with the ghost ids within `gids`.
 
-NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether
-they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we
-would need. TODO: Make me better.
+NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether 
+they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we 
+would need. TODO: Make me better. 
 """
 function filter_and_replace_ghost(indices,gids)
   owners = find_owner(indices,gids)
@@ -122,7 +122,7 @@ function filter_and_replace_ghost(indices,gids)
   return new_indices
 end
 
-# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that
+# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that 
 # belong to `indices`. This could eventually be a flag in the original function.
 function _filter_ghost(indices,gids,owners)
   ghosts = Set{Int}()
@@ -160,7 +160,7 @@ function _filter_ghost(indices,gids,owners)
 end
 
 # function PArrays.remove_ghost(indices::PermutedLocalIndices)
-#   n_global = global_length(indices)
+#   n_global = global_length(indices) 
 #   own = OwnIndices(n_global,part_id(indices),own_to_global(indices))
 #   ghost = GhostIndices(n_global,Int[],Int32[])
 #   OwnAndGhostIndices(own,ghost)
@@ -171,8 +171,8 @@ function PArrays.remove_ghost(indices::PermutedLocalIndices)
 end
 
 # This function computes a mapping among the local identifiers of a and b
-# for which the corresponding global identifiers are both in a and b.
-# Note that the haskey check is necessary because in the general case
+# for which the corresponding global identifiers are both in a and b. 
+# Note that the haskey check is necessary because in the general case 
 # there might be gids in b which are not present in a
 function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices)
   a_lid_to_b_lid = fill(zero(Int32),local_length(a))
@@ -184,7 +184,7 @@ function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices)
     a_lid = a_global_to_local[gid]
     if !iszero(a_lid)
       a_lid_to_b_lid[a_lid] = b_lid
-    end
+    end  
   end
   a_lid_to_b_lid
 end
@@ -228,8 +228,8 @@ function Algebra.axpy_entries!(
   α::Number, A::PSparseMatrix, B::PSparseMatrix;
   check::Bool=true
 )
-# We should definitely check here that the index partitions are the same.
-# However: Because the different matrices are assembled separately, the objects are not the
+# We should definitely check here that the index partitions are the same. 
+# However: Because the different matrices are assembled separately, the objects are not the 
 # same (i.e can't use ===). Checking the index partitions would then be costly...
   @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,1)),partition(axes(B,1))))
   @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,2)),partition(axes(B,2))))
@@ -249,7 +249,7 @@ function Algebra.axpy_entries!(
   return B
 end
 
-# Array of PArrays -> PArray of Arrays
+# Array of PArrays -> PArray of Arrays 
 # TODO: I think this is now implemented in PartitionedArrays.jl (check)
 function to_parray_of_arrays(a::AbstractArray{<:MPIArray})
   indices = linear_indices(first(a))
@@ -269,7 +269,7 @@ function to_parray_of_arrays(a::AbstractArray{<:DebugArray})
   end
 end
 
-# To local/to global for blocks
+# To local/to global for blocks 
 
 function to_local_indices!(I,ids::PRange;kwargs...)
   map(to_local!,I,partition(ids))
@@ -299,7 +299,7 @@ for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners]
   end
 end
 
-# This type is required because MPIArray from PArrays
+# This type is required because MPIArray from PArrays 
 # cannot be instantiated with a NULL communicator
 struct MPIVoidVector{T} <: AbstractVector{T}
   comm::MPI.Comm
@@ -348,7 +348,7 @@ end
 """
     i_am_in(comm::MPIArray)
     i_am_in(comm::DebugArray)
-
+  
   Returns `true` if the processor is part of the subcommunicator `comm`.
 """
 function i_am_in(comm::MPI.Comm)
@@ -381,7 +381,7 @@ function generate_subparts(parts::MPIArray,new_comm_size)
   else
     mpi_undefined = MPI.API.MPI_UNDEFINED[]
   end
-
+  
   if root_size == new_comm_size
     return parts
   else