From c1049e557324be75af4d81fa65117c2eefdea1f4 Mon Sep 17 00:00:00 2001 From: Lambert Theisen Date: Wed, 30 Jul 2025 13:56:56 +0200 Subject: [PATCH 1/4] Try to fix PeriodicBCTests - works but there's still a warning/error for precompilation --- src/FESpaces.jl | 75 +++++++++++++++++++++++++++++++--------- test/PeriodicBCsTests.jl | 33 +++++++++++++++++- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index f68b747..b53c7a0 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -102,7 +102,7 @@ function fetch_vector_ghost_values_cache(vector_partition,partition) end function fetch_vector_ghost_values!(vector_partition,cache) - assemble!((a,b)->b, vector_partition, cache) + assemble!((a,b)->b, vector_partition, cache) end function generate_gids( @@ -128,7 +128,7 @@ function generate_gids( end end end - me = part_id(indices) + me = part_id(indices) nodofs = count(p->p==me,ldof_to_owner) ldof_to_owner, nodofs end |> tuple_of_arrays @@ -140,7 +140,7 @@ function generate_gids( cell_ldofs_to_owner = dof_wise_to_cell_wise(ldof_to_owner,cell_to_ldofs,own_to_local(cell_range)) consistent!(PVector(cell_ldofs_to_owner,partition(cell_range))) |> wait cell_wise_to_dof_wise!(ldof_to_owner,cell_ldofs_to_owner,cell_to_ldofs,ghost_to_local(cell_range)) - + # Fill owned gids ldof_to_gdof = map(ranks,first_gdof,ldof_to_owner) do rank,first_gdof,ldof_to_owner ldof_to_gdof = fill(0,length(ldof_to_owner)) @@ -469,7 +469,7 @@ function _find_vector_type(spaces,gids;split_own_and_ghost=false) return vector_type end -# TODO: We would like to avoid this, but I cannot extract the maximal order +# TODO: We would like to avoid this, but I cannot extract the maximal order # from the space itself... function _add_distributed_constraint( F::DistributedFESpace,reffe::ReferenceFE,constraint @@ -513,7 +513,7 @@ end const DistributedZeroMeanFESpace{A,B,C,D,E,F} = DistributedSingleFieldFESpace{A,B,C,D,DistributedZeroMeanCache{E,F}} function FESpaces.FESpaceWithConstantFixed( - space::DistributedSingleFieldFESpace, + space::DistributedSingleFieldFESpace, gid_to_fix::Int = num_free_dofs(space) ) # Find the gid within the processors @@ -569,23 +569,23 @@ function FESpaces.FEFunction( isconsistent=false ) free_values = change_ghost(free_values,f.gids,is_consistent=isconsistent,make_consistent=true) - + c = _compute_new_distributed_fixedval( f,free_values,dirichlet_values ) - fv = free_values .+ c # TODO: Do we need to copy, or can we just modify? + fv = free_values .+ c # TODO: Do we need to copy, or can we just modify? dv = map(dirichlet_values) do dv dv .+ c end - + fields = map(FEFunction,f.spaces,partition(fv),dv) trian = get_triangulation(f) metadata = DistributedFEFunctionData(fv) DistributedCellField(fields,trian,metadata) end -# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero, -# which does not properly interpolate the function provided. +# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero, +# which does not properly interpolate the function provided. # With this change, we are interpolating in the unconstrained space and then # substracting the mean. function FESpaces.interpolate!(u,free_values::AbstractVector,f::DistributedZeroMeanFESpace) @@ -602,7 +602,7 @@ function _compute_new_distributed_fixedval( ) dvol = f.metadata.dvol vol = f.metadata.vol - + c_i = map(local_views(f),partition(fv),dv,dvol) do space,fv,dv,dvol if isa(FESpaces.ConstantApproach(space),FESpaces.FixConstant) lid_to_fix = space.dof_to_fix @@ -618,19 +618,19 @@ end """ ConstantFESpace( - model::DistributedDiscreteModel; - constraint_type=:global, + model::DistributedDiscreteModel; + constraint_type=:global, kwargs... ) Distributed equivalent to `ConstantFESpace(model;kwargs...)`. With `constraint_type=:global`, a single dof is shared by all processors. -This creates a global constraint, which is NOT scalable in parallel. Use at your own peril. +This creates a global constraint, which is NOT scalable in parallel. Use at your own peril. With `constraint_type=:local`, a single dof is owned by each processor and shared with no one else. This space is locally-constant in each processor, and therefore scalable (but not equivalent -to its serial counterpart). +to its serial counterpart). """ function FESpaces.ConstantFESpace( model::DistributedDiscreteModel; @@ -694,7 +694,7 @@ function FESpaces.SparseMatrixAssembler( ) assems = map(partition(rows),partition(cols)) do rows,cols FESpaces.GenericSparseMatrixAssembler( - SparseMatrixBuilder(local_mat_type), + SparseMatrixBuilder(local_mat_type), ArrayBuilder(local_vec_type), Base.OneTo(length(rows)), Base.OneTo(length(cols)), @@ -752,7 +752,7 @@ end function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) rows, cols = get_rows(a), get_cols(a) - map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) @@ -834,3 +834,44 @@ function FESpaces.collect_cell_matrix_and_vector( collect_cell_matrix_and_vector(u,v,m,l,f) end end + +# Fix for LinearFESolver with periodic boundary conditions +# The issue is that get_free_dof_values(u) creates a vector with the wrong index structure +# for periodic BCs. We need to use a vector with the same index structure as the operator. + +# Override LinearFESolver methods to use compatible vector index structures +import Gridap.FESpaces: solve! + +function solve!(u,solver::LinearFESolver,feop::AffineFEOperator,cache::Nothing) + # Get the algebraic operator first + op = get_algebraic_operator(feop) + + # Create a vector with compatible index structure using the operator + # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices + x = similar(get_vector(op)) + + # Solve the linear system + cache = solve!(x,solver.ls,op) + + # Create the FE function with the solution + trial = get_trial(feop) + u_new = FEFunction(trial,x) + (u_new, cache) +end + +function solve!(u,solver::LinearFESolver,feop::AffineFEOperator, cache) + # Get the algebraic operator first + op = get_algebraic_operator(feop) + + # Create a vector with compatible index structure using the operator + # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices + x = similar(get_vector(op)) + + # Solve the linear system + cache = solve!(x,solver.ls,op,cache) + + # Create the FE function with the solution + trial = get_trial(feop) + u_new = FEFunction(trial,x) + (u_new,cache) +end diff --git a/test/PeriodicBCsTests.jl b/test/PeriodicBCsTests.jl index 247cb6f..e6a1322 100644 --- a/test/PeriodicBCsTests.jl +++ b/test/PeriodicBCsTests.jl @@ -29,9 +29,40 @@ function main(distribute,parts) l(v) = ∫( v*f )dΩ op = AffineFEOperator(a,l,U,V) + # Test 1: Basic solve(op) functionality uh = solve(op) eh = u - uh - @test sqrt(sum( ∫(abs2(eh))dΩ )) < 0.00122 + error_solve = sqrt(sum( ∫(abs2(eh))dΩ )) + @test error_solve < 0.00122 + + # Test 2: LinearFESolver consistency (fix verification) + # This tests the specific fix for periodic BCs where LinearFESolver + # uses compatible vector index structures + solver = LinearFESolver(BackslashSolver()) + uh_solver = solve(solver, op) + eh_solver = u - uh_solver + error_solver = sqrt(sum( ∫(abs2(eh_solver))dΩ )) + + # Test 3: Direct matrix solve for comparison + A = get_matrix(op) + b = get_vector(op) + x_direct = A \ b + uh_direct = FEFunction(U, x_direct) + eh_direct = u - uh_direct + error_direct = sqrt(sum( ∫(abs2(eh_direct))dΩ )) + + # Test 4: Verify all methods give consistent results (within numerical precision) + # This is the key test for the periodic BC fix - all ratios should be ≈ 1.0 + ratio_solve_direct = error_solve / error_direct + ratio_solver_direct = error_solver / error_direct + + @test abs(ratio_solve_direct - 1.0) < 1e-10 # solve(op) should match direct solve + @test abs(ratio_solver_direct - 1.0) < 1e-10 # LinearFESolver should match direct solve + @test abs(error_solve - error_solver) < 1e-14 # solve(op) and LinearFESolver should be identical + + # Test 5: Error magnitudes should be reasonable + @test error_solver < 0.00122 + @test error_direct < 0.00122 end From 0f826df391cb4ee050ebf330370f19b6b968276e Mon Sep 17 00:00:00 2001 From: Lambert Theisen Date: Wed, 30 Jul 2025 14:10:48 +0200 Subject: [PATCH 2/4] Fix code by not overrriding Gridap.solve! but change get_free_dof_values --- src/FESpaces.jl | 110 ++++++++++++++++++++++++++++++------------------ 1 file changed, 70 insertions(+), 40 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index b53c7a0..b1373d6 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -194,7 +194,76 @@ end const DistributedSingleFieldFEFunction{A,B,T} = DistributedCellField{A,B,DistributedFEFunctionData{T}} function FESpaces.get_free_dof_values(uh::DistributedSingleFieldFEFunction) - uh.metadata.free_values + free_values = uh.metadata.free_values + + # Check if we have periodic boundary conditions that require index structure conversion + # This is needed when the FE space uses PermutedLocalIndices but the linear system + # requires LocalIndicesWithVariableBlockSize for compatibility + if _needs_periodic_bc_fix(free_values) + return _convert_to_compatible_index_structure(free_values) + end + + return free_values +end + +""" +Check if the vector needs periodic BC index structure conversion +""" +function _needs_periodic_bc_fix(v::PVector) + # Check if any partition uses PermutedLocalIndices which typically indicates periodic BCs + indices = partition(axes(v, 1)) + checks = map(indices) do idx + isa(idx, PartitionedArrays.PermutedLocalIndices) + end + # Use reduction to avoid scalar indexing on DebugArray + return reduce(|, checks, init=false) +end + +""" +Convert a vector with PermutedLocalIndices to one with LocalIndicesWithVariableBlockSize +This is needed for periodic BC compatibility with linear system operators +""" +function _convert_to_compatible_index_structure(v::PVector) + # Get the current index partition + old_indices = partition(axes(v, 1)) + + # Create new indices using LocalIndicesWithVariableBlockSize + new_indices = map(old_indices) do idx + if isa(idx, PartitionedArrays.PermutedLocalIndices) + # Extract the underlying LocalIndicesWithVariableBlockSize + return idx.indices + else + return idx + end + end + + # Create a new PRange with the unpermuted indices + new_prange = PRange(new_indices) + + # Create a new vector with the compatible index structure + new_vector = similar(v, new_prange) + + # Copy values handling the index mapping + map(partition(new_vector), partition(v), new_indices, old_indices) do new_part, old_part, new_idx, old_idx + if isa(old_idx, PartitionedArrays.PermutedLocalIndices) + # Handle permuted indices by mapping through global indices + old_loc_to_glo = local_to_global(old_idx) + new_glo_to_loc = global_to_local(new_idx) + + for old_lid in 1:local_length(old_idx) + gid = old_loc_to_glo[old_lid] + new_lid = new_glo_to_loc[gid] + if new_lid > 0 + new_part[new_lid] = old_part[old_lid] + end + end + else + # Direct copy if no permutation + copy!(new_part, old_part) + end + end + + return new_vector end # Single field related @@ -835,43 +904,4 @@ function FESpaces.collect_cell_matrix_and_vector( end end -# Fix for LinearFESolver with periodic boundary conditions -# The issue is that get_free_dof_values(u) creates a vector with the wrong index structure -# for periodic BCs. We need to use a vector with the same index structure as the operator. - -# Override LinearFESolver methods to use compatible vector index structures -import Gridap.FESpaces: solve! - -function solve!(u,solver::LinearFESolver,feop::AffineFEOperator,cache::Nothing) - # Get the algebraic operator first - op = get_algebraic_operator(feop) - - # Create a vector with compatible index structure using the operator - # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices - x = similar(get_vector(op)) - - # Solve the linear system - cache = solve!(x,solver.ls,op) - # Create the FE function with the solution - trial = get_trial(feop) - u_new = FEFunction(trial,x) - (u_new, cache) -end - -function solve!(u,solver::LinearFESolver,feop::AffineFEOperator, cache) - # Get the algebraic operator first - op = get_algebraic_operator(feop) - - # Create a vector with compatible index structure using the operator - # For periodic BCs, this ensures we get LocalIndicesWithVariableBlockSize instead of PermutedLocalIndices - x = similar(get_vector(op)) - - # Solve the linear system - cache = solve!(x,solver.ls,op,cache) - - # Create the FE function with the solution - trial = get_trial(feop) - u_new = FEFunction(trial,x) - (u_new,cache) -end From ace41829ef6795cd0be2169f5bea3216abd43677 Mon Sep 17 00:00:00 2001 From: Lambert Theisen Date: Wed, 30 Jul 2025 15:41:58 +0200 Subject: [PATCH 3/4] try to fix FESpacesTests --- src/Assembly.jl | 39 +++++++++++++++++++++++++++------- src/PArraysExtras.jl | 50 ++++++++++++++++++++++---------------------- 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/Assembly.jl b/src/Assembly.jl index 41673c5..043a7f6 100644 --- a/src/Assembly.jl +++ b/src/Assembly.jl @@ -88,7 +88,7 @@ Algebra.LoopStyle(::Type{<:PSparseMatrixCounter}) = Loop() """ DistributedAllocation{S,T,N} <: GridapType -Distributed N-dimensional allocator, with local allocators of type T. +Distributed N-dimensional allocator, with local allocators of type T. Follows assembly strategy S. """ struct DistributedAllocation{S,T,N,A,B} <: GridapType @@ -225,17 +225,42 @@ function Algebra.create_from_nz(a::PVectorAllocation{<:Assembled,<:AbstractVecto @assert false end +function rhs_callback(values,rows) + # Based on the old _rhs_callback pattern: + # The issue is that values come from the FE space structure but need to be + # organized according to the assembly matrix row structure. + # We need to create the vector with proper index alignment. + + if isa(rows, PRange) + assembly_indices = partition(rows) + else + assembly_indices = rows + end + + # Create the vector using the assembly indices structure + # This ensures proper alignment with matrix rows + return PVector(values, assembly_indices) +end + function Algebra.create_from_nz(a::PVectorAllocation{<:Union{LocallyAssembled,SubAssembled}}) - rows = remove_ghost(unpermute(axes(a,1))) + test_ids = partition(axes(a,1)) + rows = map(remove_ghost,map(unpermute,test_ids)) values = local_views(a) - b = rhs_callback(values,rows) + + # Use the same pattern as joint assembly: create FE space vector then repartition + b_fespace = PVector(values, test_ids) + b = locally_repartition(b_fespace, rows) return b end function Algebra.create_from_nz(a::PVectorAllocation{<:Assembled}) - rows = collect_touched_ids(a) + new_indices = collect_touched_ids(a) values = map(ai -> ai.values, local_views(a)) - b = rhs_callback(values,rows) + + # Use the same pattern as joint assembly: create FE space vector then repartition + b_fespace = PVector(values, partition(axes(a,1))) + b = locally_repartition(b_fespace, new_indices) + t2 = assemble!(b) if t2 !== nothing wait(t2) @@ -245,9 +270,9 @@ end # PSystem assembly chain: # -# When assembling a full system (matrix + vector), it is more efficient to +# When assembling a full system (matrix + vector), it is more efficient to # overlap communications the assembly of the matrix and the vector. -# Not only it is faster, but also necessary to ensure identical ghost indices +# Not only it is faster, but also necessary to ensure identical ghost indices # in both the matrix and vector rows. # This is done by using the following specializations: diff --git a/src/PArraysExtras.jl b/src/PArraysExtras.jl index 302944a..d9dd4a7 100644 --- a/src/PArraysExtras.jl +++ b/src/PArraysExtras.jl @@ -3,7 +3,7 @@ permuted_variable_partition(n_own,gids,owners; n_global, start) Create indices which are a permuted version of a variable_partition. -The advantage of this w.r.t. the `LocalIndices` type, is that we can compute +The advantage of this w.r.t. the `LocalIndices` type, is that we can compute dof owners with minimal communications, since we only need the size of the blocks. NOTE: This is the type for our FESpace dof_ids. @@ -48,13 +48,15 @@ end """ unpermute(indices::AbstractLocalIndices) -Given local indices, reorders them to (locally) have own indices first, +Given local indices, reorders them to (locally) have own indices first, followed by ghost indices. """ -function unpermute(indices::AbstractLocalIndices) +function unpermute(indices::AbstractLocalIndices) @notimplemented end +unpermute(prange::PRange) = PRange(map(unpermute, partition(prange))) +PArrays.remove_ghost(prange::PRange) = PRange(map(PArrays.remove_ghost, partition(prange))) unpermute(indices::PermutedLocalIndices) = unpermute(indices.indices) unpermute(indices::PartitionedArrays.LocalIndicesInBlockPartition) = indices unpermute(indices::OwnAndGhostIndices) = indices @@ -64,6 +66,10 @@ function unpermute(indices::LocalIndices) rank = part_id(indices) own = OwnIndices(nglobal,rank,own_to_global(indices)) ghost = GhostIndices(nglobal,ghost_to_global(indices),ghost_to_owner(indices)) + + # For LocalIndices, we can use the original global_to_owner mapping directly + # This should work properly with PartitionedArrays since LocalIndices + # has a complete global_to_owner mapping OwnAndGhostIndices(own,ghost,global_to_owner(indices)) end @@ -72,7 +78,7 @@ end Map the values of a PVector to a new partitioning of the indices. -Similar to `PartitionedArrays.repartition`, but without any communications. Instead, +Similar to `PartitionedArrays.repartition`, but without any communications. Instead, it is assumed that the local-to-local mapping can be done locally. """ function locally_repartition(v::PVector,new_indices) @@ -103,9 +109,9 @@ end Replace ghost ids in `indices` with the ghost ids within `gids`. -NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether -they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we -would need. TODO: Make me better. +NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether +they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we +would need. TODO: Make me better. """ function filter_and_replace_ghost(indices,gids) owners = find_owner(indices,gids) @@ -116,7 +122,7 @@ function filter_and_replace_ghost(indices,gids) return new_indices end -# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that +# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that # belong to `indices`. This could eventually be a flag in the original function. function _filter_ghost(indices,gids,owners) ghosts = Set{Int}() @@ -154,25 +160,19 @@ function _filter_ghost(indices,gids,owners) end # function PArrays.remove_ghost(indices::PermutedLocalIndices) -# n_global = global_length(indices) +# n_global = global_length(indices) # own = OwnIndices(n_global,part_id(indices),own_to_global(indices)) # ghost = GhostIndices(n_global,Int[],Int32[]) # OwnAndGhostIndices(own,ghost) # end function PArrays.remove_ghost(indices::PermutedLocalIndices) - perm = indices.perm - own_to_local = indices.own_to_local - display(own_to_local) - println(issorted(own_to_local)) - display(perm) - println("-------------------------------") remove_ghost(indices.indices) end # This function computes a mapping among the local identifiers of a and b -# for which the corresponding global identifiers are both in a and b. -# Note that the haskey check is necessary because in the general case +# for which the corresponding global identifiers are both in a and b. +# Note that the haskey check is necessary because in the general case # there might be gids in b which are not present in a function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices) a_lid_to_b_lid = fill(zero(Int32),local_length(a)) @@ -184,7 +184,7 @@ function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices) a_lid = a_global_to_local[gid] if !iszero(a_lid) a_lid_to_b_lid[a_lid] = b_lid - end + end end a_lid_to_b_lid end @@ -228,8 +228,8 @@ function Algebra.axpy_entries!( α::Number, A::PSparseMatrix, B::PSparseMatrix; check::Bool=true ) -# We should definitely check here that the index partitions are the same. -# However: Because the different matrices are assembled separately, the objects are not the +# We should definitely check here that the index partitions are the same. +# However: Because the different matrices are assembled separately, the objects are not the # same (i.e can't use ===). Checking the index partitions would then be costly... @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,1)),partition(axes(B,1)))) @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,2)),partition(axes(B,2)))) @@ -249,7 +249,7 @@ function Algebra.axpy_entries!( return B end -# Array of PArrays -> PArray of Arrays +# Array of PArrays -> PArray of Arrays # TODO: I think this is now implemented in PartitionedArrays.jl (check) function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) indices = linear_indices(first(a)) @@ -269,7 +269,7 @@ function to_parray_of_arrays(a::AbstractArray{<:DebugArray}) end end -# To local/to global for blocks +# To local/to global for blocks function to_local_indices!(I,ids::PRange;kwargs...) map(to_local!,I,partition(ids)) @@ -299,7 +299,7 @@ for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners] end end -# This type is required because MPIArray from PArrays +# This type is required because MPIArray from PArrays # cannot be instantiated with a NULL communicator struct MPIVoidVector{T} <: AbstractVector{T} comm::MPI.Comm @@ -348,7 +348,7 @@ end """ i_am_in(comm::MPIArray) i_am_in(comm::DebugArray) - + Returns `true` if the processor is part of the subcommunicator `comm`. """ function i_am_in(comm::MPI.Comm) @@ -381,7 +381,7 @@ function generate_subparts(parts::MPIArray,new_comm_size) else mpi_undefined = MPI.API.MPI_UNDEFINED[] end - + if root_size == new_comm_size return parts else From 6fa1eb2f533251c33d66e94120be9c64ea1f2f4c Mon Sep 17 00:00:00 2001 From: Lambert Theisen Date: Thu, 31 Jul 2025 15:10:13 +0200 Subject: [PATCH 4/4] revert whitespace s.t. git diff is cleaner --- src/Assembly.jl | 6 +++--- src/FESpaces.jl | 34 +++++++++++++++++----------------- src/PArraysExtras.jl | 38 +++++++++++++++++++------------------- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/Assembly.jl b/src/Assembly.jl index 043a7f6..536199c 100644 --- a/src/Assembly.jl +++ b/src/Assembly.jl @@ -88,7 +88,7 @@ Algebra.LoopStyle(::Type{<:PSparseMatrixCounter}) = Loop() """ DistributedAllocation{S,T,N} <: GridapType -Distributed N-dimensional allocator, with local allocators of type T. +Distributed N-dimensional allocator, with local allocators of type T. Follows assembly strategy S. """ struct DistributedAllocation{S,T,N,A,B} <: GridapType @@ -270,9 +270,9 @@ end # PSystem assembly chain: # -# When assembling a full system (matrix + vector), it is more efficient to +# When assembling a full system (matrix + vector), it is more efficient to # overlap communications the assembly of the matrix and the vector. -# Not only it is faster, but also necessary to ensure identical ghost indices +# Not only it is faster, but also necessary to ensure identical ghost indices # in both the matrix and vector rows. # This is done by using the following specializations: diff --git a/src/FESpaces.jl b/src/FESpaces.jl index b1373d6..e70c168 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -102,7 +102,7 @@ function fetch_vector_ghost_values_cache(vector_partition,partition) end function fetch_vector_ghost_values!(vector_partition,cache) - assemble!((a,b)->b, vector_partition, cache) + assemble!((a,b)->b, vector_partition, cache) end function generate_gids( @@ -128,7 +128,7 @@ function generate_gids( end end end - me = part_id(indices) + me = part_id(indices) nodofs = count(p->p==me,ldof_to_owner) ldof_to_owner, nodofs end |> tuple_of_arrays @@ -140,7 +140,7 @@ function generate_gids( cell_ldofs_to_owner = dof_wise_to_cell_wise(ldof_to_owner,cell_to_ldofs,own_to_local(cell_range)) consistent!(PVector(cell_ldofs_to_owner,partition(cell_range))) |> wait cell_wise_to_dof_wise!(ldof_to_owner,cell_ldofs_to_owner,cell_to_ldofs,ghost_to_local(cell_range)) - + # Fill owned gids ldof_to_gdof = map(ranks,first_gdof,ldof_to_owner) do rank,first_gdof,ldof_to_owner ldof_to_gdof = fill(0,length(ldof_to_owner)) @@ -538,7 +538,7 @@ function _find_vector_type(spaces,gids;split_own_and_ghost=false) return vector_type end -# TODO: We would like to avoid this, but I cannot extract the maximal order +# TODO: We would like to avoid this, but I cannot extract the maximal order # from the space itself... function _add_distributed_constraint( F::DistributedFESpace,reffe::ReferenceFE,constraint @@ -582,7 +582,7 @@ end const DistributedZeroMeanFESpace{A,B,C,D,E,F} = DistributedSingleFieldFESpace{A,B,C,D,DistributedZeroMeanCache{E,F}} function FESpaces.FESpaceWithConstantFixed( - space::DistributedSingleFieldFESpace, + space::DistributedSingleFieldFESpace, gid_to_fix::Int = num_free_dofs(space) ) # Find the gid within the processors @@ -638,23 +638,23 @@ function FESpaces.FEFunction( isconsistent=false ) free_values = change_ghost(free_values,f.gids,is_consistent=isconsistent,make_consistent=true) - + c = _compute_new_distributed_fixedval( f,free_values,dirichlet_values ) - fv = free_values .+ c # TODO: Do we need to copy, or can we just modify? + fv = free_values .+ c # TODO: Do we need to copy, or can we just modify? dv = map(dirichlet_values) do dv dv .+ c end - + fields = map(FEFunction,f.spaces,partition(fv),dv) trian = get_triangulation(f) metadata = DistributedFEFunctionData(fv) DistributedCellField(fields,trian,metadata) end -# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero, -# which does not properly interpolate the function provided. +# This is required, otherwise we end up calling `FEFunction` with a fixed value of zero, +# which does not properly interpolate the function provided. # With this change, we are interpolating in the unconstrained space and then # substracting the mean. function FESpaces.interpolate!(u,free_values::AbstractVector,f::DistributedZeroMeanFESpace) @@ -671,7 +671,7 @@ function _compute_new_distributed_fixedval( ) dvol = f.metadata.dvol vol = f.metadata.vol - + c_i = map(local_views(f),partition(fv),dv,dvol) do space,fv,dv,dvol if isa(FESpaces.ConstantApproach(space),FESpaces.FixConstant) lid_to_fix = space.dof_to_fix @@ -687,19 +687,19 @@ end """ ConstantFESpace( - model::DistributedDiscreteModel; - constraint_type=:global, + model::DistributedDiscreteModel; + constraint_type=:global, kwargs... ) Distributed equivalent to `ConstantFESpace(model;kwargs...)`. With `constraint_type=:global`, a single dof is shared by all processors. -This creates a global constraint, which is NOT scalable in parallel. Use at your own peril. +This creates a global constraint, which is NOT scalable in parallel. Use at your own peril. With `constraint_type=:local`, a single dof is owned by each processor and shared with no one else. This space is locally-constant in each processor, and therefore scalable (but not equivalent -to its serial counterpart). +to its serial counterpart). """ function FESpaces.ConstantFESpace( model::DistributedDiscreteModel; @@ -763,7 +763,7 @@ function FESpaces.SparseMatrixAssembler( ) assems = map(partition(rows),partition(cols)) do rows,cols FESpaces.GenericSparseMatrixAssembler( - SparseMatrixBuilder(local_mat_type), + SparseMatrixBuilder(local_mat_type), ArrayBuilder(local_vec_type), Base.OneTo(length(rows)), Base.OneTo(length(cols)), @@ -821,7 +821,7 @@ end function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) rows, cols = get_rows(a), get_cols(a) - map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) diff --git a/src/PArraysExtras.jl b/src/PArraysExtras.jl index d9dd4a7..1d29303 100644 --- a/src/PArraysExtras.jl +++ b/src/PArraysExtras.jl @@ -3,7 +3,7 @@ permuted_variable_partition(n_own,gids,owners; n_global, start) Create indices which are a permuted version of a variable_partition. -The advantage of this w.r.t. the `LocalIndices` type, is that we can compute +The advantage of this w.r.t. the `LocalIndices` type, is that we can compute dof owners with minimal communications, since we only need the size of the blocks. NOTE: This is the type for our FESpace dof_ids. @@ -48,10 +48,10 @@ end """ unpermute(indices::AbstractLocalIndices) -Given local indices, reorders them to (locally) have own indices first, +Given local indices, reorders them to (locally) have own indices first, followed by ghost indices. """ -function unpermute(indices::AbstractLocalIndices) +function unpermute(indices::AbstractLocalIndices) @notimplemented end @@ -78,7 +78,7 @@ end Map the values of a PVector to a new partitioning of the indices. -Similar to `PartitionedArrays.repartition`, but without any communications. Instead, +Similar to `PartitionedArrays.repartition`, but without any communications. Instead, it is assumed that the local-to-local mapping can be done locally. """ function locally_repartition(v::PVector,new_indices) @@ -109,9 +109,9 @@ end Replace ghost ids in `indices` with the ghost ids within `gids`. -NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether -they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we -would need. TODO: Make me better. +NOTE: The issue is that `replace_ghost` does not check if all gids are ghosts or whether +they are repeated. It also shifts ownership of the ghosts. Its all quite messy and not what we +would need. TODO: Make me better. """ function filter_and_replace_ghost(indices,gids) owners = find_owner(indices,gids) @@ -122,7 +122,7 @@ function filter_and_replace_ghost(indices,gids) return new_indices end -# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that +# Same as PartitionedArrays.filter_ghost, but we do not exclude ghost indices that # belong to `indices`. This could eventually be a flag in the original function. function _filter_ghost(indices,gids,owners) ghosts = Set{Int}() @@ -160,7 +160,7 @@ function _filter_ghost(indices,gids,owners) end # function PArrays.remove_ghost(indices::PermutedLocalIndices) -# n_global = global_length(indices) +# n_global = global_length(indices) # own = OwnIndices(n_global,part_id(indices),own_to_global(indices)) # ghost = GhostIndices(n_global,Int[],Int32[]) # OwnAndGhostIndices(own,ghost) @@ -171,8 +171,8 @@ function PArrays.remove_ghost(indices::PermutedLocalIndices) end # This function computes a mapping among the local identifiers of a and b -# for which the corresponding global identifiers are both in a and b. -# Note that the haskey check is necessary because in the general case +# for which the corresponding global identifiers are both in a and b. +# Note that the haskey check is necessary because in the general case # there might be gids in b which are not present in a function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices) a_lid_to_b_lid = fill(zero(Int32),local_length(a)) @@ -184,7 +184,7 @@ function local_to_local_map(a::AbstractLocalIndices,b::AbstractLocalIndices) a_lid = a_global_to_local[gid] if !iszero(a_lid) a_lid_to_b_lid[a_lid] = b_lid - end + end end a_lid_to_b_lid end @@ -228,8 +228,8 @@ function Algebra.axpy_entries!( α::Number, A::PSparseMatrix, B::PSparseMatrix; check::Bool=true ) -# We should definitely check here that the index partitions are the same. -# However: Because the different matrices are assembled separately, the objects are not the +# We should definitely check here that the index partitions are the same. +# However: Because the different matrices are assembled separately, the objects are not the # same (i.e can't use ===). Checking the index partitions would then be costly... @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,1)),partition(axes(B,1)))) @assert reduce(&,map(PArrays.matching_local_indices,partition(axes(A,2)),partition(axes(B,2)))) @@ -249,7 +249,7 @@ function Algebra.axpy_entries!( return B end -# Array of PArrays -> PArray of Arrays +# Array of PArrays -> PArray of Arrays # TODO: I think this is now implemented in PartitionedArrays.jl (check) function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) indices = linear_indices(first(a)) @@ -269,7 +269,7 @@ function to_parray_of_arrays(a::AbstractArray{<:DebugArray}) end end -# To local/to global for blocks +# To local/to global for blocks function to_local_indices!(I,ids::PRange;kwargs...) map(to_local!,I,partition(ids)) @@ -299,7 +299,7 @@ for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners] end end -# This type is required because MPIArray from PArrays +# This type is required because MPIArray from PArrays # cannot be instantiated with a NULL communicator struct MPIVoidVector{T} <: AbstractVector{T} comm::MPI.Comm @@ -348,7 +348,7 @@ end """ i_am_in(comm::MPIArray) i_am_in(comm::DebugArray) - + Returns `true` if the processor is part of the subcommunicator `comm`. """ function i_am_in(comm::MPI.Comm) @@ -381,7 +381,7 @@ function generate_subparts(parts::MPIArray,new_comm_size) else mpi_undefined = MPI.API.MPI_UNDEFINED[] end - + if root_size == new_comm_size return parts else