Parallel periodic coupling #71

Open · wants to merge 1 commit into base: master
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -50,8 +50,8 @@ jobs:
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
# env:
# JULIA_NUM_THREADS: 4
env:
JULIA_NUM_THREADS: 4
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v5
docs:
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,10 @@
# CHANGES

## v1.5.0

### Added

- `get_periodic_coupling_matrix` can now be computed thread-parallel (see the usage sketch below)

## v1.4.0

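A minimal usage sketch of the thread-parallel path named in the changelog entry above. The grid, the `H1P1` space, and the boundary region numbers (4 = left, 2 = right in the default `simplexgrid` numbering) are illustrative assumptions by the editor; only the positional arguments and the new `parallel`/`threads` keywords are taken from this PR.

```julia
using ExtendableFEM, ExtendableGrids

# hypothetical setup: unit square with lowest-order H1 elements
xgrid = simplexgrid(0:0.05:1, 0:0.05:1)
FES = FESpace{H1P1{1}}(xgrid)

# map a point on one periodic boundary to its partner on the other side
function give_opposite!(y, x)
    y .= x
    y[1] = 1.0 - x[1]
    return nothing
end

# serial call (previous behaviour)
C = get_periodic_coupling_matrix(FES, 4, 2, give_opposite!)

# thread-parallel call introduced in this PR
C = get_periodic_coupling_matrix(FES, 4, 2, give_opposite!;
    parallel = true, threads = Threads.nthreads())
```

For the threaded call to have an effect, Julia must be started with more than one thread (for example `julia -t 4`, or via `JULIA_NUM_THREADS` as the CI change above now sets for the test runs).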
5 changes: 4 additions & 1 deletion Project.toml
@@ -1,9 +1,10 @@
name = "ExtendableFEM"
uuid = "a722555e-65e0-4074-a036-ca7ce79a4aed"
version = "1.4"
version = "1.5.0"
authors = ["Christian Merdon <[email protected]>", "Patrick Jaap <[email protected]>"]

[deps]
ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -18,12 +19,14 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"

[compat]
Aqua = "0.8"
ChunkSplitters = "3.1.2"
CommonSolve = "0.2"
DiffResults = "1"
DocStringExtensions = "0.8,0.9"
8 changes: 5 additions & 3 deletions examples/Example212_PeriodicBoundary2D.jl
@@ -121,6 +121,7 @@ function main(;
h = 1.0e-2,
width = 6.0,
height = 1.0,
threads = 1,
kwargs...
)
xgrid = create_grid(; h, width, height)
@@ -153,8 +154,7 @@
return nothing
end

coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!)
display(coupling_matrix)
@time coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!; parallel = threads > 1, threads)
assign_operator!(PD, CombineDofs(u, u, coupling_matrix; kwargs...))
end

@@ -172,8 +172,10 @@ end

generateplots = ExtendableFEM.default_generateplots(Example212_PeriodicBoundary2D, "example212.png") #hide
function runtests() #hide
sol, plt = main() #hide
sol, _ = main() #hide
@test abs(maximum(view(sol[1])) - 1.3447465095618172) < 1.0e-3 #hide
sol2, _ = main(threads = 4) #hide
@test sol.entries ≈ sol2.entries #hide
return nothing #hide
end #hide

10 changes: 6 additions & 4 deletions examples/Example312_PeriodicBoundary3D.jl
@@ -136,6 +136,7 @@ function main(;
width = 6.0,
height = 0.2,
depth = 1,
threads = 1,
kwargs...
)

@@ -168,8 +169,7 @@
y[1] = width - x[1]
return nothing
end
coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!)
display(coupling_matrix)
@time coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!; parallel = threads > 1, threads)
assign_operator!(PD, CombineDofs(u, u, coupling_matrix; kwargs...))
end

@@ -185,8 +185,10 @@

generateplots = ExtendableFEM.default_generateplots(Example312_PeriodicBoundary3D, "example312.png") #hide
function runtests() #hide
sol, plt = main() #hide
@test abs(maximum(view(sol[1])) - 1.8004602502175202) < 2.0e-3 #hide
sol, _ = main() #hide
@test abs(maximum(view(sol[1])) - 1.8004602502175202) < 2.0e-3 #hide
sol2, _ = main(threads = 4) #hide
@test sol.entries ≈ sol2.entries #hide
return nothing #hide
end #hide

2 changes: 2 additions & 0 deletions src/ExtendableFEM.jl
@@ -5,6 +5,7 @@ $(read(joinpath(@__DIR__, "..", "README.md"), String))
"""
module ExtendableFEM

using ChunkSplitters: chunks
using CommonSolve: CommonSolve
using DiffResults: DiffResults
using DocStringExtensions: DocStringExtensions, TYPEDEF, TYPEDSIGNATURES
@@ -68,6 +69,7 @@ using LinearSolve: LinearSolve, LinearProblem, UMFPACKFactorization, deleteat!,
using Printf: Printf, @printf, @sprintf
using SparseArrays: SparseArrays, AbstractSparseArray, SparseMatrixCSC, findnz, nnz,
nzrange, rowvals, sparse, SparseVector
using StaticArrays: @MArray
using SparseDiffTools: SparseDiffTools, ForwardColorJacCache,
forwarddiff_color_jacobian!, matrix_colors
using Symbolics: Symbolics
140 changes: 96 additions & 44 deletions src/helper_functions.jl
@@ -209,17 +209,29 @@ function get_periodic_coupling_matrix(
return _get_periodic_coupling_matrix(FES, xgrid, b_from, b_to, give_opposite!; kwargs...)
end

# merge matrix B into A, overwriting entries of A wherever an entry exists in both A and B
function merge!(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix)
rows, cols, values = findnz(B)
for (row, col, value) in zip(rows, cols, values)
A[row, col] = value
end
return nothing
end

function _get_periodic_coupling_matrix(
FES::FESpace{Tv},
xgrid::ExtendableGrid{TvG, TiG},
b_from,
b_to,
give_opposite!::Function;
mask = :auto,
sparsity_tol = 1.0e-12
sparsity_tol = 1.0e-12,
parallel = false,
threads = Threads.nthreads()
) where {Tv, TvG, TiG}

@info "Computing periodic coupling matrix. This may take a while."
nthr = parallel ? threads : 1
@info "Computing periodic coupling matrix with $nthr thread(s). This may take a while."

if typeof(b_from) <: Int
b_from = [b_from]
@@ -239,19 +251,6 @@
# FE basis dofs on each boundary face
dofs_on_boundary = FES[BFaceDofs]

# fe vector used for interpolation
fe_vector = FEVector(FES)
# to be sure
fill!(fe_vector.entries, 0.0)

# FE target vector for interpolation with sparse entries
fe_vector_target = FEVector(FES; entries = SparseVector{Float64, Int64}(FES.ndofs, Int64[], Float64[]))


# resulting sparse matrix
n = length(fe_vector.entries)
result = ExtendableSparseMatrix(n, n)

# face numbers of the boundary faces
face_numbers_of_bfaces = xgrid[BFaceFaces]

@@ -307,26 +306,18 @@
return
end

eval_point, set_start = interpolate_on_boundaryfaces(fe_vector, xgrid, give_opposite!)

# precompute approximate search region for each boundary face in b_from
searchareas = ExtendableGrids.VariableTargetAdjacency(TiG)
coords = xgrid[Coordinates]
facenodes = xgrid[FaceNodes]
faces_to = zeros(Int, 1)
coords_from = coords[:, facenodes[:, 1]]
coords_to = coords[:, facenodes[:, 1]]
nodes_per_faces = size(coords_from, 2)
dim = size(coords_from, 1)
box_from = [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
box_to = [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
nfaces_to = 0
box_from = @MArray [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
for face_from in faces_in_b_from
for j in 1:nodes_per_faces, k in 1:dim
coords_from[k, j] = coords[k, facenodes[j, face_from]]
end
fill!(faces_to, 0)
nfaces_to = 0

# transfer the coords_from to the other side
transfer_face!(coords_from)
@@ -337,26 +328,44 @@
box_from[k][2] = maximum(view(coords_from, k, :))
end

for face_to in faces_in_b_to
for j in 1:nodes_per_faces, k in 1:dim
coords_to[k, j] = coords[k, facenodes[j, face_to]]
end
for k in 1:dim
box_to[k][1] = minimum(view(coords_to, k, :))
box_to[k][2] = maximum(view(coords_to, k, :))
end
function inner_loop(faces_chunk)
# per-chunk working data (each task keeps its own buffers)
local coords_to = coords[:, facenodes[:, 1]]
local box_to = @MArray [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
local faces_to = Int[]

for face_to in faces_chunk
for j in 1:nodes_per_faces, k in 1:dim
coords_to[k, j] = coords[k, facenodes[j, face_to]]
end
for k in 1:dim
box_to[k][1] = minimum(view(coords_to, k, :))
box_to[k][2] = maximum(view(coords_to, k, :))
end

if do_boxes_overlap(box_from, box_to)
nfaces_to += 1
if length(faces_to) < nfaces_to
if do_boxes_overlap(box_from, box_to)
push!(faces_to, face_to)
else
faces_to[nfaces_to] = face_to
end
end

return faces_to
end

append!(searchareas, view(faces_to, 1:nfaces_to))
if parallel && nthr > 1
# create chunks to split this range for the threads
faces_chunks = chunks(faces_in_b_to, n = nthr)

tasks = map(faces_chunks) do faces_chunk
Threads.@spawn inner_loop(faces_chunk)
end

# put all results together
faces_to = vcat(fetch.(tasks)...)
else
faces_to = inner_loop(faces_in_b_to)
end

append!(searchareas, faces_to)
end

# throw error if no search area had been found for a bface
@@ -366,13 +375,31 @@
end
end

# loop over boundary face indices: we need this index for dofs_on_boundary
for i_boundary_face in 1:n_boundary_faces
# we are only interested in the global bface numbers on the "from" boundary
bfaces_of_interest = filter(face -> boundary_regions[face] in b_from, 1:n_boundary_faces)
n_bface_of_interest = length(bfaces_of_interest)

# loop over boundary face indices in a chunk: we need this index for dofs_on_boundary
function compute_chunk_result(chunk)
Review comment (Member): Maybe the need for `local` comes from the fact that you define this function inside another one.
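An editor's aside on the `local` question raised in the review comment above, using hypothetical names that are not part of this PR: in Julia, an assignment inside a nested function rebinds a variable of the enclosing function if one with the same name already exists there, so tasks spawned from such a nested function would all share that captured variable; declaring it `local` gives every call its own binding.

```julia
function outer_sum()
    buf = zeros(3)                       # variable of the enclosing function

    # without `local`, this assignment rebinds the outer `buf`;
    # concurrent calls would race on the single captured variable
    function worker_shared(i)
        buf = fill(float(i), 3)
        return sum(buf)
    end

    # with `local`, every call gets a fresh `buf` and can safely run in parallel
    function worker_private(i)
        local buf = fill(float(i), 3)
        return sum(buf)
    end

    tasks = [Threads.@spawn worker_private(i) for i in 1:4]
    return sum(fetch.(tasks))
end
```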


# prepare data for this chunk
# we need our own copy of the FE Space to avoid data race in the pre-computed interpolators
our_FES = deepcopy(FES)
local fe_vector = FEVector(our_FES)
# make sure the entries start out zeroed
fill!(fe_vector.entries, 0.0)

# for each boundary face: check if in b_from
if boundary_regions[i_boundary_face] in b_from
local entries = SparseVector{Float64, Int64}(our_FES.ndofs, Int64[], Float64[])
local fe_vector_target = FEVector(our_FES; entries)

local_dofs = @views dofs_on_boundary[:, i_boundary_face]
local n = length(fe_vector.entries)
local result = ExtendableSparseMatrix(n, n)

local eval_point, _ = interpolate_on_boundaryfaces(fe_vector, xgrid, give_opposite!)

for i_boundary_face in chunk

local local_dofs = @views dofs_on_boundary[:, i_boundary_face]
for local_dof in local_dofs
# compute number of component
if mask[1 + ((local_dof - 1) ÷ coffset)] == 0.0
Expand All @@ -388,7 +415,7 @@ function _get_periodic_coupling_matrix(

# interpolate on the opposite boundary using x_trafo = give_opposite

j = findfirst(==(face_numbers_of_bfaces[i_boundary_face]), faces_in_b_from)
local j = findfirst(==(face_numbers_of_bfaces[i_boundary_face]), faces_in_b_from)
if j <= 0
throw("face $(face_numbers_of_bfaces[i_boundary_face]) is not on source boundary. Are the from/to regions and the give_opposite function correct?")
end
@@ -410,6 +437,31 @@
end
end
end

return result
end

if parallel && nthr > 1
# we loop over the bfaces_of_interest in parallel:
# create chunks to split this range for the threads
bface_chunks = chunks(bfaces_of_interest, n = nthr)

# now start all chunks in parallel
tasks = map(bface_chunks) do chunk
Threads.@spawn compute_chunk_result(chunk)
end

# @info "done..."
# wait for all chunks to finish and get results
results = fetch.(tasks)

# merge all matrices
result = results[begin]
for res_i in results[(begin + 1):end]
merge!(result, res_i)
end
else
result = compute_chunk_result(bfaces_of_interest)
end

sp_result = sparse(result)