diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dba78f93..855a1bda 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -50,8 +50,8 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-        # env:
-        #   JULIA_NUM_THREADS: 4
+        env:
+          JULIA_NUM_THREADS: 4
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v5
   docs:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f45b8c23..00c06c87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # CHANGES
 
+## v1.5.0
+
+### Added
+
+  - `get_periodic_coupling_matrix` can now be computed in parallel with multiple threads
+
 ## v1.4.0
 
 ### Added
diff --git a/Project.toml b/Project.toml
index e932f510..4927a007 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,9 +1,10 @@
 name = "ExtendableFEM"
 uuid = "a722555e-65e0-4074-a036-ca7ce79a4aed"
-version = "1.4"
+version = "1.5.0"
 authors = ["Christian Merdon ", "Patrick Jaap "]
 
 [deps]
+ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
 CommonSolve = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2"
 DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
@@ -18,12 +19,14 @@
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SparseDiffTools = "47a9eef4-7e08-11e9-0b38-333d64bd3804"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 
 [compat]
 Aqua = "0.8"
+ChunkSplitters = "3.1.2"
 CommonSolve = "0.2"
 DiffResults = "1"
 DocStringExtensions = "0.8,0.9"
diff --git a/examples/Example212_PeriodicBoundary2D.jl b/examples/Example212_PeriodicBoundary2D.jl
index 24bd7bd9..379b19cc 100644
--- a/examples/Example212_PeriodicBoundary2D.jl
+++ b/examples/Example212_PeriodicBoundary2D.jl
@@ -121,6 +121,7 @@ function main(;
         h = 1.0e-2,
         width = 6.0,
         height = 1.0,
+        threads = 1,
         kwargs...
     )
     xgrid = create_grid(; h, width, height)
@@ -153,8 +154,8 @@
             return nothing
         end
 
-        coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!)
-        display(coupling_matrix)
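+        # pass parallel = true (and optionally a thread count) to assemble the coupling matrix with several threads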
+        @time coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!; parallel = threads > 1, threads)
 
         assign_operator!(PD, CombineDofs(u, u, coupling_matrix; kwargs...))
     end
@@ -172,8 +173,10 @@
 
 generateplots = ExtendableFEM.default_generateplots(Example212_PeriodicBoundary2D, "example212.png") #hide
 function runtests() #hide
-    sol, plt = main() #hide
+    sol, _ = main() #hide
     @test abs(maximum(view(sol[1])) - 1.3447465095618172) < 1.0e-3 #hide
+    sol2, _ = main(threads = 4) #hide
+    @test sol.entries ≈ sol2.entries #hide
     return nothing #hide
 end #hide
diff --git a/examples/Example312_PeriodicBoundary3D.jl b/examples/Example312_PeriodicBoundary3D.jl
index 6915e633..e5f656a1 100644
--- a/examples/Example312_PeriodicBoundary3D.jl
+++ b/examples/Example312_PeriodicBoundary3D.jl
@@ -136,6 +136,7 @@ function main(;
         width = 6.0,
         height = 0.2,
         depth = 1,
+        threads = 1,
         kwargs...
     )
 
@@ -168,8 +169,7 @@
             y[1] = width - x[1]
             return nothing
         end
-        coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!)
-        display(coupling_matrix)
+        @time coupling_matrix = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!; parallel = threads > 1, threads)
 
         assign_operator!(PD, CombineDofs(u, u, coupling_matrix; kwargs...))
     end
@@ -185,8 +185,10 @@
 
 generateplots = ExtendableFEM.default_generateplots(Example312_PeriodicBoundary3D, "example312.png") #hide
 function runtests() #hide
-    sol, plt = main() #hide
-    @test abs(maximum(view(sol[1])) - 1.8004602502175202) < 2.0e-3 #hide
+    sol, _ = main() #hide
+    @test abs(maximum(view(sol[1])) - 1.8004602502175202) < 2.0e-3 #hide
+    sol2, _ = main(threads = 4) #hide
+    @test sol.entries ≈ sol2.entries #hide
     return nothing #hide
 end #hide
diff --git a/src/ExtendableFEM.jl b/src/ExtendableFEM.jl
index f7091e1a..6c7cfdbc 100644
--- a/src/ExtendableFEM.jl
+++ b/src/ExtendableFEM.jl
@@ -5,6 +5,7 @@ $(read(joinpath(@__DIR__, "..", "README.md"), String))
 """
 module ExtendableFEM
 
+using ChunkSplitters: chunks
 using CommonSolve: CommonSolve
 using DiffResults: DiffResults
 using DocStringExtensions: DocStringExtensions, TYPEDEF, TYPEDSIGNATURES
@@ -68,6 +69,7 @@ using LinearSolve: LinearSolve, LinearProblem, UMFPACKFactorization, deleteat!,
 using Printf: Printf, @printf, @sprintf
 using SparseArrays: SparseArrays, AbstractSparseArray, SparseMatrixCSC, findnz, nnz, nzrange, rowvals,
     sparse, SparseVector
+using StaticArrays: @MArray
 using SparseDiffTools: SparseDiffTools, ForwardColorJacCache, forwarddiff_color_jacobian!, matrix_colors
 using Symbolics: Symbolics
diff --git a/src/helper_functions.jl b/src/helper_functions.jl
index 38d707cd..f671b731 100644
--- a/src/helper_functions.jl
+++ b/src/helper_functions.jl
@@ -209,6 +209,15 @@ function get_periodic_coupling_matrix(
     return _get_periodic_coupling_matrix(FES, xgrid, b_from, b_to, give_opposite!; kwargs...)
 end
 
+# merge matrix B into A, overwriting entries of A that are also present in B
+function merge!(A::ExtendableSparseMatrix, B::ExtendableSparseMatrix)
+    rows, cols, values = findnz(B)
+    for (row, col, value) in zip(rows, cols, values)
+        A[row, col] = value
+    end
+    return nothing
+end
+
 function _get_periodic_coupling_matrix(
     FES::FESpace{Tv},
     xgrid::ExtendableGrid{TvG, TiG},
@@ -216,10 +225,13 @@
     b_to,
     give_opposite!::Function;
     mask = :auto,
-    sparsity_tol = 1.0e-12
+    sparsity_tol = 1.0e-12,
+    parallel = false,
+    threads = Threads.nthreads()
 ) where {Tv, TvG, TiG}
 
-    @info "Computing periodic coupling matrix. This may take a while."
+    nthr = parallel ? threads : 1
+    @info "Computing periodic coupling matrix with $nthr thread(s). This may take a while."
 
     if typeof(b_from) <: Int
         b_from = [b_from]
@@ -239,19 +251,6 @@
     # FE basis dofs on each boundary face
     dofs_on_boundary = FES[BFaceDofs]
 
-    # fe vector used for interpolation
-    fe_vector = FEVector(FES)
-    # to be sure
-    fill!(fe_vector.entries, 0.0)
-
-    # FE target vector for interpolation with sparse entries
-    fe_vector_target = FEVector(FES; entries = SparseVector{Float64, Int64}(FES.ndofs, Int64[], Float64[]))
-
-
-    # resulting sparse matrix
-    n = length(fe_vector.entries)
-    result = ExtendableSparseMatrix(n, n)
-
     # face numbers of the boundary faces
     face_numbers_of_bfaces = xgrid[BFaceFaces]
 
@@ -307,26 +306,19 @@
         return
     end
 
-    eval_point, set_start = interpolate_on_boundaryfaces(fe_vector, xgrid, give_opposite!)
-
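+    # note: the interpolation setup (eval_point) now happens per chunk inside compute_chunk_result below, so each thread works on its own interpolator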
     # precompute approximate search region for each boundary face in b_from
     searchareas = ExtendableGrids.VariableTargetAdjacency(TiG)
     coords = xgrid[Coordinates]
     facenodes = xgrid[FaceNodes]
-    faces_to = zeros(Int, 1)
     coords_from = coords[:, facenodes[:, 1]]
-    coords_to = coords[:, facenodes[:, 1]]
     nodes_per_faces = size(coords_from, 2)
     dim = size(coords_from, 1)
-    box_from = [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
-    box_to = [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
-    nfaces_to = 0
+    box_from = @MArray [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
 
    for face_from in faces_in_b_from
         for j in 1:nodes_per_faces, k in 1:dim
             coords_from[k, j] = coords[k, facenodes[j, face_from]]
         end
-        fill!(faces_to, 0)
-        nfaces_to = 0
 
         # transfer the coords_from to the other side
         transfer_face!(coords_from)
@@ -337,26 +329,45 @@
             box_from[k][1] = minimum(view(coords_from, k, :))
             box_from[k][2] = maximum(view(coords_from, k, :))
         end
 
-        for face_to in faces_in_b_to
-            for j in 1:nodes_per_faces, k in 1:dim
-                coords_to[k, j] = coords[k, facenodes[j, face_to]]
-            end
-            for k in 1:dim
-                box_to[k][1] = minimum(view(coords_to, k, :))
-                box_to[k][2] = maximum(view(coords_to, k, :))
-            end
+        function inner_loop(faces_chunk)
+            # thread-local work data
+            local coords_to = coords[:, facenodes[:, 1]]
+            local box_to = @MArray [Float64[0, 0], Float64[0, 0], Float64[0, 0]]
+            local faces_to = Int[]
+
+            for face_to in faces_chunk
+                for j in 1:nodes_per_faces, k in 1:dim
+                    coords_to[k, j] = coords[k, facenodes[j, face_to]]
+                end
+                for k in 1:dim
+                    box_to[k][1] = minimum(view(coords_to, k, :))
+                    box_to[k][2] = maximum(view(coords_to, k, :))
+                end
 
-            if do_boxes_overlap(box_from, box_to)
-                nfaces_to += 1
-                if length(faces_to) < nfaces_to
+                if do_boxes_overlap(box_from, box_to)
                     push!(faces_to, face_to)
-                else
-                    faces_to[nfaces_to] = face_to
                 end
             end
+
+            return faces_to
         end
-        append!(searchareas, view(faces_to, 1:nfaces_to))
+
+        if parallel && nthr > 1
+            # create chunks to split this range for the threads
+            faces_chunks = chunks(faces_in_b_to, n = nthr)
+            tasks = map(faces_chunks) do faces_chunk
+                Threads.@spawn inner_loop(faces_chunk)
+            end
+
+            # put all results together
+            faces_to = vcat(fetch.(tasks)...)
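+            # (chunks yields consecutive, ordered chunks and fetch preserves task order, so faces_to matches the serial result)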
+        else
+            faces_to = inner_loop(faces_in_b_to)
+        end
+
+        append!(searchareas, faces_to)
     end
 
     # throw error if no search area had been found for a bface
@@ -366,13 +377,31 @@
         end
     end
 
-    # loop over boundary face indices: we need this index for dofs_on_boundary
-    for i_boundary_face in 1:n_boundary_faces
+    # we are only interested in the global bface numbers on the "from" boundary
+    bfaces_of_interest = filter(face -> boundary_regions[face] in b_from, 1:n_boundary_faces)
+    n_bface_of_interest = length(bfaces_of_interest)
+
+    # loop over the boundary face indices in a chunk: we need this index for dofs_on_boundary
+    function compute_chunk_result(chunk)
+
+        # prepare data for this chunk:
+        # we need our own copy of the FE space to avoid data races in the pre-computed interpolators
+        our_FES = deepcopy(FES)
+        local fe_vector = FEVector(our_FES)
+        # to be sure
+        fill!(fe_vector.entries, 0.0)
 
-        # for each boundary face: check if in b_from
-        if boundary_regions[i_boundary_face] in b_from
+        local entries = SparseVector{Float64, Int64}(our_FES.ndofs, Int64[], Float64[])
+        local fe_vector_target = FEVector(our_FES; entries)
 
-            local_dofs = @views dofs_on_boundary[:, i_boundary_face]
+        local n = length(fe_vector.entries)
+        local result = ExtendableSparseMatrix(n, n)
+
+        local eval_point, _ = interpolate_on_boundaryfaces(fe_vector, xgrid, give_opposite!)
+
+        for i_boundary_face in chunk
+
+            local local_dofs = @views dofs_on_boundary[:, i_boundary_face]
             for local_dof in local_dofs
                 # compute number of component
                 if mask[1 + ((local_dof - 1) ÷ coffset)] == 0.0
@@ -388,7 +417,7 @@
 
                 # interpolate on the opposite boundary using x_trafo = give_opposite
-                j = findfirst(==(face_numbers_of_bfaces[i_boundary_face]), faces_in_b_from)
+                local j = findfirst(==(face_numbers_of_bfaces[i_boundary_face]), faces_in_b_from)
                 if j <= 0
                     throw("face $(face_numbers_of_bfaces[i_boundary_face]) is not on source boundary. Are the from/to regions and the give_opposite function correct?")
                 end
@@ -410,6 +439,30 @@
                 end
             end
         end
+
+        return result
+    end
+
+    if parallel && nthr > 1
+        # we loop over the boundary faces of interest in parallel:
+        # create chunks to split this range for the threads
+        bface_chunks = chunks(bfaces_of_interest, n = nthr)
+
+        # now start all chunks in parallel
+        tasks = map(bface_chunks) do chunk
+            Threads.@spawn compute_chunk_result(chunk)
+        end
+
+        # wait for all chunks to finish and get the results
+        results = fetch.(tasks)
+
+        # merge all matrices
+        result = results[begin]
+        for res_i in results[(begin + 1):end]
+            merge!(result, res_i)
+        end
+    else
+        result = compute_chunk_result(bfaces_of_interest)
     end
 
     sp_result = sparse(result)
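
Usage sketch (not part of the patch): the new keywords are used as in the two examples above. `FES` stands for a previously constructed `FESpace`, `reg_left`/`reg_right` for the two boundary regions to be coupled, and `width` for the channel length; Julia must be started with several threads (e.g. `julia -t 4`) for the parallel path to pay off.

    # map a point x on the right boundary to its periodic partner y on the left boundary
    function give_opposite!(y, x)
        y .= x
        y[1] = width - x[1]
        return nothing
    end

    # serial (default) and threaded assembly yield the same coupling matrix
    A1 = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!)
    A2 = get_periodic_coupling_matrix(FES, reg_left, reg_right, give_opposite!; parallel = true, threads = 4)
    @assert A1 ≈ A2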