diff --git a/extra/weak_scaling/run_particleda.jl b/extra/weak_scaling/run_particleda.jl
index e687339..bd99fd8 100644
--- a/extra/weak_scaling/run_particleda.jl
+++ b/extra/weak_scaling/run_particleda.jl
@@ -1,10 +1,29 @@
 using ParticleDA
 using TimerOutputs
 using MPI
+using ThreadPinning
 
 # Initialise MPI
 MPI.Init()
-mpi_size = MPI.Comm_size(MPI.COMM_WORLD)
+comm = MPI.COMM_WORLD
+mpi_size = MPI.Comm_size(comm)
+my_rank = MPI.Comm_rank(comm)
+
+cores_per_numa = 16
+threads_per_rank = Threads.nthreads()
+ranks_per_numa = div(cores_per_numa, threads_per_rank)
+
+# Pin threads so that the threads of an MPI rank are pinned to cores with
+# contiguous IDs. This ensures that:
+# - when running 16 or fewer threads per rank, all threads are pinned to the same
+#   NUMA region as their master (sharing a memory controller within the Infinity Fabric)
+# - when running 8 or fewer threads per rank, all threads are pinned to the same
+#   Core Complex Die
+# - when running 4 or fewer threads per rank, all threads are pinned to the same
+#   Core Complex (sharing an L3 cache)
+
+my_numa, my_id_in_numa = divrem(my_rank, ranks_per_numa) .+ (1, 0)
+pinthreads(numa(my_numa, 1:Threads.nthreads()) .+ threads_per_rank .* my_id_in_numa)
 
 # Save some variables for later use
 test_dir = joinpath(dirname(pathof(ParticleDA)), "..", "test")
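To make the rank-to-core arithmetic in the patch concrete, the following is a minimal standalone sketch (not part of the patch; `planned_core_ids` is a hypothetical helper introduced only for illustration). It assumes the cores of NUMA domain d carry the contiguous IDs (d - 1) * cores_per_numa up to d * cores_per_numa - 1, which is the layout the pinning code relies on when it offsets the IDs returned by ThreadPinning's `numa`.

```julia
# Sketch of the mapping from MPI rank to intended core IDs, assuming contiguous
# core numbering within each 16-core NUMA domain (an assumption about the target
# machine, mirroring the diff above).
function planned_core_ids(rank, threads_per_rank; cores_per_numa = 16)
    ranks_per_numa = div(cores_per_numa, threads_per_rank)
    # Same arithmetic as the patch: 1-based NUMA index, 0-based slot within it.
    my_numa, my_id_in_numa = divrem(rank, ranks_per_numa) .+ (1, 0)
    # Cores of this rank's NUMA domain under the assumed contiguous layout.
    numa_cores = (my_numa - 1) * cores_per_numa .+ (0:cores_per_numa - 1)
    # Take the first `threads_per_rank` cores, shifted by this rank's slot.
    return numa_cores[1:threads_per_rank] .+ threads_per_rank * my_id_in_numa
end

# With 4 threads per rank: rank 0 -> cores 0:3, rank 1 -> 4:7, ...,
# rank 4 is the first rank placed in the second NUMA domain (cores 16:19).
for rank in 0:5
    println("rank $rank -> cores ", planned_core_ids(rank, 4))
end
```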