From 7661c287128a46de2f63bf27ee6b98b7afde233d Mon Sep 17 00:00:00 2001 From: Francesco Nigro Date: Wed, 8 Jan 2025 14:08:45 +0100 Subject: [PATCH 1/2] Save complex OS-level I/O to know the CPU topology --- .../jboss/threads/EnhancedQueueExecutor.java | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java b/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java index 3ce0b001..da367c1a 100644 --- a/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java +++ b/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java @@ -47,7 +47,6 @@ import javax.management.ObjectInstance; import javax.management.ObjectName; -import io.smallrye.common.cpu.CacheInfo; import org.jboss.threads.management.ManageableThreadPoolExecutorService; import org.jboss.threads.management.StandardThreadPoolMXBean; @@ -123,13 +122,6 @@ public final class EnhancedQueueExecutor extends AbstractExecutorService impleme // Optimization control flags // ======================================================= - /** - * A global hint which establishes whether it is recommended to disable uses of {@code EnhancedQueueExecutor}. - * This hint defaults to {@code false} but can be changed to {@code true} by setting the {@code jboss.threads.eqe.disable} - * property to {@code true} before this class is initialized. - */ - public static final boolean DISABLE_HINT = readBooleanPropertyPrefixed("disable", false); - /** * Update the summary statistics. 
*/ @@ -337,13 +329,10 @@ private static final class RuntimeFields { private static final long queueSizeOffset; static { - int cacheLine = CacheInfo.getSmallestDataCacheLineSize(); - if (cacheLine == 0) { - // guess - cacheLine = 64; - } + // this is fine for pretty much 32 and 64 bit x86 and ARM processors; see + // https://github.com/ziglang/zig/blob/0.13.0/lib/std/atomic.zig#L424-L434 // cpu spatial prefetcher can drag 2 cache-lines at once into L2 - int pad = cacheLine > 128 ? cacheLine : 128; + int pad = 128; int longScale = unsafe.arrayIndexScale(long[].class); int taskNodeScale = unsafe.arrayIndexScale(TaskNode[].class); // these fields are in units of array scale From 1d1403f2c1fc4173f2c7bee82e23acbfdaab2494 Mon Sep 17 00:00:00 2001 From: Francesco Nigro Date: Wed, 8 Jan 2025 14:52:25 +0100 Subject: [PATCH 2/2] Address David comments --- .../jboss/threads/EnhancedQueueExecutor.java | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java b/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java index da367c1a..2300fea4 100644 --- a/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java +++ b/src/main/java/org/jboss/threads/EnhancedQueueExecutor.java @@ -122,6 +122,13 @@ public final class EnhancedQueueExecutor extends AbstractExecutorService impleme // Optimization control flags // ======================================================= + /** + * A global hint which establishes whether it is recommended to disable uses of {@code EnhancedQueueExecutor}. + * This hint defaults to {@code false} but can be changed to {@code true} by setting the {@code jboss.threads.eqe.disable} + * property to {@code true} before this class is initialized. + */ + public static final boolean DISABLE_HINT = readBooleanPropertyPrefixed("disable", false); + /** * Update the summary statistics. 
*/ @@ -329,20 +336,31 @@ private static final class RuntimeFields { private static final long queueSizeOffset; static { - // this is fine for pretty much 32 and 64 bit x86 and ARM processors; see - // https://github.com/ziglang/zig/blob/0.13.0/lib/std/atomic.zig#L424-L434 - // cpu spatial prefetcher can drag 2 cache-lines at once into L2 - int pad = 128; int longScale = unsafe.arrayIndexScale(long[].class); int taskNodeScale = unsafe.arrayIndexScale(TaskNode[].class); - // these fields are in units of array scale - unsharedTaskNodesSize = pad / taskNodeScale * (numUnsharedObjects + 1); - unsharedLongsSize = pad / longScale * (numUnsharedLongs + 1); - // these fields are in bytes - headOffset = unsafe.arrayBaseOffset(TaskNode[].class) + pad; - tailOffset = unsafe.arrayBaseOffset(TaskNode[].class) + pad * 2; - threadStatusOffset = unsafe.arrayBaseOffset(long[].class) + pad; - queueSizeOffset = unsafe.arrayBaseOffset(long[].class) + pad * 2; + if (ProcessorInfo.availableProcessors() > 1) { + // this is fine for pretty much all 32- and 64-bit x86 and ARM processors; see + // https://github.com/ziglang/zig/blob/0.13.0/lib/std/atomic.zig#L424-L434 + // cpu spatial prefetcher can drag 2 cache-lines at once into L2 + int pad = 128; + // we pad both before and after the array to avoid false sharing with surrounding heap objects + unsharedTaskNodesSize = pad / taskNodeScale * (numUnsharedObjects + 1); + unsharedLongsSize = pad / longScale * (numUnsharedLongs + 1); + // these fields are in bytes + headOffset = unsafe.arrayBaseOffset(TaskNode[].class) + pad; + tailOffset = unsafe.arrayBaseOffset(TaskNode[].class) + pad * 2; + threadStatusOffset = unsafe.arrayBaseOffset(long[].class) + pad; + queueSizeOffset = unsafe.arrayBaseOffset(long[].class) + pad * 2; + } else { + unsharedTaskNodesSize = numUnsharedObjects; + unsharedLongsSize = numUnsharedLongs; + // position 0 + headOffset = unsafe.arrayBaseOffset(TaskNode[].class); + // position 1 in the object array + tailOffset = 
unsafe.arrayBaseOffset(TaskNode[].class) + taskNodeScale; + threadStatusOffset = unsafe.arrayBaseOffset(long[].class); + queueSizeOffset = unsafe.arrayBaseOffset(long[].class) + longScale; + } } }