diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index fc818da8..3f64e135 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,19 +7,34 @@ on:
   workflow_dispatch:
   merge_group:
     types: [checks_requested]
-    
+
 jobs:
-  linux-ci:
-    name: Linux
-    runs-on: ubuntu-latest
+  test:
+    name: ${{ format('{0} {1}', matrix.platform.target, matrix.features) }}
+    runs-on: ${{ matrix.platform.os }}
+    env:
+      RUST_BACKTRACE: 1
     strategy:
       matrix:
-        features: ["", "force-inprocess", "memfd", "async"]
+        platform:
+          - { target: aarch64-apple-darwin, os: macos-14 }
+          - { target: x86_64-apple-darwin, os: macos-13 }
+          - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest }
+          - { target: x86_64-pc-windows-msvc, os: windows-latest }
+          - { target: i686-pc-windows-msvc, os: windows-latest }
+        features: ["", "force-inprocess", "async"]
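+        # `include` adds the platform-specific features on top of the base
+        # feature list: the Windows-only `windows-shared-memory-equality`
+        # feature and the Linux-only `memfd` feature.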
+        include:
+          - features: "windows-shared-memory-equality"
+            platform: { target: x86_64-pc-windows-msvc, os: windows-latest }
+          - features: "windows-shared-memory-equality"
+            platform: { target: i686-pc-windows-msvc, os: windows-latest }
+          - features: "memfd"
+            platform: { target: x86_64-unknown-linux-gnu, os: ubuntu-latest }
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
-      - name: Install nightly toolchain
-        uses: dtolnay/rust-toolchain@nightly
+      - name: Install stable toolchain
+        uses: dtolnay/rust-toolchain@stable
         with:
           components: rustfmt, clippy
 
@@ -27,69 +42,20 @@ jobs:
         run: cargo fmt --check
 
       - name: clippy
-        run: cargo clippy --features "${{ matrix.features }}"
-
-      - name: Cargo build
-        run: cargo build --features "${{ matrix.features }}"
-
-      - name: Cargo test
-        run: cargo test --features "${{ matrix.features }}"
-        env:
-          RUST_BACKTRACE: 1
-
-  mac-ci:
-    name: macOS
-    runs-on: macos-latest
-    strategy:
-      matrix:
-        features: ["", "force-inprocess", "async"]
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Install nightly toolchain
-        uses: dtolnay/rust-toolchain@nightly
-
-      - name: Cargo build
-        run: cargo build --features "${{ matrix.features }}"
+        run: cargo clippy --features "${{ matrix.features }}" --target ${{ matrix.platform.target }}
 
       - name: Cargo test
-        run: cargo test --features "${{ matrix.features }}"
-        env:
-          RUST_BACKTRACE: 1
-
-  windows-ci:
-    name: Windows
-    runs-on: windows-latest
-    strategy:
-      matrix:
-        features: ["", "--features force-inprocess", "--features windows-shared-memory-equality", "--features async"]
-        target: ["x86_64-pc-windows-msvc", "i686-pc-windows-msvc"]
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Install nightly toolchain
-        uses: dtolnay/rust-toolchain@nightly
-        with:
-          targets: ${{ matrix.target }}
+        run: cargo test --features "${{ matrix.features }}" --target ${{ matrix.platform.target }}
 
-      - name: Cargo build
-        run: cargo build ${{ matrix.features }} --target ${{ matrix.target }}
-
-      - name: Cargo test
-        run: cargo test ${{ matrix.features }} --target ${{ matrix.target }}
-        env:
-          RUST_BACKTRACE: 1
+      - name: Cargo test benches
+        run: cargo test --benches --features "${{ matrix.features }}" --target ${{ matrix.platform.target }}
 
   build_result:
     name: Result
     runs-on: ubuntu-latest
     if: always()
     needs:
-      - "linux-ci"
-      - "mac-ci"
-      - "windows-ci"
+      - "test"
 
     steps:
       - name: Success
@@ -98,4 +64,3 @@ jobs:
       - name: Failure
         run: exit 1
         if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')
-
diff --git a/Cargo.toml b/Cargo.toml
index cad7cacf..d60dd45f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,18 @@ license = "MIT OR Apache-2.0"
 repository = "https://github.com/servo/ipc-channel"
 edition = "2021"
 
+[[bench]]
+name = "platform"
+harness = false
+
+[[bench]]
+name = "ipc"
+harness = false
+
+[[bench]]
+name = "ipc_receiver_set"
+harness = false
+
 [features]
 force-inprocess = []
 memfd = ["sc"]
@@ -34,6 +46,7 @@ tempfile = "3.4"
 [dev-dependencies]
 crossbeam-utils = "0.8"
 static_assertions = "1.1.0"
+criterion = { version = "0.5", features = ["html_reports"] }
 
 [target.'cfg(target_os = "windows")'.dependencies.windows]
 version = "0.58.0"
diff --git a/benches/bench.rs b/benches/bench.rs
deleted file mode 100644
index 6989bec6..00000000
--- a/benches/bench.rs
+++ /dev/null
@@ -1,422 +0,0 @@
-#![feature(test)]
-extern crate test;
-
-/// Allows doing multiple inner iterations per bench.iter() run.
-///
-/// This is mostly to amortise the overhead of spawning a thread in the benchmark
-/// when sending larger messages (that might be fragmented).
-///
-/// Note that you need to compensate the displayed results
-/// for the proportionally longer runs yourself,
-/// as the benchmark framework doesn't know about the inner iterations...
-const ITERATIONS: usize = 1;
-
-mod platform {
-    use crate::ITERATIONS;
-    use ipc_channel::platform;
-    use std::sync::{mpsc, Mutex};
-
-    #[bench]
-    fn create_channel(b: &mut test::Bencher) {
-        b.iter(|| {
-            for _ in 0..ITERATIONS {
-                platform::channel().unwrap();
-            }
-        });
-    }
-
-    fn bench_transfer_data(b: &mut test::Bencher, size: usize) {
-        let data: Vec<u8> = (0..size).map(|i| (i % 251) as u8).collect();
-        let (tx, rx) = platform::channel().unwrap();
-
-        let (wait_tx, wait_rx) = mpsc::channel();
-        let wait_rx = Mutex::new(wait_rx);
-
-        if size > platform::OsIpcSender::get_max_fragment_size() {
-            b.iter(|| {
-                crossbeam_utils::thread::scope(|scope| {
-                    let tx = tx.clone();
-                    scope.spawn(|_| {
-                        let wait_rx = wait_rx.lock().unwrap();
-                        let tx = tx;
-                        for _ in 0..ITERATIONS {
-                            tx.send(&data, vec![], vec![]).unwrap();
-                            if ITERATIONS > 1 {
-                                // Prevent beginning of the next send
-                                // from overlapping with receive of last fragment,
-                                // as otherwise results of runs with a large tail fragment
-                                // are significantly skewed.
-                                wait_rx.recv().unwrap();
-                            }
-                        }
-                    });
-                    for _ in 0..ITERATIONS {
-                        rx.recv().unwrap();
-                        if ITERATIONS > 1 {
-                            wait_tx.send(()).unwrap();
-                        }
-                    }
-                    // For reasons mysterious to me,
-                    // not returning a value *from every branch*
-                    // adds some 100 ns or so of overhead to all results --
-                    // which is quite significant for very short tests...
-                    0
-                })
-            });
-        } else {
-            b.iter(|| {
-                for _ in 0..ITERATIONS {
-                    tx.send(&data, vec![], vec![]).unwrap();
-                    rx.recv().unwrap();
-                }
-                0
-            });
-        }
-    }
-
-    #[bench]
-    fn transfer_data_00_1(b: &mut test::Bencher) {
-        bench_transfer_data(b, 1);
-    }
-    #[bench]
-    fn transfer_data_01_2(b: &mut test::Bencher) {
-        bench_transfer_data(b, 2);
-    }
-    #[bench]
-    fn transfer_data_02_4(b: &mut test::Bencher) {
-        bench_transfer_data(b, 4);
-    }
-    #[bench]
-    fn transfer_data_03_8(b: &mut test::Bencher) {
-        bench_transfer_data(b, 8);
-    }
-    #[bench]
-    fn transfer_data_04_16(b: &mut test::Bencher) {
-        bench_transfer_data(b, 16);
-    }
-    #[bench]
-    fn transfer_data_05_32(b: &mut test::Bencher) {
-        bench_transfer_data(b, 32);
-    }
-    #[bench]
-    fn transfer_data_06_64(b: &mut test::Bencher) {
-        bench_transfer_data(b, 64);
-    }
-    #[bench]
-    fn transfer_data_07_128(b: &mut test::Bencher) {
-        bench_transfer_data(b, 128);
-    }
-    #[bench]
-    fn transfer_data_08_256(b: &mut test::Bencher) {
-        bench_transfer_data(b, 256);
-    }
-    #[bench]
-    fn transfer_data_09_512(b: &mut test::Bencher) {
-        bench_transfer_data(b, 512);
-    }
-    #[bench]
-    fn transfer_data_10_1k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 1 * 1024);
-    }
-    #[bench]
-    fn transfer_data_11_2k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 2 * 1024);
-    }
-    #[bench]
-    fn transfer_data_12_4k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 4 * 1024);
-    }
-    #[bench]
-    fn transfer_data_13_8k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 8 * 1024);
-    }
-    #[bench]
-    fn transfer_data_14_16k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 16 * 1024);
-    }
-    #[bench]
-    fn transfer_data_15_32k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 32 * 1024);
-    }
-    #[bench]
-    fn transfer_data_16_64k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 64 * 1024);
-    }
-    #[bench]
-    fn transfer_data_17_128k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 128 * 1024);
-    }
-    #[bench]
-    fn transfer_data_18_256k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 256 * 1024);
-    }
-    #[bench]
-    fn transfer_data_19_512k(b: &mut test::Bencher) {
-        bench_transfer_data(b, 512 * 1024);
-    }
-    #[bench]
-    fn transfer_data_20_1m(b: &mut test::Bencher) {
-        bench_transfer_data(b, 1 * 1024 * 1024);
-    }
-    #[bench]
-    fn transfer_data_21_2m(b: &mut test::Bencher) {
-        bench_transfer_data(b, 2 * 1024 * 1024);
-    }
-    #[bench]
-    fn transfer_data_22_4m(b: &mut test::Bencher) {
-        bench_transfer_data(b, 4 * 1024 * 1024);
-    }
-    #[bench]
-    fn transfer_data_23_8m(b: &mut test::Bencher) {
-        bench_transfer_data(b, 8 * 1024 * 1024);
-    }
-}
-
-mod ipc {
-    use crate::ITERATIONS;
-    use ipc_channel::ipc;
-
-    #[bench]
-    fn transfer_empty(b: &mut test::Bencher) {
-        let (tx, rx) = ipc::channel().unwrap();
-        b.iter(|| {
-            for _ in 0..ITERATIONS {
-                tx.send(()).unwrap();
-                rx.recv().unwrap()
-            }
-        });
-    }
-
-    fn bench_transfer_senders(b: &mut test::Bencher, count: usize) {
-        let (main_tx, main_rx) = ipc::channel().unwrap();
-        let transfer_txs: Vec<_> = (0..count)
-            .map(|_| ipc::channel::<()>().unwrap())
-            .map(|(tx, _)| tx)
-            .collect();
-        let mut transfer_txs = Some(transfer_txs);
-        b.iter(|| {
-            for _ in 0..ITERATIONS {
-                main_tx.send(transfer_txs.take().unwrap()).unwrap();
-                transfer_txs = Some(main_rx.recv().unwrap());
-            }
-        });
-    }
-
-    #[bench]
-    fn transfer_senders_00(b: &mut test::Bencher) {
-        bench_transfer_senders(b, 0);
-    }
-
-    #[bench]
-    fn transfer_senders_01(b: &mut test::Bencher) {
-        bench_transfer_senders(b, 1);
-    }
-
-    #[bench]
-    fn transfer_senders_08(b: &mut test::Bencher) {
-        bench_transfer_senders(b, 8);
-    }
-
-    #[bench]
-    fn transfer_senders_64(b: &mut test::Bencher) {
-        bench_transfer_senders(b, 64);
-    }
-
-    fn bench_transfer_receivers(b: &mut test::Bencher, count: usize) {
-        let (main_tx, main_rx) = ipc::channel().unwrap();
-        let transfer_rxs: Vec<_> = (0..count)
-            .map(|_| ipc::channel::<()>().unwrap())
-            .map(|(_, rx)| rx)
-            .collect();
-        let mut transfer_rxs = Some(transfer_rxs);
-        b.iter(|| {
-            for _ in 0..ITERATIONS {
-                main_tx.send(transfer_rxs.take().unwrap()).unwrap();
-                transfer_rxs = Some(main_rx.recv().unwrap());
-            }
-        });
-    }
-
-    #[bench]
-    fn transfer_receivers_00(b: &mut test::Bencher) {
-        bench_transfer_receivers(b, 0);
-    }
-
-    #[bench]
-    fn transfer_receivers_01(b: &mut test::Bencher) {
-        bench_transfer_receivers(b, 1);
-    }
-
-    #[bench]
-    fn transfer_receivers_08(b: &mut test::Bencher) {
-        bench_transfer_receivers(b, 8);
-    }
-
-    #[bench]
-    fn transfer_receivers_64(b: &mut test::Bencher) {
-        bench_transfer_receivers(b, 64);
-    }
-
-    mod receiver_set {
-        use crate::ITERATIONS;
-        use ipc_channel::ipc::{self, IpcReceiverSet};
-
-        // Benchmark selecting over a set of `n` receivers,
-        // with `to_send` of them actually having pending data.
-        fn bench_send_on_m_of_n(b: &mut test::Bencher, to_send: usize, n: usize) {
-            let mut senders = Vec::with_capacity(n);
-            let mut rx_set = IpcReceiverSet::new().unwrap();
-            for _ in 0..n {
-                let (tx, rx) = ipc::channel().unwrap();
-                rx_set.add(rx).unwrap();
-                senders.push(tx);
-            }
-            b.iter(|| {
-                for _ in 0..ITERATIONS {
-                    for tx in senders.iter().take(to_send) {
-                        tx.send(()).unwrap();
-                    }
-                    let mut received = 0;
-                    while received < to_send {
-                        received += rx_set.select().unwrap().len();
-                    }
-                }
-            });
-        }
-
-        #[bench]
-        fn send_on_1_of_1(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 1, 1);
-        }
-
-        #[bench]
-        fn send_on_1_of_5(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 1, 5);
-        }
-
-        #[bench]
-        fn send_on_2_of_5(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 2, 5);
-        }
-
-        #[bench]
-        fn send_on_5_of_5(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 5, 5);
-        }
-
-        #[bench]
-        fn send_on_1_of_20(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 1, 20);
-        }
-
-        #[bench]
-        fn send_on_5_of_20(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 5, 20);
-        }
-
-        #[bench]
-        fn send_on_20_of_20(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 20, 20);
-        }
-
-        #[bench]
-        fn send_on_1_of_100(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 1, 100);
-        }
-
-        #[bench]
-        fn send_on_5_of_100(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 5, 100);
-        }
-        #[bench]
-        fn send_on_20_of_100(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 20, 100);
-        }
-
-        #[bench]
-        fn send_on_100_of_100(b: &mut test::Bencher) {
-            bench_send_on_m_of_n(b, 100, 100);
-        }
-
-        fn create_set_of_n(n: usize) -> IpcReceiverSet {
-            let mut rx_set = IpcReceiverSet::new().unwrap();
-            for _ in 0..n {
-                let (_, rx) = ipc::channel::<()>().unwrap();
-                rx_set.add(rx).unwrap();
-            }
-            rx_set
-        }
-
-        #[bench]
-        fn create_and_destroy_empty_set(b: &mut test::Bencher) {
-            b.iter(|| {
-                for _ in 0..ITERATIONS {
-                    create_set_of_n(0);
-                }
-            });
-        }
-
-        #[bench]
-        fn create_and_destroy_set_of_1(b: &mut test::Bencher) {
-            b.iter(|| {
-                for _ in 0..ITERATIONS {
-                    create_set_of_n(1);
-                }
-            });
-        }
-
-        #[bench]
-        fn create_and_destroy_set_of_10(b: &mut test::Bencher) {
-            b.iter(|| {
-                for _ in 0..ITERATIONS {
-                    create_set_of_n(10);
-                }
-            });
-        }
-
-        #[bench]
-        fn create_and_destroy_set_of_100(b: &mut test::Bencher) {
-            b.iter(|| {
-                for _ in 0..ITERATIONS {
-                    create_set_of_n(100);
-                }
-            });
-        }
-
-        // Benchmark performance of removing closed receivers from set.
-        // This also includes the time for adding receivers,
-        // as there is no way to measure the removing in isolation.
-        fn bench_remove_closed(b: &mut test::Bencher, n: usize) {
-            b.iter(|| {
-                for _ in 0..ITERATIONS {
-                    // We could keep adding/removing senders to the same set,
-                    // instead of creating a new one in each iteration.
-                    // However, this would actually make the results harder to compare...
-                    let mut rx_set = create_set_of_n(n);
-
-                    let mut dropped_count = 0;
-                    while dropped_count < n {
-                        // On `select()`, receivers with a "ClosedChannel" event will be closed,
-                        // and automatically dropped from the set.
-                        dropped_count += rx_set.select().unwrap().len();
-                    }
-                }
-            });
-        }
-
-        #[bench]
-        fn add_and_remove_1_closed_receivers(b: &mut test::Bencher) {
-            bench_remove_closed(b, 1);
-        }
-
-        #[bench]
-        fn add_and_remove_10_closed_receivers(b: &mut test::Bencher) {
-            bench_remove_closed(b, 10);
-        }
-
-        #[bench]
-        fn add_and_remove_100_closed_receivers(b: &mut test::Bencher) {
-            bench_remove_closed(b, 100);
-        }
-    }
-}
diff --git a/benches/ipc.rs b/benches/ipc.rs
new file mode 100644
index 00000000..c481e993
--- /dev/null
+++ b/benches/ipc.rs
@@ -0,0 +1,73 @@
+#![allow(clippy::identity_op)]
+use criterion::{criterion_group, criterion_main, Criterion};
+use ipc_channel::ipc;
+
+/// Allows doing multiple inner iterations per bench.iter() run.
+///
+/// This is mostly to amortise the overhead of spawning a thread in the benchmark
+/// when sending larger messages (that might be fragmented).
+///
+/// Note that you need to compensate the displayed results
+/// for the proportionally longer runs yourself,
+/// as the benchmark framework doesn't know about the inner iterations...
+const ITERATIONS: usize = 1;
+
+fn transfer_empty(criterion: &mut Criterion) {
+    criterion.bench_function("transfer_empty", |bencher| {
+        let (tx, rx) = ipc::channel().unwrap();
+        bencher.iter(|| {
+            for _ in 0..ITERATIONS {
+                tx.send(()).unwrap();
+                rx.recv().unwrap()
+            }
+        });
+    });
+}
+
+fn transfer_senders<const COUNT: usize>(criterion: &mut Criterion) {
+    criterion.bench_function(&format!("transfer_senders_{COUNT:02}"), |bencher| {
+        let (main_tx, main_rx) = ipc::channel().unwrap();
+        let transfer_txs: Vec<_> = (0..COUNT)
+            .map(|_| ipc::channel::<()>().unwrap())
+            .map(|(tx, _)| tx)
+            .collect();
+        let mut transfer_txs = Some(transfer_txs);
+        bencher.iter(|| {
+            for _ in 0..ITERATIONS {
+                main_tx.send(transfer_txs.take().unwrap()).unwrap();
+                transfer_txs = Some(main_rx.recv().unwrap());
+            }
+        });
+    });
+}
+
+fn transfer_receivers<const COUNT: usize>(criterion: &mut Criterion) {
+    criterion.bench_function(&format!("transfer_receivers_{COUNT:02}"), |bencher| {
+        let (main_tx, main_rx) = ipc::channel().unwrap();
+        let transfer_rxs: Vec<_> = (0..COUNT)
+            .map(|_| ipc::channel::<()>().unwrap())
+            .map(|(_, rx)| rx)
+            .collect();
+        let mut transfer_rxs = Some(transfer_rxs);
+        bencher.iter(|| {
+            for _ in 0..ITERATIONS {
+                main_tx.send(transfer_rxs.take().unwrap()).unwrap();
+                transfer_rxs = Some(main_rx.recv().unwrap());
+            }
+        });
+    });
+}
+
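+// Concrete instantiations of the const-generic benchmarks above, matching
+// the sender/receiver counts (0, 1, 8, 64) used by the old libtest bench.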
+criterion_group!(
+    benches,
+    transfer_empty,
+    transfer_senders<0>,
+    transfer_senders<1>,
+    transfer_senders<8>,
+    transfer_senders<64>,
+    transfer_receivers<0>,
+    transfer_receivers<1>,
+    transfer_receivers<8>,
+    transfer_receivers<64>,
+);
+criterion_main!(benches);
diff --git a/benches/ipc_receiver_set.rs b/benches/ipc_receiver_set.rs
new file mode 100644
index 00000000..5bbd9357
--- /dev/null
+++ b/benches/ipc_receiver_set.rs
@@ -0,0 +1,104 @@
+#![allow(clippy::identity_op)]
+use criterion::{criterion_group, criterion_main, Criterion};
+
+/// Allows doing multiple inner iterations per bench.iter() run.
+///
+/// This is mostly to amortise the overhead of spawning a thread in the benchmark
+/// when sending larger messages (that might be fragmented).
+///
+/// Note that you need to compensate the displayed results
+/// for the proportionally longer runs yourself,
+/// as the benchmark framework doesn't know about the inner iterations...
+const ITERATIONS: usize = 1;
+
+use ipc_channel::ipc::{self, IpcReceiverSet};
+
+/// Benchmark selecting over a set of `n` receivers,
+/// with `to_send` of them actually having pending data.
+fn bench_send_on_m_of_n<const TO_SEND: usize, const N: usize>(criterion: &mut Criterion) {
+    criterion.bench_function(&format!("bench_send_on_{TO_SEND}_of_{N}"), |bencher| {
+        let mut senders = Vec::with_capacity(N);
+        let mut rx_set = IpcReceiverSet::new().unwrap();
+        for _ in 0..N {
+            let (tx, rx) = ipc::channel().unwrap();
+            rx_set.add(rx).unwrap();
+            senders.push(tx);
+        }
+        bencher.iter(|| {
+            for _ in 0..ITERATIONS {
+                for tx in senders.iter().take(TO_SEND) {
+                    tx.send(()).unwrap();
+                }
+                let mut received = 0;
+                while received < TO_SEND {
+                    received += rx_set.select().unwrap().len();
+                }
+            }
+        });
+    });
+}
+
+fn create_set_of_n<const N: usize>() -> IpcReceiverSet {
+    let mut rx_set = IpcReceiverSet::new().unwrap();
+    for _ in 0..N {
+        let (_, rx) = ipc::channel::<()>().unwrap();
+        rx_set.add(rx).unwrap();
+    }
+    rx_set
+}
+
+fn create_and_destroy_set_of_n<const N: usize>(criterion: &mut Criterion) {
+    criterion.bench_function(&format!("create_and_destroy_set_of_{N}"), |bencher| {
+        bencher.iter(|| {
+            for _ in 0..ITERATIONS {
+                create_set_of_n::<N>();
+            }
+        });
+    });
+}
+
+// Benchmark performance of removing closed receivers from set.
+// This also includes the time for adding receivers,
+// as there is no way to measure the removing in isolation.
+fn add_and_remove_n_closed_receivers<const N: usize>(criterion: &mut Criterion) {
+    criterion.bench_function(&format!("add_and_remove_{N}_closed_receivers"), |bencher| {
+        bencher.iter(|| {
+            for _ in 0..ITERATIONS {
+                // We could keep adding/removing senders to the same set,
+                // instead of creating a new one in each iteration.
+                // However, this would actually make the results harder to compare...
+                let mut rx_set = create_set_of_n::<N>();
+
+                let mut dropped_count = 0;
+                while dropped_count < N {
+                    // On `select()`, receivers with a "ClosedChannel" event will be closed,
+                    // and automatically dropped from the set.
+                    dropped_count += rx_set.select().unwrap().len();
+                }
+            }
+        });
+    });
+}
+
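+// The <TO_SEND, N> pairs below reproduce the m-of-n cases from the old
+// libtest bench: selecting over sets of 1, 5, 20 and 100 receivers with
+// 1, 2, 5, 20 or 100 of them having pending data.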
+criterion_group!(
+    benches,
+    bench_send_on_m_of_n<1,1>,
+    bench_send_on_m_of_n<1,5>,
+    bench_send_on_m_of_n<2,5>,
+    bench_send_on_m_of_n<5,5>,
+    bench_send_on_m_of_n<1,20>,
+    bench_send_on_m_of_n<5,20>,
+    bench_send_on_m_of_n<20,20>,
+    bench_send_on_m_of_n<1,100>,
+    bench_send_on_m_of_n<5,100>,
+    bench_send_on_m_of_n<20,100>,
+    bench_send_on_m_of_n<100,100>,
+    create_and_destroy_set_of_n<0>,
+    create_and_destroy_set_of_n<1>,
+    create_and_destroy_set_of_n<10>,
+    create_and_destroy_set_of_n<100>,
+    add_and_remove_n_closed_receivers<1>,
+    add_and_remove_n_closed_receivers<10>,
+    add_and_remove_n_closed_receivers<100>,
+);
+criterion_main!(benches);
diff --git a/benches/platform.rs b/benches/platform.rs
new file mode 100644
index 00000000..29ed98ba
--- /dev/null
+++ b/benches/platform.rs
@@ -0,0 +1,105 @@
+#![allow(clippy::identity_op)]
+use criterion::{criterion_group, criterion_main, Criterion};
+use ipc_channel::platform;
+use std::sync::{mpsc, Mutex};
+
+/// Allows doing multiple inner iterations per bench.iter() run.
+///
+/// This is mostly to amortise the overhead of spawning a thread in the benchmark
+/// when sending larger messages (that might be fragmented).
+///
+/// Note that you need to compensate the displayed results
+/// for the proportionally longer runs yourself,
+/// as the benchmark framework doesn't know about the inner iterations...
+const ITERATIONS: usize = 1;
+
+fn create_channel(criterion: &mut Criterion) {
+    criterion.bench_function("create_channel", |bencher| {
+        bencher.iter(|| {
+            for _ in 0..ITERATIONS {
+                platform::channel().unwrap();
+            }
+        });
+    });
+}
+
+fn transfer_data<const SIZE: usize>(criterion: &mut Criterion) {
+    criterion.bench_function(&format!("transfer_data_{SIZE}"), |bencher| {
+        let data: Vec<u8> = (0..SIZE).map(|i| (i % 251) as u8).collect();
+        let (tx, rx) = platform::channel().unwrap();
+
+        let (wait_tx, wait_rx) = mpsc::channel();
+        let wait_rx = Mutex::new(wait_rx);
+
+        if SIZE > platform::OsIpcSender::get_max_fragment_size() {
+            bencher.iter(|| {
+                crossbeam_utils::thread::scope(|scope| {
+                    let tx = tx.clone();
+                    scope.spawn(|_| {
+                        let wait_rx = wait_rx.lock().unwrap();
+                        let tx = tx;
+                        for _ in 0..ITERATIONS {
+                            tx.send(&data, vec![], vec![]).unwrap();
+                            if ITERATIONS > 1 {
+                                // Prevent beginning of the next send
+                                // from overlapping with receive of last fragment,
+                                // as otherwise results of runs with a large tail fragment
+                                // are significantly skewed.
+                                wait_rx.recv().unwrap();
+                            }
+                        }
+                    });
+                    for _ in 0..ITERATIONS {
+                        rx.recv().unwrap();
+                        if ITERATIONS > 1 {
+                            wait_tx.send(()).unwrap();
+                        }
+                    }
+                    // For reasons mysterious to me,
+                    // not returning a value *from every branch*
+                    // adds some 100 ns or so of overhead to all results --
+                    // which is quite significant for very short tests...
+                    0
+                })
+            });
+        } else {
+            bencher.iter(|| {
+                for _ in 0..ITERATIONS {
+                    tx.send(&data, vec![], vec![]).unwrap();
+                    rx.recv().unwrap();
+                }
+                0
+            });
+        }
+    });
+}
+
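+// Message sizes run from 1 byte to 8 MiB so that both the single-fragment
+// path and the fragmented path (sizes above get_max_fragment_size) are
+// exercised.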
+criterion_group!(
+    benches,
+    create_channel,
+    transfer_data<1>,
+    transfer_data<2>,
+    transfer_data<4>,
+    transfer_data<8>,
+    transfer_data<16>,
+    transfer_data<32>,
+    transfer_data<64>,
+    transfer_data<128>,
+    transfer_data<256>,
+    transfer_data<512>,
+    transfer_data<{ 1 * 1024 }>,
+    transfer_data<{ 2 * 1024 }>,
+    transfer_data<{ 4 * 1024 }>,
+    transfer_data<{ 8 * 1024 }>,
+    transfer_data<{ 16 * 1024 }>,
+    transfer_data<{ 32 * 1024 }>,
+    transfer_data<{ 64 * 1024 }>,
+    transfer_data<{ 128 * 1024 }>,
+    transfer_data<{ 256 * 1024 }>,
+    transfer_data<{ 512 * 1024 }>,
+    transfer_data<{ 1 * 1024 * 1024 }>,
+    transfer_data<{ 2 * 1024 * 1024 }>,
+    transfer_data<{ 4 * 1024 * 1024 }>,
+    transfer_data<{ 8 * 1024 * 1024 }>,
+);
+criterion_main!(benches);
diff --git a/rustfmt.toml b/rustfmt.toml
index 3f376251..ecd1b146 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -1,9 +1,2 @@
 match_block_trailing_comma = true
-match_arm_blocks = false
-binop_separator = "Back"
 reorder_imports = true
-# TODO(dlrobertson): gradually start formatting files
-ignore = [
-  "src/platform",
-  "src/ipc.rs"
-]
diff --git a/src/router.rs b/src/router.rs
index 0c7f669d..99943aa6 100644
--- a/src/router.rs
+++ b/src/router.rs
@@ -178,7 +178,7 @@ impl Router {
                 match result {
                     // Message came from the RouterProxy. Listen on our `msg_receiver`
                     // channel.
-                    IpcSelectionResult::MessageReceived(id, _) if id == self.msg_wakeup_id =>
+                    IpcSelectionResult::MessageReceived(id, _) if id == self.msg_wakeup_id => {
                         match self.msg_receiver.recv().unwrap() {
                             RouterMsg::AddRoute(receiver, handler) => {
                                 let new_receiver_id =
@@ -191,10 +191,12 @@ impl Router {
                                     .expect("Failed to send comfirmation of shutdown.");
                                 break;
                             },
-                        },
+                        }
+                    },
                     // Event from one of our registered receivers, call callback.
-                    IpcSelectionResult::MessageReceived(id, message) =>
-                        self.handlers.get_mut(&id).unwrap()(message),
+                    IpcSelectionResult::MessageReceived(id, message) => {
+                        self.handlers.get_mut(&id).unwrap()(message)
+                    },
                     IpcSelectionResult::ChannelClosed(id) => {
                         let _ = self.handlers.remove(&id).unwrap();
                     },