Skip to content

Make backends return errors #38

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions blog/2024-11-21-optimizing-matrix-mul/code/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions blog/2024-11-21-optimizing-matrix-mul/code/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ spirv-std = { git = "https://github.com/rust-gpu/rust-gpu", rev = "0da80f8a61867
futures = "0.3"
glam = { version = "0.29.2", features = ["cuda", "bytemuck"] }
tracing = "0.1.40"
wgpu = { version = "23.0", features = ["spirv", "vulkan-portability"] }

# Enable incremental by default in release mode.
[profile.release]
Expand Down
30 changes: 7 additions & 23 deletions blog/2024-11-21-optimizing-matrix-mul/code/benches/gpu_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ const SIZES: &[(u32, u32, u32)] = &[
(64, 32, 128), // A: 64x32, B: 32x128, Result: 64x128
(1024, 512, 2048), // A: 1024x512, B: 512x2048, Result: 1024x2048
(2048, 1024, 4096), // A: 2048x1024, B: 1024x4096, Result: 2048x4096
*/
];

fn bench_all_variants(c: &mut Criterion) {
// Initialize all variants outside the loop
let multiplier_naive = matmul::naive::wgpu();
let multiplier_workgroup_256 = matmul::workgroup_256::wgpu();
let multiplier_workgroup_2d = matmul::workgroup_2d::wgpu();
let multiplier_tiling_1d = matmul::tiling_1d::wgpu();
let multiplier_tiling_1d_loop = matmul::tiling_1d_loop::wgpu();
let multiplier_tiling_2d = matmul::tiling_2d::wgpu();
let multiplier_isomorphic_gpu = matmul::isomorphic::wgpu();
let multiplier_naive = matmul::naive::wgpu().unwrap();
let multiplier_workgroup_256 = matmul::workgroup_256::wgpu().unwrap();
let multiplier_workgroup_2d = matmul::workgroup_2d::wgpu().unwrap();
let multiplier_tiling_1d = matmul::tiling_1d::wgpu().unwrap();
let multiplier_tiling_1d_loop = matmul::tiling_1d_loop::wgpu().unwrap();
let multiplier_tiling_2d = matmul::tiling_2d::wgpu().unwrap();

for &(m, k, n) in SIZES {
// Calculate FLOPs for this size
Expand Down Expand Up @@ -134,22 +134,6 @@ fn bench_all_variants(c: &mut Criterion) {
});
},
);

group.bench_with_input(
BenchmarkId::new("isomorphic:wgpu", format!("{}x{}x{}", m, k, n)),
&(m, k, n),
|bench, &(m, k, n)| {
bench.iter(|| {
black_box(multiplier_isomorphic_gpu.multiply(
black_box(&a),
black_box(&b),
m,
k,
n,
))
});
},
);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use rand::Rng;
use std::time::Duration;

const WARMUP_TIME: Duration = Duration::from_secs(2);
const MEASUREMENT_TIME: Duration = Duration::from_secs(5 * 60);
const SAMPLE_SIZE: usize = 10;

/// Matrix sizes to benchmark
Expand Down Expand Up @@ -34,19 +33,18 @@ const SIZES: &[(u32, u32, u32)] = &[

fn bench_isomorphic_variants(c: &mut Criterion) {
// Initialize isomorphic variants
let multiplier_isomorphic_gpu = matmul::isomorphic::wgpu();
let multiplier_isomorphic_cpu_single = matmul::isomorphic::cpu::single_threaded();
let multiplier_isomorphic_cpu_multi = matmul::isomorphic::cpu::multi_threaded();
let multiplier_isomorphic_gpu = matmul::isomorphic::wgpu().unwrap();
let multiplier_isomorphic_cpu_single = matmul::isomorphic::cpu::single_threaded().unwrap();
let multiplier_isomorphic_cpu_multi = matmul::isomorphic::cpu::multi_threaded().unwrap();

for &(m, k, n) in SIZES {
// Calculate FLOPs for this size
let flops = 2.0 * (m as f64 * n as f64 * k as f64);

let mut group = c.benchmark_group(format!("isomorphic_matmul{}x{}x{}", m, k, n));
let mut group = c.benchmark_group("isomorphic");
group.sampling_mode(SamplingMode::Flat);
group.warm_up_time(WARMUP_TIME);
//group.measurement_time(MEASUREMENT_TIME);
group.sample_size(SAMPLE_SIZE);

// Calculate FLOPs for this size
let flops = 2.0 * (m as f64 * n as f64 * k as f64);
group.throughput(Throughput::Elements(flops as u64));

// Create matrices for the given size
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ path = "src/bin.rs"
[dependencies]
matmul = { path = "../../crates/cpu/matmul" }
settings = { path = "../../crates/shared/settings" }
wgpu.workspace = true
futures.workspace = true
tracing.workspace = true
tracing-subscriber = { version = "0.3.18", features = ["env-filter", "std"] }
Loading