Skip to content

Commit 22d91c8

Browse files
Add profiler/metal to profile gpu on macos
1 parent 01cbcf9 commit 22d91c8

File tree

5 files changed

+72
-2
lines changed

5 files changed

+72
-2
lines changed

examples/hello_world/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ ifeq ($(shell $(CXX) -std=c++17 -x c++ -E -include array - < /dev/null > /dev/nu
99
else
1010
STDLIB := -stdlib=libc++
1111
endif
12-
FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -ldawn
12+
FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib -ldl -ldawn
1313

1414
run: ./build/$(TARGET) dawnlib
1515
$(LIBSPEC) && ./build/$(TARGET)
@@ -20,7 +20,7 @@ run_setup: check-python
2020
cd $(GPUCPP) && python3 setup.py
2121

2222
build/$(TARGET): run.cpp
23-
mkdir -p build && $(CXX) $(FLAGS) -DNO_LOG -o ./build/$(TARGET)
23+
mkdir -p build && $(CXX) $(FLAGS) -DNO_LOG -o $@ $<
2424

2525
# Debug build: same flags as the regular build plus -g (and without -DNO_LOG).
# FLAGS no longer carries the source file, so it must be passed explicitly via
# $< (the first prerequisite, run.cpp); otherwise the compiler is invoked with
# no input files.
debug: run.cpp
	mkdir -p build && $(CXX) $(FLAGS) -g -o ./build/$(TARGET) $<

examples/matmul/Makefile

+6
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,17 @@ FLAGS=-std=c++17 $(STDLIB) -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCP
1515
run: ./build/$(TARGET)
1616
$(LIBSPEC) && ./build/$(TARGET)
1717

18+
# Run the profiling binary with Metal capture switched on. The variable is set
# only for the profiled process; startCapture() reads it at runtime.
run_with_profile: ./build/$(TARGET)_with_profile
	$(LIBSPEC) && METAL_CAPTURE_ENABLED=1 ./build/$(TARGET)_with_profile
20+
1821
# Use clang -v to see the include paths
1922
# Note in this example optimization is turned on
2023
build/$(TARGET): run.cpp
2124
mkdir -p build && $(CXX) $(FLAGS) -o ./build/$(TARGET)
2225

26+
# Profiling build (macOS only): links the Objective-C++ capture helper and
# defines METAL_PROFILER so run.cpp calls startCapture()/stopCapture().
# The framework name is spelled "Metal"; the lowercase form only resolves on
# case-insensitive file systems. metal.mm is listed as a prerequisite so that
# editing the helper triggers a rebuild.
build/$(TARGET)_with_profile: run.cpp $(GPUCPP)/experimental/profiler/metal.mm
	mkdir -p build && $(CXX) $(FLAGS) -o ./build/$(TARGET)_with_profile $(GPUCPP)/experimental/profiler/metal.mm -framework Metal -framework Foundation -DMETAL_PROFILER
28+
2329
watch:
2430
@command -v entr >/dev/null 2>&1 || { echo >&2 "Please install entr with 'brew install entr' or 'sudo apt-get install entr'"; exit 1; }
2531
mkdir -p build && $(CODEPATH) | entr -s "$(LIBSPEC) && rm -f ./build/$(TARGET) && make -j$(NUM_JOBS) ./build/$(TARGET) && ./build/$(TARGET)"

examples/matmul/run.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
#include "experimental/wgsl.h" // loopUnrolling
1414
#include "numeric_types/half.hpp"
1515

16+
#ifdef METAL_PROFILER
17+
#include "experimental/profiler/metal.hpp"
18+
#endif
19+
1620
using namespace gpu;
1721

1822
const std::string versionToStr(int version);
@@ -815,8 +819,10 @@ void runTest(int version, size_t M, size_t K, size_t N,
815819
kernels[i] = selectMatmul(ctx, version, {input, weights, outputs[i]}, M, K, N, numtype);
816820
}
817821

822+
#ifndef METAL_PROFILER
818823
printf("[ Press enter to start tests ... ]\n");
819824
getchar();
825+
#endif
820826
LOG(kDefLog, kInfo, "Dispatching Kernel version %d: %s, %d iterations ...",
821827
version, versionToStr(version).c_str(), nIter);
822828

@@ -930,11 +936,17 @@ int main() {
930936
N = 2 * 4096;
931937
}
932938

939+
#ifdef METAL_PROFILER
940+
startCapture();
941+
#endif
933942
if (enableF16) {
934943
runTestWithCheck<half>(version, M, K, N, transposedInput, kTestSize, numtype);
935944
} else {
936945
runTestWithCheck<float>(version, M, K, N, transposedInput, kTestSize, numtype);
937946
}
947+
#ifdef METAL_PROFILER
948+
stopCapture();
949+
#endif
938950

939951
LOG(kDefLog, kInfo, "Done.");
940952
return 0;

experimental/profiler/metal.hpp

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// C-linkage entry points for the Metal GPU capture helpers implemented in
// experimental/profiler/metal.mm. The declarations are only visible on Apple
// platforms; callers should additionally gate their use (run.cpp does so with
// METAL_PROFILER).
#ifndef GPUCPP_EXPERIMENTAL_PROFILER_METAL_HPP
#define GPUCPP_EXPERIMENTAL_PROFILER_METAL_HPP

#ifdef __APPLE__
extern "C" {

// Starts a GPU capture that is written to "gpu.cpp.gputrace" in the current
// working directory. Does nothing except log a message unless the
// METAL_CAPTURE_ENABLED environment variable is set to a true value (e.g. 1).
void startCapture();

// Ends the capture begun by startCapture().
void stopCapture();

}
#endif // __APPLE__

#endif // GPUCPP_EXPERIMENTAL_PROFILER_METAL_HPP

experimental/profiler/metal.mm

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#import <Foundation/Foundation.h>
2+
#import <Metal/Metal.h>
3+
#import <QuartzCore/CAMetalLayer.h>
4+
5+
6+
extern "C" {
7+
/// Starts a Metal GPU capture whose output is the trace document
/// "gpu.cpp.gputrace" (relative to the current working directory).
///
/// The capture only starts when the METAL_CAPTURE_ENABLED environment variable
/// evaluates to true via -[NSString boolValue]; otherwise a hint is logged and
/// the function returns. A trace left over from a previous run is deleted
/// first. Every failure is reported through NSLog and the function returns
/// without starting a capture; nothing is reported back to the caller.
void startCapture() {
  // The caller is plain C++ (no run loop), so provide a pool for the
  // autoreleased Foundation objects created below.
  @autoreleasepool {
    static NSString *const kTracePath = @"gpu.cpp.gputrace";

    NSString *enabledValue =
        [NSProcessInfo processInfo].environment[@"METAL_CAPTURE_ENABLED"];
    if (![enabledValue boolValue]) {
      NSLog(@"METAL_CAPTURE_ENABLED is not set. Please set it to 1 to enable Metal capture.");
      return;
    }

    // Remove the trace from a previous run before starting a new capture.
    NSFileManager *fileManager = [NSFileManager defaultManager];
    if ([fileManager fileExistsAtPath:kTracePath]) {
      NSError *removeError = nil;
      // Judge success by the BOOL result; the NSError out-parameter is only
      // meaningful when the call reports failure.
      if (![fileManager removeItemAtPath:kTracePath error:&removeError]) {
        NSLog(@"Error deleting existing gpu.cpp.gputrace directory: %@", removeError);
        return;
      }
      NSLog(@"Deleted existing gpu.cpp.gputrace directory.");
    }

    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
    if (!device) {
      NSLog(@"MTLCreateSystemDefaultDevice returned nil. Metal may not be supported on this system.");
      return;
    }

    // Capture all work submitted to the default device into a trace document.
    MTLCaptureDescriptor *descriptor = [[MTLCaptureDescriptor alloc] init];
    descriptor.destination = MTLCaptureDestinationGPUTraceDocument;
    descriptor.outputURL = [NSURL fileURLWithPath:kTracePath];
    descriptor.captureObject = device;

    NSError *captureError = nil;
    BOOL started = [MTLCaptureManager.sharedCaptureManager startCaptureWithDescriptor:descriptor
                                                                                error:&captureError];
    if (!started) {
      NSLog(@" error capturing mtl => %@ ", [captureError localizedDescription] );
    }
  }
}
42+
43+
/// Stops the GPU capture started by startCapture().
///
/// Callers invoke this unconditionally, including when startCapture() returned
/// early (capture disabled or failed to start), so the shared capture manager
/// is only asked to stop while a capture is actually in progress.
void stopCapture() {
  MTLCaptureManager *captureManager = MTLCaptureManager.sharedCaptureManager;
  if (captureManager.isCapturing) {
    [captureManager stopCapture];
  }
}
46+
}

0 commit comments

Comments
 (0)