|
| 1 | +# Copyright (c) Microsoft Corporation. |
| 2 | +# Licensed under the MIT License. |
| 3 | + |
| 4 | +"""Tests for mem-bw benchmark.""" |
| 5 | + |
| 6 | +import numbers |
| 7 | +from pathlib import Path |
| 8 | +import os |
| 9 | +import unittest |
| 10 | + |
| 11 | +from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform |
| 12 | + |
| 13 | + |
| 14 | +class RocmMemBwTest(unittest.TestCase): |
| 15 | + """Test class for rocm mem-bw benchmark.""" |
| 16 | + def setUp(self): |
| 17 | + """Method called to prepare the test fixture.""" |
| 18 | + # Create fake binary file just for testing. |
| 19 | + os.environ['SB_MICRO_PATH'] = '/tmp/superbench/' |
| 20 | + binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin') |
| 21 | + Path(os.getenv('SB_MICRO_PATH'), 'bin').mkdir(parents=True, exist_ok=True) |
| 22 | + self.__binary_file = Path(binary_path, 'hipBusBandwidth') |
| 23 | + self.__binary_file.touch(mode=0o755, exist_ok=True) |
| 24 | + |
| 25 | + def tearDown(self): |
| 26 | + """Method called after the test method has been called and the result recorded.""" |
| 27 | + self.__binary_file.unlink() |
| 28 | + |
| 29 | + def test_rocm_memory_bw_performance(self): |
| 30 | + """Test rocm mem-bw benchmark.""" |
| 31 | + benchmark_name = 'mem-bw' |
| 32 | + (benchmark_class, |
| 33 | + predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.ROCM) |
| 34 | + assert (benchmark_class) |
| 35 | + |
| 36 | + benchmark = benchmark_class(benchmark_name) |
| 37 | + |
| 38 | + ret = benchmark._preprocess() |
| 39 | + assert (ret is True) |
| 40 | + assert (benchmark.return_code == ReturnCode.SUCCESS) |
| 41 | + |
| 42 | + # Check basic information. |
| 43 | + assert (benchmark) |
| 44 | + assert (benchmark.name == 'mem-bw') |
| 45 | + assert (benchmark.type == BenchmarkType.MICRO) |
| 46 | + |
| 47 | + # Check command list |
| 48 | + expected_command = ['hipBusBandwidth --h2d', 'hipBusBandwidth --d2h'] |
| 49 | + for i in range(len(expected_command)): |
| 50 | + commnad = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1] |
| 51 | + assert (commnad == expected_command[i]) |
| 52 | + |
| 53 | + # Check results and metrics. |
| 54 | + raw_output = {} |
| 55 | + raw_output[0] = """ |
| 56 | +Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned |
| 57 | + test atts units median mean stddev min max |
| 58 | + H2D_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 |
| 59 | + H2D_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 |
| 60 | + H2D_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 |
| 61 | + H2D_Bandwidth_pinned 1kB GB/sec 0.0414 0.0411 0.0017 0.0189 0.0434 |
| 62 | + H2D_Bandwidth_pinned 2kB GB/sec 0.0828 0.0824 0.0018 0.0683 0.0862 |
| 63 | + H2D_Bandwidth_pinned 4kB GB/sec 0.1656 0.1652 0.0032 0.1374 0.1724 |
| 64 | + H2D_Bandwidth_pinned 8kB GB/sec 0.3268 0.3251 0.0117 0.1880 0.3425 |
| 65 | + H2D_Bandwidth_pinned 16kB GB/sec 0.6410 0.6365 0.0259 0.3597 0.6757 |
| 66 | + H2D_Bandwidth_pinned 32kB GB/sec 1.2422 1.2432 0.0278 0.9346 1.2987 |
| 67 | + H2D_Bandwidth_pinned 64kB GB/sec 2.3968 2.4161 0.1486 0.7242 2.6042 |
| 68 | + H2D_Bandwidth_pinned 128kB GB/sec 4.6786 4.6339 0.1310 4.1143 4.8162 |
| 69 | + H2D_Bandwidth_pinned 256kB GB/sec 7.8349 7.8369 0.1150 6.9093 8.0270 |
| 70 | + H2D_Bandwidth_pinned 512kB GB/sec 11.9963 11.9828 0.1287 11.2158 12.2201 |
| 71 | + H2D_Bandwidth_pinned 1024kB GB/sec 16.3342 16.3315 0.0956 16.0147 16.5823 |
| 72 | + H2D_Bandwidth_pinned 2048kB GB/sec 19.9790 19.9770 0.0853 19.7681 20.1635 |
| 73 | + H2D_Bandwidth_pinned 4096kB GB/sec 22.2706 22.2642 0.0552 22.0644 22.3847 |
| 74 | + H2D_Bandwidth_pinned 8192kB GB/sec 22.8232 22.7881 0.1669 21.3196 22.8930 |
| 75 | + H2D_Bandwidth_pinned 16384kB GB/sec 24.1521 24.1411 0.0429 24.0165 24.2162 |
| 76 | + H2D_Bandwidth_pinned 32768kB GB/sec 24.8695 24.7086 0.7491 20.6288 24.9035 |
| 77 | + H2D_Bandwidth_pinned 65536kB GB/sec 24.4840 24.0101 2.5769 6.1754 24.5292 |
| 78 | + H2D_Bandwidth_pinned 131072kB GB/sec 25.0487 24.9593 0.2601 24.1286 25.0711 |
| 79 | + H2D_Bandwidth_pinned 262144kB GB/sec 25.3280 25.2351 0.1788 24.8746 25.3498 |
| 80 | + H2D_Bandwidth_pinned 524288kB GB/sec 24.7523 24.6708 0.1586 24.3154 24.7880 |
| 81 | + H2D_Timepinned +064By ms 0.0245 0.0253 0.0240 0.0232 0.7821 |
| 82 | + H2D_Timepinned +256By ms 0.0243 0.0244 0.0013 0.0232 0.0546 |
| 83 | + H2D_Timepinned +512By ms 0.0243 0.0244 0.0014 0.0230 0.0566 |
| 84 | + H2D_Timepinned 1kB ms 0.0242 0.0244 0.0016 0.0230 0.0530 |
| 85 | + H2D_Timepinned 2kB ms 0.0242 0.0243 0.0005 0.0232 0.0293 |
| 86 | + H2D_Timepinned 4kB ms 0.0242 0.0242 0.0005 0.0232 0.0291 |
| 87 | + H2D_Timepinned 8kB ms 0.0245 0.0247 0.0013 0.0234 0.0426 |
| 88 | + H2D_Timepinned 16kB ms 0.0250 0.0252 0.0015 0.0237 0.0445 |
| 89 | + H2D_Timepinned 32kB ms 0.0258 0.0258 0.0006 0.0246 0.0342 |
| 90 | + H2D_Timepinned 64kB ms 0.0271 0.0272 0.0045 0.0250 0.0898 |
| 91 | + H2D_Timepinned 128kB ms 0.0280 0.0283 0.0008 0.0272 0.0318 |
| 92 | + H2D_Timepinned 256kB ms 0.0334 0.0334 0.0005 0.0326 0.0379 |
| 93 | + H2D_Timepinned 512kB ms 0.0437 0.0437 0.0005 0.0429 0.0467 |
| 94 | + H2D_Timepinned 1024kB ms 0.0642 0.0642 0.0004 0.0632 0.0654 |
| 95 | + H2D_Timepinned 2048kB ms 0.1050 0.1050 0.0004 0.1040 0.1061 |
| 96 | + H2D_Timepinned 4096kB ms 0.1883 0.1884 0.0005 0.1874 0.1901 |
| 97 | + H2D_Timepinned 8192kB ms 0.3675 0.3681 0.0028 0.3664 0.3934 |
| 98 | + H2D_Timepinned 16384kB ms 0.6946 0.6950 0.0012 0.6928 0.6986 |
| 99 | + H2D_Timepinned 32768kB ms 1.3492 1.3595 0.0482 1.3474 1.6266 |
| 100 | + H2D_Timepinned 65536kB ms 2.7409 2.9163 1.1368 2.7358 10.8670 |
| 101 | + H2D_Timepinned 131072kB ms 5.3582 5.3780 0.0576 5.3534 5.5626 |
| 102 | + H2D_Timepinned 262144kB ms 10.5983 10.6379 0.0761 10.5892 10.7915 |
| 103 | + H2D_Timepinned 524288kB ms 21.6897 21.7622 0.1411 21.6585 22.0794 |
| 104 | +
|
| 105 | +Note: results marked with (*) had missing values such as |
| 106 | +might occur with a mixture of architectural capabilities. |
| 107 | + """ |
| 108 | + raw_output[1] = """ |
| 109 | +Device:Device 738c Mem=32.0GB #CUs=120 Freq=1502Mhz MallocMode=pinned |
| 110 | + test atts units median mean stddev min max |
| 111 | + D2H_Bandwidth_pinned +064By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 |
| 112 | + D2H_Bandwidth_pinned +256By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 |
| 113 | + D2H_Bandwidth_pinned +512By GB/sec 0.0000 0.0000 0.0000 0.0000 0.0000 |
| 114 | + D2H_Bandwidth_pinned 1kB GB/sec 0.0428 0.0426 0.0019 0.0114 0.0446 |
| 115 | + D2H_Bandwidth_pinned 2kB GB/sec 0.0850 0.0844 0.0034 0.0415 0.0893 |
| 116 | + D2H_Bandwidth_pinned 4kB GB/sec 0.1701 0.1687 0.0084 0.0504 0.1773 |
| 117 | + D2H_Bandwidth_pinned 8kB GB/sec 0.3378 0.3348 0.0168 0.1085 0.3546 |
| 118 | + D2H_Bandwidth_pinned 16kB GB/sec 0.6667 0.6606 0.0218 0.5618 0.6897 |
| 119 | + D2H_Bandwidth_pinned 32kB GB/sec 1.3072 1.2954 0.0663 0.5682 1.3605 |
| 120 | + D2H_Bandwidth_pinned 64kB GB/sec 2.5550 2.5339 0.0955 2.1382 2.6904 |
| 121 | + D2H_Bandwidth_pinned 128kB GB/sec 4.8162 4.7807 0.2331 2.0940 4.9621 |
| 122 | + D2H_Bandwidth_pinned 256kB GB/sec 8.2286 8.2192 0.1671 7.2456 8.5286 |
| 123 | + D2H_Bandwidth_pinned 512kB GB/sec 12.7930 12.7062 0.4407 7.1196 13.0478 |
| 124 | + D2H_Bandwidth_pinned 1024kB GB/sec 17.5603 17.4938 0.3921 12.7184 17.7989 |
| 125 | + D2H_Bandwidth_pinned 2048kB GB/sec 21.6275 21.5591 0.2233 20.6073 21.8076 |
| 126 | + D2H_Bandwidth_pinned 4096kB GB/sec 24.2708 24.2556 0.0942 23.5724 24.4292 |
| 127 | + D2H_Bandwidth_pinned 8192kB GB/sec 24.9287 24.9093 0.0733 24.7171 25.0359 |
| 128 | + D2H_Bandwidth_pinned 16384kB GB/sec 26.4588 26.1976 2.4387 1.9387 26.5191 |
| 129 | + D2H_Bandwidth_pinned 32768kB GB/sec 27.2939 27.1202 0.7941 23.2086 27.3277 |
| 130 | + D2H_Bandwidth_pinned 65536kB GB/sec 26.8278 26.7238 0.3894 24.7946 26.9000 |
| 131 | + D2H_Bandwidth_pinned 131072kB GB/sec 27.4751 27.3457 0.3968 25.4168 27.5098 |
| 132 | + D2H_Bandwidth_pinned 262144kB GB/sec 27.8236 27.7173 0.3072 26.7977 27.8525 |
| 133 | + D2H_Bandwidth_pinned 524288kB GB/sec 28.0193 27.9348 0.1912 27.4707 28.0314 |
| 134 | + D2H_Time_pinned +064By ms 0.0229 0.0246 0.0457 0.0216 1.4690 |
| 135 | + D2H_Time_pinned +256By ms 0.0232 0.0234 0.0013 0.0221 0.0378 |
| 136 | + D2H_Time_pinned +512By ms 0.0234 0.0238 0.0063 0.0224 0.2091 |
| 137 | + D2H_Time_pinned 1kB ms 0.0234 0.0236 0.0028 0.0224 0.0875 |
| 138 | + D2H_Time_pinned 2kB ms 0.0235 0.0237 0.0014 0.0224 0.0482 |
| 139 | + D2H_Time_pinned 4kB ms 0.0235 0.0239 0.0031 0.0226 0.0794 |
| 140 | + D2H_Time_pinned 8kB ms 0.0237 0.0240 0.0027 0.0226 0.0738 |
| 141 | + D2H_Time_pinned 16kB ms 0.0240 0.0242 0.0009 0.0232 0.0285 |
| 142 | + D2H_Time_pinned 32kB ms 0.0245 0.0248 0.0021 0.0235 0.0563 |
| 143 | + D2H_Time_pinned 64kB ms 0.0254 0.0257 0.0011 0.0242 0.0304 |
| 144 | + D2H_Time_pinned 128kB ms 0.0272 0.0275 0.0026 0.0264 0.0626 |
| 145 | + D2H_Time_pinned 256kB ms 0.0318 0.0319 0.0007 0.0307 0.0362 |
| 146 | + D2H_Time_pinned 512kB ms 0.0410 0.0413 0.0024 0.0402 0.0736 |
| 147 | + D2H_Time_pinned 1024kB ms 0.0597 0.0599 0.0017 0.0589 0.0824 |
| 148 | + D2H_Time_pinned 2048kB ms 0.0970 0.0973 0.0010 0.0962 0.1018 |
| 149 | + D2H_Time_pinned 4096kB ms 0.1728 0.1729 0.0007 0.1717 0.1779 |
| 150 | + D2H_Time_pinned 8192kB ms 0.3365 0.3367 0.0010 0.3350 0.3394 |
| 151 | + D2H_Time_pinned 16384kB ms 0.6341 0.7147 0.7979 0.6326 8.6538 |
| 152 | + D2H_Time_pinned 32768kB ms 1.2294 1.2385 0.0420 1.2278 1.4458 |
| 153 | + D2H_Time_pinned 65536kB ms 2.5014 2.5117 0.0391 2.4947 2.7066 |
| 154 | + D2H_Time_pinned 131072kB ms 4.8850 4.9092 0.0748 4.8789 5.2806 |
| 155 | + D2H_Time_pinned 262144kB ms 9.6478 9.6860 0.1106 9.6377 10.0171 |
| 156 | + D2H_Time_pinned 524288kB ms 19.1607 19.2196 0.1333 19.1525 19.5434 |
| 157 | +
|
| 158 | +Note: results marked with (*) had missing values such as |
| 159 | +might occur with a mixture of architectural capabilities. |
| 160 | + """ |
| 161 | + |
| 162 | + for i, metric in enumerate(['htod_524288kB', 'htod_524288kB']): |
| 163 | + assert (benchmark._process_raw_result(i, raw_output[i])) |
| 164 | + assert (metric in benchmark.result) |
| 165 | + assert (len(benchmark.result[metric]) == 1) |
| 166 | + assert (isinstance(benchmark.result[metric][0], numbers.Number)) |
| 167 | + |
| 168 | + assert (benchmark.result['htod_524288kB'][0] == 24.6708) |
| 169 | + assert (benchmark.result['dtoh_524288kB'][0] == 27.9348) |
0 commit comments