Skip to content

Commit 52c31eb

Browse files
jbaiocchi authored and tensorflower-gardener committed
Remove duplicated function GetDeviceCapFromXPlane
PiperOrigin-RevId: 399529225
Change-Id: I43674969ec0a25e1f01f6ba42da06c654718dc5b
1 parent c69fbe6 commit 52c31eb

File tree

4 files changed

+5
-38
lines changed

4 files changed

+5
-38
lines changed

tensorflow/core/profiler/convert/BUILD

+1
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,7 @@ cc_library(
338338
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
339339
"//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
340340
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
341+
"//tensorflow/core/profiler/utils:device_caps_utils",
341342
"//tensorflow/core/profiler/utils:event_span",
342343
"//tensorflow/core/profiler/utils:hardware_type_utils",
343344
"//tensorflow/core/profiler/utils:kernel_stats_utils",

tensorflow/core/profiler/convert/xplane_to_op_stats.cc

+3-34
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ limitations under the License.
3636
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
3737
#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
3838
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
39+
#include "tensorflow/core/profiler/utils/device_caps_utils.h"
3940
#include "tensorflow/core/profiler/utils/event_span.h"
4041
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
4142
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
@@ -49,38 +50,6 @@ limitations under the License.
4950
namespace tensorflow {
5051
namespace profiler {
5152

52-
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
53-
DeviceCapabilities cap;
54-
XPlaneVisitor plane = CreateTfXPlaneVisitor(&device_plane);
55-
plane.ForEachStat([&cap](const XStatVisitor& stat) {
56-
if (!stat.Type().has_value()) return;
57-
switch (stat.Type().value()) {
58-
case kDevCapClockRateKHz:
59-
cap.set_clock_rate_in_ghz(stat.IntValue() / 1000000.0);
60-
break;
61-
case kDevCapCoreCount:
62-
cap.set_num_cores(stat.IntValue());
63-
break;
64-
case kDevCapMemoryBandwidth:
65-
cap.set_memory_bandwidth(stat.UintValue()); // bytes/s
66-
break;
67-
case kDevCapMemorySize:
68-
cap.set_memory_size_in_bytes(stat.UintValue());
69-
break;
70-
case kDevCapComputeCapMajor:
71-
cap.mutable_compute_capability()->set_major(stat.IntValue());
72-
break;
73-
case kDevCapComputeCapMinor:
74-
cap.mutable_compute_capability()->set_minor(stat.IntValue());
75-
break;
76-
case kDevVendor:
77-
cap.set_device_vendor(std::string(stat.StrOrRefValue()));
78-
break;
79-
}
80-
});
81-
return cap;
82-
}
83-
8453
PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
8554
double peak_hbm_bw_giga_bytes_per_second) {
8655
PerfEnv result;
@@ -93,7 +62,7 @@ PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
9362
}
9463

9564
PerfEnv GetPerfEnvFromXPlane(const XPlane& device_plane) {
96-
DeviceCapabilities cap = GetDeviceCapFromXPlane(device_plane);
65+
DeviceCapabilities cap = GetDeviceCaps(device_plane);
9766
return MakePerfEnv(GetFlopMaxThroughputPerSM(cap) / 1000 * cap.num_cores(),
9867
cap.memory_bandwidth() / 1e9);
9968
}
@@ -159,7 +128,7 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
159128
op_metrics_db_combiner.Combine(device_op_metrics_db);
160129
}
161130
if (gpu_model.empty()) {
162-
gpu_model = GpuModelName(GetDeviceCapFromXPlane(*device_trace));
131+
gpu_model = GpuModelName(GetDeviceCaps(*device_trace));
163132
}
164133
if (options.generate_step_db) {
165134
StepEvents device_step_events =

tensorflow/core/profiler/convert/xplane_to_op_stats.h

-3
Original file line number | Diff line number | Diff line change
@@ -40,9 +40,6 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
4040
void PropagateXSpaceDiagnosticsToOpStats(const XSpace& space,
4141
OpStats* op_stats);
4242

43-
// Extracts DeviceCapabilities from XPlane stats.
44-
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane);
45-
4643
// Populates PerfEnv.
4744
PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
4845
double peak_hbm_bw_giga_bytes_per_second);

tensorflow/core/profiler/utils/device_caps_utils.cc

+1-1
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ DeviceCapabilities GetDeviceCaps(const XPlane& plane) {
5959
if (!stat.Type().has_value()) return;
6060
switch (stat.Type().value()) {
6161
case StatType::kDevCapClockRateKHz:
62-
caps.set_clock_rate_in_ghz(stat.IntOrUintValue() * 1000000.0);
62+
caps.set_clock_rate_in_ghz(stat.IntOrUintValue() / 1000000.0);
6363
break;
6464
case StatType::kDevCapCoreCount:
6565
caps.set_num_cores(stat.IntOrUintValue());

0 commit comments

Comments
 (0)