Measure output tensor data copy when not using device memory
javier-intel committed Oct 18, 2024
1 parent a95ed17 commit 2cb5255
Showing 4 changed files with 55 additions and 0 deletions.
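
In short: when --use_device_mem is not set, benchmark_app now pre-allocates a plain host ov::Tensor for every model output and binds it to each infer request, so the device-to-host copy of output data is included in the measured time. A minimal sketch of the idea (the commit fills these tensors via get_random_tensor; plain allocation is shown here for brevity, and the function name below is hypothetical):

    #include <map>
    #include <string>
    #include "openvino/openvino.hpp"

    // Sketch: bind a freshly allocated host tensor to every model output so the
    // plugin must copy results into host memory; that copy then becomes part of
    // the measured iteration time.
    void bind_host_outputs(const ov::CompiledModel& compiledModel,
                           ov::InferRequest& request,
                           std::map<std::string, ov::Tensor>& hostOutputData) {
        for (auto& output : compiledModel.outputs()) {
            const std::string& name = output.get_any_name();
            if (hostOutputData.count(name) == 0) {
                // Plain host allocation; output contents are irrelevant up front.
                hostOutputData[name] = ov::Tensor(output.get_element_type(), output.get_shape());
            }
            request.set_tensor(name, hostOutputData[name]);
        }
    }
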
samples/cpp/benchmark_app/inputs_filling.hpp (2 additions, 0 deletions)
@@ -21,4 +21,6 @@ std::map<std::string, ov::TensorVector> get_tensors_static_case(const std::vecto
benchmark_app::InputsInfo& app_inputs_info,
size_t requestsNum);

ov::Tensor get_random_tensor(const std::pair<std::string, benchmark_app::InputInfo>& inputInfo);

void copy_tensor_data(ov::Tensor& dst, const ov::Tensor& src);
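
get_random_tensor, newly exposed by this header, allocates a host tensor of the requested type and shape and fills it with random data. A minimal f32-only sketch, assuming the real implementation in inputs_filling.cpp dispatches on element type (the helper name is hypothetical to avoid clashing with the real declaration):

    #include <random>
    #include "openvino/openvino.hpp"

    // Simplified stand-in for get_random_tensor; handles f32 only.
    ov::Tensor make_random_f32_tensor(const ov::Shape& shape) {
        ov::Tensor tensor(ov::element::f32, shape);  // host-side allocation
        std::mt19937 gen(0);
        std::uniform_real_distribution<float> dist(0.0f, 1.0f);
        float* data = tensor.data<float>();
        for (size_t i = 0; i < tensor.get_size(); ++i)
            data[i] = dist(gen);                     // fill with random values
        return tensor;
    }
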
samples/cpp/benchmark_app/main.cpp (36 additions, 0 deletions)
@@ -945,6 +945,7 @@ int main(int argc, char* argv[]) {
bool useNpuMem = false;

std::map<std::string, ov::TensorVector> inputsData;
std::map<std::string, ov::Tensor> hostOutputData;
if (isFlagSetInCommandLine("use_device_mem")) {
if (device_name.find("GPU") == 0) {
inputsData = ::gpu::get_remote_input_tensors(inputFiles,
@@ -981,6 +982,13 @@
batchSize,
app_inputs_info[0],
nireq);
for (auto& output : compiledModel.outputs()) {
auto& name = output.get_any_name();
benchmark_app::InputInfo info;
info.type = output.get_element_type();
info.dataShape = output.get_shape();
hostOutputData[name] = get_random_tensor({name, info});
}
}
}
// ----------------- 10. Measuring performance
@@ -1056,6 +1064,15 @@ int main(int argc, char* argv[]) {
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
}
} else if (useNpuMem) {
auto outputTensors = ::npu::get_remote_output_tensors(compiledModel);
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
}
} else {
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), hostOutputData[output.get_any_name()]);
}
}
++i;
}
@@ -1082,6 +1099,15 @@ int main(int argc, char* argv[]) {
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
}
} else if (useNpuMem) {
auto outputTensors = ::npu::get_remote_output_tensors(compiledModel);
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
}
} else {
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), hostOutputData[output.get_any_name()]);
}
}
}

@@ -1141,6 +1167,16 @@ int main(int argc, char* argv[]) {
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
}
} else if (useNpuMem) {
auto outputTensors =
::npu::get_remote_output_tensors(compiledModel);
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
}
} else {
for (auto& output : compiledModel.outputs()) {
inferRequest->set_tensor(output.get_any_name(), hostOutputData[output.get_any_name()]);
}
}
}

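The three main.cpp hunks above paste the same output-binding block at every place an infer request is prepared. Condensed into one sketch (GPU branch omitted; useNpuMem, hostOutputData, compiledModel, and inferRequest are the names from the diff; ov::Tensor copies are shallow and share memory):

    std::map<std::string, ov::Tensor> outputTensors =
        useNpuMem ? ::npu::get_remote_output_tensors(compiledModel)  // level-zero host memory
                  : hostOutputData;                                  // pre-allocated host tensors
    for (auto& output : compiledModel.outputs()) {
        inferRequest->set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
    }
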
samples/cpp/benchmark_app/remote_tensors_filling.cpp (15 additions, 0 deletions)
@@ -238,4 +238,19 @@ std::map<std::string, ov::TensorVector> get_remote_input_tensors(
}
return remoteTensors;
}

std::map<std::string, ov::Tensor> get_remote_output_tensors(const ov::CompiledModel& compiledModel) {
std::map<std::string, ov::Tensor> remoteTensors;
auto context = compiledModel.get_context();
auto& zeroContext = static_cast<ov::intel_npu::level_zero::ZeroContext&>(context);

for (auto& output : compiledModel.outputs()) {
auto tensor = zeroContext.create_l0_host_tensor(output.get_element_type(),
output.get_shape(),
ov::intel_npu::TensorType::OUTPUT);
remoteTensors[output.get_any_name()] = tensor;
}

return remoteTensors;
}
} // namespace npu
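
For reference, a hypothetical end-to-end use of the new helper, mirroring the main.cpp hunks above: create a request, bind level-zero host output tensors, and run inference so results land directly in host-visible device-allocated memory (no separately measured output copy).

    #include "openvino/openvino.hpp"
    #include "remote_tensors_filling.hpp"  // declares ::npu::get_remote_output_tensors (this commit)

    // Hypothetical wiring for an NPU-compiled model.
    void run_with_npu_outputs(ov::CompiledModel& compiledModel) {
        ov::InferRequest request = compiledModel.create_infer_request();
        auto outputTensors = ::npu::get_remote_output_tensors(compiledModel);
        for (auto& output : compiledModel.outputs()) {
            request.set_tensor(output.get_any_name(), outputTensors[output.get_any_name()]);
        }
        request.infer();  // results are written straight into the level-zero host tensors
    }
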
samples/cpp/benchmark_app/remote_tensors_filling.hpp (2 additions, 0 deletions)
@@ -74,4 +74,6 @@ std::map<std::string, ov::TensorVector> get_remote_input_tensors(
const std::vector<benchmark_app::InputsInfo>& app_inputs_info,
const ov::CompiledModel& compiledModel,
size_t num_requests);

std::map<std::string, ov::Tensor> get_remote_output_tensors(const ov::CompiledModel& compiledModel);
} // namespace npu
