@@ -327,7 +327,9 @@ ModelInstanceState::~ModelInstanceState()
 {
   torch_model_.reset();
 #ifdef TRITON_ENABLE_GPU
-  c10::cuda::CUDACachingAllocator::emptyCache();
+  if (device_.is_cuda()) {
+    c10::cuda::CUDACachingAllocator::emptyCache();
+  }
 #endif  // TRITON_ENABLE_GPU
 }
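Note on the hunk above: `c10::cuda::CUDACachingAllocator::emptyCache()` was previously called for every instance whenever the backend was compiled with `TRITON_ENABLE_GPU`, including instances placed on CPU; the guard restricts the cleanup to instances whose `device_` is actually a CUDA device. A minimal standalone sketch of the same pattern, assuming a CUDA-enabled libtorch build (`ReleaseInstance`, `model`, and `device` are illustrative stand-ins for the destructor and the `torch_model_`/`device_` members, not the backend's real code):

```cpp
// Sketch only: mirrors the guarded-cleanup pattern from the hunk above.
#include <memory>
#include <torch/script.h>
#ifdef TRITON_ENABLE_GPU
#include <c10/cuda/CUDACachingAllocator.h>
#endif  // TRITON_ENABLE_GPU

void
ReleaseInstance(
    std::shared_ptr<torch::jit::Module>& model, const torch::Device& device)
{
  // Drop the module first so its tensors are returned to the allocator.
  model.reset();
#ifdef TRITON_ENABLE_GPU
  // Flush the CUDA caching allocator only when this instance actually runs
  // on a GPU; a CPU instance has nothing cached there to release.
  if (device.is_cuda()) {
    c10::cuda::CUDACachingAllocator::emptyCache();
  }
#endif  // TRITON_ENABLE_GPU
}
```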
@@ -666,11 +668,12 @@ ModelInstanceState::ProcessRequests(
   input_memories.clear();
 
   // Verify output indices are valid with number of outputs after execution
+  bool invalid_index = false;
   int max_index = output_tensors.size() - 1;
   for (const auto& name : output_names) {
     int op_index = output_index_map_[name];
     if ((op_index < 0) || (op_index > max_index)) {
-      RESPOND_ALL_AND_RETURN_IF_ERROR(
+      SendErrorForResponses(
           &responses, request_count,
           TRITONSERVER_ErrorNew(
               TRITONSERVER_ERROR_INVALID_ARG,
@@ -680,12 +683,16 @@ ModelInstanceState::ProcessRequests(
                   " doesn't exist. This model has " +
                   std::to_string(max_index + 1) + " outputs")
                   .c_str()));
+      invalid_index = true;
+      break;
     }
   }
 
-  ReadOutputTensors(
-      total_batch_size, output_names, output_tensors, requests, request_count,
-      &responses);
+  if (!invalid_index) {
+    ReadOutputTensors(
+        total_batch_size, output_names, output_tensors, requests, request_count,
+        &responses);
+  }
 
   uint64_t exec_end_ns = 0;
   SET_TIMESTAMP(exec_end_ns);
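Note on the two hunks above: `RESPOND_ALL_AND_RETURN_IF_ERROR` returns from `ProcessRequests()` immediately, so an invalid output index would skip the `exec_end_ns` timestamp and the bookkeeping that follows it. Sending the error with `SendErrorForResponses` and latching it in `invalid_index` keeps control flowing to that code while still skipping `ReadOutputTensors()`. A self-contained sketch of this flag-and-fall-through pattern (the helpers below are hypothetical stand-ins for `SendErrorForResponses`/`ReadOutputTensors`, not Triton's real API):

```cpp
// Sketch of the flag-and-fall-through error handling shown in the diff.
#include <iostream>
#include <map>
#include <string>
#include <vector>

static void respond_error(const std::string& msg) { std::cerr << msg << "\n"; }
static void read_output_tensors() { /* copy model outputs into responses */ }
static void record_exec_end_timestamp() { /* feed execution statistics */ }

void
process_outputs(
    const std::vector<std::string>& output_names,
    std::map<std::string, int>& output_index_map, int num_outputs)
{
  bool invalid_index = false;
  int max_index = num_outputs - 1;
  for (const auto& name : output_names) {
    int op_index = output_index_map[name];
    if ((op_index < 0) || (op_index > max_index)) {
      // Attach the error to the responses but do NOT return early: the
      // bookkeeping at the bottom of the function must still run.
      respond_error(
          "The output " + name + " refers to index " +
          std::to_string(op_index) + ", but this model has " +
          std::to_string(num_outputs) + " outputs");
      invalid_index = true;
      break;  // one error per batch is enough
    }
  }

  // Only read outputs when every requested index was valid; otherwise the
  // responses already carry the error.
  if (!invalid_index) {
    read_output_tensors();
  }

  // This is the code an early return would have skipped.
  record_exec_end_timestamp();
}

int
main()
{
  std::map<std::string, int> index_map{{"good", 0}, {"bad", 7}};
  process_outputs({"good", "bad"}, index_map, /*num_outputs=*/1);
}
```

Breaking out of the loop on the first bad index also avoids attaching several errors to the same set of responses.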