diff --git a/src/flexible_inference_benchmark/engine/data.py b/src/flexible_inference_benchmark/engine/data.py
index 0c75d99..9f78dde 100644
--- a/src/flexible_inference_benchmark/engine/data.py
+++ b/src/flexible_inference_benchmark/engine/data.py
@@ -308,7 +308,7 @@ def __init__(
         filtered_dataset = [
             (prompt_str, prompt_len, output_len)
             for prompt_str, prompt_len, output_len in tokenized_dataset
-            if (prompt_len > 4 and output_len > 4)
+            if (prompt_len > 4 and output_len > 0)
         ]
         self.data = filtered_dataset
diff --git a/src/flexible_inference_benchmark/main.py b/src/flexible_inference_benchmark/main.py
index 2aeaf8b..2d69345 100644
--- a/src/flexible_inference_benchmark/main.py
+++ b/src/flexible_inference_benchmark/main.py
@@ -927,9 +927,14 @@ def run_main(args: argparse.Namespace) -> None:
     output_list: List[Any] = send_requests(client, requests_prompts, requests_times, arr_dims)
     benchmark_time = time.perf_counter() - t
     # pylint: disable=line-too-long
+
+    text_summaries: list[str] = []
+    if any(hasattr(o, "generated_text") for o in output_list):
+        text_summaries = [o.generated_text for o in output_list if hasattr(o, "generated_text")]  # type: ignore
     output = {
         "backend": args.backend,
         "time": benchmark_time,
+        "summary": text_summaries,
         "outputs": [request_func_output.model_dump() for request_func_output in output_list],  # type: ignore
         "inputs": requests_prompts,
         "tokenizer": args.tokenizer if args.tokenizer else args.model,
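
A minimal sketch of the relaxed filter in the first hunk (the triple layout follows the hunk; the sample values are hypothetical): samples whose expected output was four tokens or fewer were previously discarded, and now only zero-output samples are dropped.

# Sketch of the relaxed dataset filter; sample triples are hypothetical.
tokenized_dataset = [
    ("tiny", 3, 10),               # dropped either way: prompt_len <= 4
    ("normal prompt one", 12, 2),  # previously dropped (output_len <= 4), now kept
    ("normal prompt two", 12, 0),  # still dropped: no output tokens expected
]

filtered_dataset = [
    (prompt_str, prompt_len, output_len)
    for prompt_str, prompt_len, output_len in tokenized_dataset
    if (prompt_len > 4 and output_len > 0)
]
print(filtered_dataset)  # [('normal prompt one', 12, 2)]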
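
The hasattr guard in the second hunk collects generated text only from output objects that actually expose it, so "summary" stays an empty list for backends whose outputs carry no text. A minimal sketch of that behavior, using stand-in dataclasses rather than the benchmark's real output models:

from dataclasses import dataclass

@dataclass
class TextOutput:       # stand-in for an output object with generated text
    generated_text: str

@dataclass
class EmbeddingOutput:  # stand-in for an output object without generated_text
    embedding: list

output_list = [TextOutput("The answer is 42."), EmbeddingOutput([0.1, 0.2])]

text_summaries: list = []
if any(hasattr(o, "generated_text") for o in output_list):
    text_summaries = [o.generated_text for o in output_list if hasattr(o, "generated_text")]

print(text_summaries)  # ['The answer is 42.']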