
Commit 238a285

venkywonka authored and dominicshanshan committed
[https://nvbugs/5453667] [fix] reverting a breaking change: make trtllm-bench enable_chunked_context defaults backend-dependent (NVIDIA#6956)
Signed-off-by: Venky Ganesh <23023424+venkywonka@users.noreply.github.com>
Signed-off-by: Wangshanshan <30051912+dominicshanshan@users.noreply.github.com>
1 parent 2927c5d commit 238a285

File tree

1 file changed: +9 -5 lines changed


tensorrt_llm/bench/benchmark/throughput.py

Lines changed: 9 additions & 5 deletions
@@ -281,10 +281,11 @@
     help="Path where per request information is written to.",
 )
 @optgroup.option(
-    "--enable_chunked_context/--disable_chunked_context",
-    default=True,
-    help=
-    "Enable/disable chunking in prefill stage for enhanced throughput benchmark. "
+    "--enable_chunked_context",
+    is_flag=True,
+    default=None,
+    help="Enable chunking in prefill stage for enhanced throughput benchmark. "
+    "Default is False for PyTorch/AutoDeploy backend, True for TensorRT backend.",
 )
 @optgroup.option(
     "--scheduler_policy",
@@ -409,8 +410,11 @@ def throughput_command(
     kv_cache_percent = params.get("kv_cache_free_gpu_mem_fraction")
     beam_width = params.get("beam_width")
     streaming: bool = params.get("streaming")
-    enable_chunked_context: bool = params.get("enable_chunked_context")
     scheduler_policy: str = params.get("scheduler_policy")
+    enable_chunked_context: bool = params.get("enable_chunked_context")
+    if enable_chunked_context is None:
+        # Set default based on backend: True for TensorRT, False for others
+        enable_chunked_context = backend.lower() == "tensorrt"

     # Update configuration with runtime options
     exec_settings["settings_config"]["kv_cache_percent"] = kv_cache_percent
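For context, below is a minimal, self-contained sketch of the pattern this commit adopts: declaring the flag with is_flag=True and default=None so that "flag not given" is distinguishable from an explicit choice, then resolving the default from the backend. It is an assumption-laden illustration, not the real trtllm-bench CLI: it uses plain click rather than the click_option_group decorators in throughput.py, and the command and option names other than --enable_chunked_context are invented for the example.

# Minimal sketch of the backend-dependent default pattern (assumptions: plain
# click instead of click_option_group; command/option names other than
# --enable_chunked_context are illustrative only).
from typing import Optional

import click


@click.command()
@click.option("--backend",
              type=click.Choice(["pytorch", "autodeploy", "tensorrt"]),
              default="pytorch",
              help="Benchmark backend (illustrative choices).")
@click.option("--enable_chunked_context",
              is_flag=True,
              default=None,
              help="Enable chunking in the prefill stage. Defaults to True for "
              "the TensorRT backend and False otherwise.")
def bench(backend: str, enable_chunked_context: Optional[bool]) -> None:
    # is_flag=True combined with default=None leaves the value as None when
    # the flag is not passed, so an unset flag is distinguishable from an
    # explicit True and the effective default can depend on the backend.
    if enable_chunked_context is None:
        enable_chunked_context = backend.lower() == "tensorrt"
    click.echo(f"backend={backend} "
               f"enable_chunked_context={enable_chunked_context}")


if __name__ == "__main__":
    bench()

Running this sketch with --backend tensorrt resolves the flag to True, while --backend pytorch or --backend autodeploy resolves it to False unless --enable_chunked_context is passed explicitly, which mirrors the behavior the commit restores for trtllm-bench.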
