File changed: tensorrt_llm/bench/benchmark — 1 file changed, +9 −5
lines changed Original file line number Diff line number Diff line change 281281 help = "Path where per request information is written to." ,
282282)
283283@optgroup .option (
284- "--enable_chunked_context/--disable_chunked_context" ,
285- default = True ,
286- help =
287- "Enable/disable chunking in prefill stage for enhanced throughput benchmark. "
284+ "--enable_chunked_context" ,
285+ is_flag = True ,
286+ default = None ,
287+ help = "Enable chunking in prefill stage for enhanced throughput benchmark. "
288+ "Default is False for PyTorch/AutoDeploy backend, True for TensorRT backend." ,
288289)
289290@optgroup .option (
290291 "--scheduler_policy" ,
@@ -409,8 +410,11 @@ def throughput_command(
409410 kv_cache_percent = params .get ("kv_cache_free_gpu_mem_fraction" )
410411 beam_width = params .get ("beam_width" )
411412 streaming : bool = params .get ("streaming" )
412- enable_chunked_context : bool = params .get ("enable_chunked_context" )
413413 scheduler_policy : str = params .get ("scheduler_policy" )
414+ enable_chunked_context : bool = params .get ("enable_chunked_context" )
415+ if enable_chunked_context is None :
416+ # Set default based on backend: True for TensorRT, False for others
417+ enable_chunked_context = backend .lower () == "tensorrt"
414418
415419 # Update configuration with runtime options
416420 exec_settings ["settings_config" ]["kv_cache_percent" ] = kv_cache_percent
You can’t perform that action at this time.
0 commit comments