1313from tensorrt_llm .bindings .executor import DecodingMode
1414from tensorrt_llm .llmapi .llm_args import (EagleDecodingConfig , KvCacheConfig ,
1515 MTPDecodingConfig , PeftCacheConfig ,
16- SamplerType , SparseAttentionConfig ,
16+ SamplerType , SchedulerConfig ,
17+ SparseAttentionConfig ,
1718 SpeculativeConfig , TorchLlmArgs )
1819from tensorrt_llm .logger import logger
1920from tensorrt_llm .lora_helper import (LoraConfig ,
@@ -663,8 +664,8 @@ def create_py_executor_instance(
663664 max_batch_size : Optional [int ] = None ,
664665 max_beam_width : Optional [int ] = None ,
665666 max_num_tokens : Optional [int ] = None ,
666- peft_cache_config : Optional [trtllm . PeftCacheConfig ] = None ,
667- scheduler_config : Optional [trtllm . SchedulerConfig ] = None ,
667+ peft_cache_config : Optional [PeftCacheConfig ] = None ,
668+ scheduler_config : Optional [SchedulerConfig ] = None ,
668669 cache_transceiver_config : Optional [trtllm .CacheTransceiverConfig ] = None ,
669670) -> PyExecutor :
670671 kv_cache_manager = resources .get (ResourceManagerType .KV_CACHE_MANAGER , None )
@@ -728,16 +729,14 @@ def create_py_executor_instance(
728729 num_lora_modules = model_engine .model .model_config .pretrained_config .num_hidden_layers * \
729730 len (lora_config .lora_target_modules + lora_config .missing_qkv_modules )
730731
731- peft_cache_config_model = PeftCacheConfig .from_pybind (
732- peft_cache_config
733- ) if peft_cache_config is not None else PeftCacheConfig ()
732+ peft_cache_config_model = PeftCacheConfig (
733+ ) if peft_cache_config is None else peft_cache_config
734734 if lora_config .max_loras is not None :
735735 peft_cache_config_model .num_device_module_layer = \
736736 max_lora_rank * num_lora_modules * lora_config .max_loras
737737 if lora_config .max_cpu_loras is not None :
738738 peft_cache_config_model .num_host_module_layer = \
739739 max_lora_rank * num_lora_modules * lora_config .max_cpu_loras
740- peft_cache_config = peft_cache_config_model ._to_pybind ()
741740
742741 from tensorrt_llm .bindings import WorldConfig
743742 world_config = WorldConfig (
@@ -748,7 +747,7 @@ def create_py_executor_instance(
748747 gpus_per_node = dist .mapping .gpus_per_node ,
749748 )
750749 peft_cache_manager = PeftCacheManager (
751- peft_cache_config = peft_cache_config ,
750+ peft_cache_config = peft_cache_config_model ,
752751 lora_config = lora_config ,
753752 model_config = model_binding_config ,
754753 world_config = world_config ,
0 commit comments