@@ -69,28 +69,31 @@ def __init__(
         self._await_response_helper = AwaitResponseHelper(
             self)  # TODO: make it weakref

-        def _create_engine():
-            device_id = self.global_rank % torch.cuda.device_count()
-            torch.cuda.set_device(device_id)
-
-            # Make sure C++ executor would use same devices/ranks as py_executor
-            global_rank = global_mpi_rank()
-            comm_ranks = mpi_comm().allgather(global_rank)
-            device_ids = mpi_comm().allgather(device_id)
-            executor_config.parallel_config = tllm.ParallelConfig(
-                participant_ids=comm_ranks, device_ids=device_ids)
-
-            if isinstance(engine, Engine):
-                return tllm.Executor(engine.engine,
-                                     json.dumps(engine.config.to_dict(),
-                                                cls=ConfigEncoder),
-                                     tllm.ModelType.DECODER_ONLY,
-                                     executor_config=executor_config,
-                                     managed_weights=engine.managed_weights)
-
-            if not hasattr(executor_config, "backend"):
-                return tllm.Executor(engine, tllm.ModelType.DECODER_ONLY,
-                                     executor_config)
+    def create_engine(self, engine: Union[Path, Engine],
+                      executor_config: tllm.ExecutorConfig,
+                      lora_config: Optional[LoraConfig],
+                      garbage_collection_gen0_threshold: Optional[int]) -> None:
+        device_id = self.global_rank % torch.cuda.device_count()
+        torch.cuda.set_device(device_id)
+
+        # Make sure C++ executor would use same devices/ranks as py_executor
+        global_rank = global_mpi_rank()
+        comm_ranks = mpi_comm().allgather(global_rank)
+        device_ids = mpi_comm().allgather(device_id)
+        executor_config.parallel_config = tllm.ParallelConfig(
+            participant_ids=comm_ranks, device_ids=device_ids)
+
+        if isinstance(engine, Engine):
+            self.engine = tllm.Executor(engine.engine,
+                                        json.dumps(engine.config.to_dict(),
+                                                   cls=ConfigEncoder),
+                                        tllm.ModelType.DECODER_ONLY,
+                                        executor_config=executor_config,
+                                        managed_weights=engine.managed_weights)
+        elif not hasattr(executor_config, "backend"):
+            self.engine = tllm.Executor(engine, tllm.ModelType.DECODER_ONLY,
+                                        executor_config)
+        else:
             args = {
                 "executor_config": executor_config,
                 "checkpoint_dir": executor_config.hf_model_dir,
@@ -109,10 +112,9 @@ def _create_engine():
             else:
                 raise ValueError(
                     f"Unsupported backend config: {executor_config.backend}")
-            return create_executor(**args)
-
-        self.engine = _create_engine()
+            self.engine = create_executor(**args)

+        # LoRA setup
         self._lora_manager: Optional[LoraManager] = None
         self._prompt_adapter_manager: Optional[PromptAdapterManager] = None
         self._runtime_model_config: Optional[ModelConfig] = None
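For reference, a minimal standalone sketch of the dispatch pattern this refactor hoists out of `__init__`: `create_engine` picks one of three construction paths depending on whether `engine` is an in-memory `Engine` object, or a path with no `backend` attribute on the config, or a path with a Python backend configured. The stub types here (`Engine`, `ExecutorConfig`, `Worker`) are hypothetical stand-ins for the real TensorRT-LLM classes, and the call site in `__init__` is assumed, not shown in the diff.

from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Union


# Hypothetical stand-ins for the real tllm types, for illustration only.
@dataclass
class Engine:
    engine: bytes = b""
    managed_weights: Optional[dict] = None


@dataclass
class ExecutorConfig:
    # The real config may or may not carry a `backend` attribute; the
    # dispatch below keys off its presence, mirroring the hasattr() check.
    hf_model_dir: Optional[Path] = None


class Worker:
    def __init__(self, engine: Union[Path, Engine],
                 executor_config: ExecutorConfig):
        # Assumed call site: __init__ delegates to the create_engine method
        # instead of calling an inner _create_engine() closure.
        self.engine = None
        self.create_engine(engine, executor_config)

    def create_engine(self, engine: Union[Path, Engine],
                      executor_config: ExecutorConfig) -> None:
        if isinstance(engine, Engine):
            # In-memory engine: hand over the serialized blob and weights.
            self.engine = ("cpp_executor_from_engine", engine.managed_weights)
        elif not hasattr(executor_config, "backend"):
            # Engine directory on disk, no Python backend requested.
            self.engine = ("cpp_executor_from_path", engine)
        else:
            # Python backend (e.g. "pytorch"): build through a factory.
            self.engine = ("python_executor", executor_config.backend)


if __name__ == "__main__":
    cfg = ExecutorConfig()
    print(Worker(Path("/tmp/engine_dir"), cfg).engine)
    # -> ('cpp_executor_from_path', PosixPath('/tmp/engine_dir'))

    cfg.backend = "pytorch"  # attached dynamically, so hasattr() now succeeds
    print(Worker(Path("/tmp/engine_dir"), cfg).engine)
    # -> ('python_executor', 'pytorch')

Turning the closure into a method also means the created executor is stored via `self.engine` assignments rather than return values, which is why the diff rewrites each `return tllm.Executor(...)` branch into an `if/elif/else` chain.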