We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f167b1f · commit b8a1c1b (Copy full SHA for b8a1c1b)
tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -787,6 +787,12 @@ def release_batch(result: ScheduledRequests | None):
787
f"Run generation only CUDA graph warmup for batch size={bs}, draft_len={draft_len}"
788
)
789
self.enable_spec_decode = draft_len > 0 or self.is_draft_model
790
+ if self.pytorch_backend_config.enable_autotuner:
791
+ with self.no_cuda_graph(), autotune():
792
+ self.forward(batch,
793
+ new_tensors_device=None,
794
+ resource_manager=resource_manager)
795
+ torch.cuda.synchronize()
796
self.forward(batch,
797
new_tensors_device=None,
798
resource_manager=resource_manager)
0 commit comments