NVIDIA
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 8 additions & 0 deletions
diff --git a/tensorrt_llm/_torch/auto_deploy/shim/demollm.py b/tensorrt_llm/_torch/auto_deploy/shim/demollm.py
Lines changed: 1 addition & 1 deletion
diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py
Lines changed: 2 additions & 3 deletions
@@ -35,6 +35,8 @@ extend_skip_glob = [
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
     "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py",
     "tests/unittest/_torch/sampler/test_torch_sampler.py",
+    "tensorrt_llm/_torch/pyexecutor/sampler.py",
+    "tensorrt_llm/_torch/pyexecutor/sampling_utils.py",
 ]
 
 [tool.yapf]
@@ -67,6 +69,8 @@ ignore_patterns = [
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
     "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py",
     "tests/unittest/_torch/sampler/test_torch_sampler.py",
+    "tensorrt_llm/_torch/pyexecutor/sampler.py",
+    "tensorrt_llm/_torch/pyexecutor/sampling_utils.py",
 ]
 
 [tool.codespell]
@@ -102,6 +106,8 @@ exclude = [
     "tests/unittest/_torch/modeling/test_modeling_mistral.py",
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
     "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py",
+    "tensorrt_llm/_torch/pyexecutor/sampler.py",
+    "tensorrt_llm/_torch/pyexecutor/sampling_utils.py",
 ]
 
 
@@ -147,6 +153,8 @@ include = [
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
     "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py",
     "tests/unittest/_torch/sampler/test_torch_sampler.py",
+    "tensorrt_llm/_torch/pyexecutor/sampler.py",
+    "tensorrt_llm/_torch/pyexecutor/sampling_utils.py",
 ]
 exclude = [
     "**3rdparty/**",
 
@@ -13,7 +13,7 @@
 from ....executor.result import CompletionOutput, GenerationResult
 from ....inputs.multimodal import MultimodalParams
 from ....sampling_params import SamplingParams
-from ...pyexecutor.sampler import greedy_search_sampling_batch, top_k_sampling_batch
+from ...pyexecutor.sampling_utils import greedy_search_sampling_batch, top_k_sampling_batch
 from ..distributed import common as dist_ad
 from ..utils.logger import ad_logger
 from .ad_executor import ADEngine
 
@@ -1204,13 +1204,12 @@ def _executor_loop(self):
 
                 self._kv_connector_terminate_requests()
 
-                if self.enable_iter_perf_stats:
+                if self.enable_iter_perf_stats and sample_state is not None:
                     iter_stats.inflight_batching_stats.num_ctx_tokens = self.model_engine.iter_states[
                         'num_ctx_tokens']
                     self._process_iter_stats(
                         finished_requests, self.active_requests,
-                        BatchState(sample_state=SampleState(
-                            scheduled_requests=scheduled_batch),
+                        BatchState(sample_state=sample_state,
                                    iter_stats=iter_stats,
                                    iter_start_time=iter_start_time))