@@ -19,6 +19,8 @@ echo "enable_pdl: ${enable_pdl}, work_dir: ${work_dir}"
1919echo " SLURM_PROCID: ${SLURM_PROCID} , hostname: $( hostname) , instance_id: ${instance_id} "
2020
2121export TLLM_LOG_LEVEL=INFO
22+ export TRTLLM_SERVER_DISABLE_GC=1
23+ export TRTLLM_WORKER_DISABLE_GC=1
2224
2325if [ " ${enable_pdl} " = " true" ]; then
2426 export TRTLLM_ENABLE_PDL=1
@@ -62,15 +64,16 @@ if [ "${enable_nsys}" != "true" ]; then
6264 trtllm-llmapi-launch ${numa_bind_cmd} trtllm-serve ${model_path} --host $( hostname) --port ${port} --extra_llm_api_options ${config_file}
6365else
6466 nsys_prefix=" "
65- nsys_file=${work_dir} /nsys_worker_proc_${instance_id} _${SLURM_PROCID}
67+ nsys_file=${work_dir} /nsys_worker_proc_${role} _ ${ instance_id} _${SLURM_PROCID}
6668 export TLLM_PROFILE_RECORD_GC=1
6769 export TLLM_NVTX_DEBUG=1
68- if [ " ${role} " = " GEN" ] && [ " $SLURM_PROCID " = " 0" ]; then
70+ nsys_prefix=" nsys profile -e \" NSYS_MPI_STORE_TEAMS_PER_RANK=1\" -o ${nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi --cuda-graph-trace node --capture-range-end=stop --gpu-metrics-devices=none"
71+ if [ " ${role} " = " GEN" ]; then
6972 export TLLM_PROFILE_START_STOP=200-250
70- nsys_prefix=" nsys profile -e \" NSYS_MPI_STORE_TEAMS_PER_RANK=1\" -o ${nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi --cuda-graph-trace node --capture-range-end=stop --gpu-metrics-devices=none"
71- echo " nsys_prefix: ${nsys_prefix} "
73+ echo " nsys is enabled on gen_gpus"
7274 elif [ " ${role} " = " CTX" ]; then
73- echo " nsys is not enabled on ctx_gpus"
75+ export TLLM_PROFILE_START_STOP=10-30
76+ echo " nsys is enabled on ctx_gpus"
7477 fi
7578 ${nsys_prefix} trtllm-llmapi-launch ${numa_bind_cmd} \
7679 trtllm-serve ${model_path} \
0 commit comments